Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix python's send_columns failing to convert correctly from ComponentBatch to ComponentColumn in some cases #7155

Merged
merged 7 commits from the feature branch into the target branch
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions crates/store/re_types/definitions/rerun/archetypes/tensor.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ namespace rerun.archetypes;
/// \cpp data can be passed in without a copy from raw pointers or by reference from `std::vector`/`std::array`/c-arrays.
/// \cpp If needed, this "borrow-behavior" can be extended by defining your own `rerun::CollectionAdapter`.
///
/// \py It's not currently possible to use `send_columns` with tensors since construction
/// \py of `rerun.components.TensorDataBatch` does not support more than a single element.
/// \py This will be addressed as part of https://github.com/rerun-io/rerun/issues/6832.
///
/// \example archetypes/tensor_simple title="Simple tensor" image="https://static.rerun.io/tensor_simple/baacb07712f7b706e3c80e696f70616c6c20b367/1200w.png"
table Tensor (
"attr.rust.derive": "PartialEq",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ namespace rerun.datatypes;
///
/// These dimensions are combined with an index to look up values from the `buffer` field,
/// which stores a contiguous array of typed values.
///
/// \py It's not currently possible to use `send_columns` with tensors since construction
/// \py of `rerun.components.TensorDataBatch` does not support more than a single element.
/// \py This will be addressed as part of https://github.com/rerun-io/rerun/issues/6832.
table TensorData (
"attr.python.aliases": "npt.ArrayLike",
"attr.python.array_aliases": "npt.ArrayLike",
Expand Down
9 changes: 0 additions & 9 deletions rerun_py/rerun_sdk/rerun/_baseclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,15 +310,6 @@ def as_arrow_array(self) -> pa.Array:
return pa.ListArray.from_arrays(offsets, array)


ComponentColumnLike = ComponentBatchLike | ComponentColumn
"""
Type alias for component column-like objects.

Every component batch can be interpreted as a component column.
`ComponentColumn` implements the `ComponentBatchLike` interface but is still explicitly included here.
"""


class ComponentBatchMixin(ComponentBatchLike):
def component_name(self) -> str:
"""
Expand Down
4 changes: 4 additions & 0 deletions rerun_py/rerun_sdk/rerun/archetypes/tensor.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions rerun_py/rerun_sdk/rerun/datatypes/blob_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ def native_to_pa_array_override(data: BlobArrayLike, data_type: pa.DataType) ->
inners = []
elif isinstance(data[0], Blob):
inners = [pa.array(np.array(datum.data, dtype=np.uint8).flatten()) for datum in data] # type: ignore[union-attr]
elif isinstance(data[0], bytes):
inners = [pa.array(np.frombuffer(datum, dtype=np.uint8)) for datum in data] # type: ignore[arg-type]
else:
inners = [pa.array(np.array(datum, dtype=np.uint8).flatten()) for datum in data]

Expand Down
4 changes: 4 additions & 0 deletions rerun_py/rerun_sdk/rerun/datatypes/tensor_data.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 23 additions & 9 deletions rerun_py/rerun_sdk/rerun/send_columns.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from __future__ import annotations

from typing import Iterable, Protocol, TypeVar
from typing import Iterable, Protocol, TypeVar, Union

import pyarrow as pa
import rerun_bindings as bindings

from ._baseclasses import Archetype, ComponentColumnLike
from ._baseclasses import Archetype, ComponentBatchMixin, ComponentColumn
from ._log import IndicatorComponentBatch
from .error_utils import catch_and_log_exceptions
from .recording_stream import RecordingStream
Expand Down Expand Up @@ -84,7 +84,7 @@ def as_arrow_array(self) -> pa.Array:
def send_columns(
entity_path: str,
times: Iterable[TimeColumnLike],
components: Iterable[ComponentColumnLike],
components: Iterable[Union[ComponentBatchMixin, ComponentColumn]],
recording: RecordingStream | None = None,
strict: bool | None = None,
) -> None:
Expand Down Expand Up @@ -148,7 +148,11 @@ def send_columns(
of timestamps. Generally you should use one of the provided classes: [`TimeSequenceColumn`][],
[`TimeSecondsColumn`][], or [`TimeNanosColumn`][].
components:
The batches of components to log. Each `ComponentColumnLike` object represents a single column of data.
The columns of components to log. Each object represents a single column of data.

If a batch of components is passed, it will be partitioned with one element per timepoint.
In order to send multiple components per time value, explicitly create a [`ComponentColumn`][rerun.ComponentColumn]
either by constructing it directly, or by calling the `.partition()` method on a `ComponentBatch` type.
recording:
Specifies the [`rerun.RecordingStream`][] to use.
If left unspecified, defaults to the current active data recording, if there is one.
Expand Down Expand Up @@ -182,15 +186,25 @@ def send_columns(
indicators.append(c)
continue
component_name = c.component_name()
component_column = c.as_arrow_array() # type: ignore[union-attr]

if isinstance(c, ComponentColumn):
component_column = c
elif isinstance(c, ComponentBatchMixin):
component_column = c.partition([1] * len(c)) # type: ignore[arg-type]
else:
raise TypeError(
f"Expected either a type that implements the `ComponentMixin` or a `ComponentColumn`, got: {type(c)}"
)
arrow_list_array = component_column.as_arrow_array()

if expected_length is None:
expected_length = len(component_column)
elif len(component_column) != expected_length:
expected_length = len(arrow_list_array)
elif len(arrow_list_array) != expected_length:
raise ValueError(
f"All times and components in a batch must have the same length. Expected length: {expected_length} but got: {len(component_column)} for component: {component_name}"
f"All times and components in a batch must have the same length. Expected length: {expected_length} but got: {len(arrow_list_array)} for component: {component_name}"
)

components_args[component_name] = component_column
components_args[component_name] = arrow_list_array

for i in indicators:
if expected_length is None:
Expand Down
20 changes: 18 additions & 2 deletions rerun_py/tests/unit/test_blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,23 @@
def test_blob() -> None:
"""Blob should accept bytes input."""

bites = b"Hello world"
bytes = b"Hello world"
array = np.array([72, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100], dtype=np.uint8)

assert rr.components.BlobBatch(bites).as_arrow_array() == rr.components.BlobBatch(array).as_arrow_array()
assert rr.datatypes.BlobBatch(bytes).as_arrow_array() == rr.datatypes.BlobBatch(array).as_arrow_array()


def test_blob_arrays() -> None:
    """BlobBatch should accept lists of bytes, lists of uint8 arrays, and 2D uint8 arrays equivalently."""
    COUNT = 10

    # A list of `bytes` objects and a list of uint8 numpy arrays with the same
    # contents must convert to identical arrow arrays.
    # (Renamed from `bytes` to avoid shadowing the builtin.)
    raw_bytes = [b"Hello world"] * COUNT
    arrays = [np.array([72, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100], dtype=np.uint8)] * COUNT
    assert rr.datatypes.BlobBatch(raw_bytes).as_arrow_array() == rr.datatypes.BlobBatch(arrays).as_arrow_array()
    assert len(rr.datatypes.BlobBatch(raw_bytes)) == COUNT
    assert len(rr.datatypes.BlobBatch(arrays)) == COUNT

    # A 2D numpy array is interpreted as COUNT row-blobs and must match the
    # list-of-bytes conversion element-for-element.
    array_2d = np.array([[72, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100]] * COUNT, dtype=np.uint8)
    assert rr.datatypes.BlobBatch(raw_bytes).as_arrow_array() == rr.datatypes.BlobBatch(array_2d).as_arrow_array()
    assert len(rr.datatypes.BlobBatch(array_2d)) == COUNT
Loading