Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix python's send_columns failing to convert correctly from ComponentBatch to ComponentColumn in some cases #7155

Merged
merged 7 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions crates/store/re_types/definitions/rerun/archetypes/tensor.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ namespace rerun.archetypes;
/// \cpp data can be passed in without a copy from raw pointers or by reference from `std::vector`/`std::array`/c-arrays.
/// \cpp If needed, this "borrow-behavior" can be extended by defining your own `rerun::CollectionAdapter`.
///
/// \py It's currently not possible to use `send_columns` with tensors since construction
/// \py of `rerun.components.TensorDataBatch` does not support more than a single element.
/// \py This will be addressed as part of https://github.com/rerun-io/rerun/issues/6832.
///
/// \example archetypes/tensor_simple title="Simple tensor" image="https://static.rerun.io/tensor_simple/baacb07712f7b706e3c80e696f70616c6c20b367/1200w.png"
table Tensor (
"attr.rust.derive": "PartialEq",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ namespace rerun.datatypes;
///
/// These dimensions are combined with an index to look up values from the `buffer` field,
/// which stores a contiguous array of typed values.
///
/// \py It's currently not possible to use `send_columns` with tensors since construction
/// \py of `rerun.components.TensorDataBatch` does not support more than a single element.
/// \py This will be addressed as part of https://github.com/rerun-io/rerun/issues/6832.
table TensorData (
"attr.python.aliases": "npt.ArrayLike",
"attr.python.array_aliases": "npt.ArrayLike",
Expand Down
4 changes: 4 additions & 0 deletions rerun_py/rerun_sdk/rerun/archetypes/tensor.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions rerun_py/rerun_sdk/rerun/datatypes/blob_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ def native_to_pa_array_override(data: BlobArrayLike, data_type: pa.DataType) ->
inners = []
elif isinstance(data[0], Blob):
inners = [pa.array(np.array(datum.data, dtype=np.uint8).flatten()) for datum in data] # type: ignore[union-attr]
elif isinstance(data[0], bytes):
inners = [pa.array(np.frombuffer(datum, dtype=np.uint8)) for datum in data] # type: ignore[arg-type]
else:
inners = [pa.array(np.array(datum, dtype=np.uint8).flatten()) for datum in data]

Expand Down
4 changes: 4 additions & 0 deletions rerun_py/rerun_sdk/rerun/datatypes/tensor_data.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 12 additions & 6 deletions rerun_py/rerun_sdk/rerun/send_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pyarrow as pa
import rerun_bindings as bindings

from ._baseclasses import Archetype, ComponentColumnLike
from ._baseclasses import Archetype, ComponentColumn, ComponentColumnLike
from ._log import IndicatorComponentBatch
from .error_utils import catch_and_log_exceptions
from .recording_stream import RecordingStream
Expand Down Expand Up @@ -182,15 +182,21 @@ def send_columns(
indicators.append(c)
continue
component_name = c.component_name()
component_column = c.as_arrow_array() # type: ignore[union-attr]

if isinstance(c, ComponentColumn):
component_column = c
else:
component_column = c.partition([1] * len(c)) # type: ignore[union-attr, attr-defined, arg-type]
arrow_list_array = component_column.as_arrow_array()

if expected_length is None:
expected_length = len(component_column)
elif len(component_column) != expected_length:
expected_length = len(arrow_list_array)
elif len(arrow_list_array) != expected_length:
raise ValueError(
f"All times and components in a batch must have the same length. Expected length: {expected_length} but got: {len(component_column)} for component: {component_name}"
f"All times and components in a batch must have the same length. Expected length: {expected_length} but got: {len(arrow_list_array)} for component: {component_name}"
)

components_args[component_name] = component_column
components_args[component_name] = arrow_list_array

for i in indicators:
if expected_length is None:
Expand Down
20 changes: 18 additions & 2 deletions rerun_py/tests/unit/test_blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,23 @@
def test_blob() -> None:
    """Blob should accept raw ``bytes`` input and encode it identically to a uint8 array."""

    # Renamed from `bytes` to avoid shadowing the builtin `bytes` type.
    blob_bytes = b"Hello world"
    # Byte-for-byte the same payload as `blob_bytes`, expressed as a uint8 numpy array.
    array = np.array([72, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100], dtype=np.uint8)

    # Both input forms must produce the same Arrow representation.
    assert rr.datatypes.BlobBatch(blob_bytes).as_arrow_array() == rr.datatypes.BlobBatch(array).as_arrow_array()


def test_blob_arrays() -> None:
    """BlobBatch should accept a list of ``bytes``, a list of uint8 arrays, and a 2D uint8 array interchangeably."""

    COUNT = 10

    # List of raw bytes vs. list of equivalent uint8 arrays.
    # Renamed from `bytes` to avoid shadowing the builtin `bytes` type.
    blob_bytes = [b"Hello world"] * COUNT
    array = [np.array([72, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100], dtype=np.uint8)] * COUNT
    assert rr.datatypes.BlobBatch(blob_bytes).as_arrow_array() == rr.datatypes.BlobBatch(array).as_arrow_array()
    assert len(rr.datatypes.BlobBatch(blob_bytes)) == COUNT
    assert len(rr.datatypes.BlobBatch(array)) == COUNT

    # A 2D numpy array: each row becomes one blob element.
    array_2d = np.array([[72, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100]] * COUNT, dtype=np.uint8)
    assert rr.datatypes.BlobBatch(blob_bytes).as_arrow_array() == rr.datatypes.BlobBatch(array_2d).as_arrow_array()
    assert len(rr.datatypes.BlobBatch(array_2d)) == COUNT
Loading