From 4c4a5d4dd4efe201920f303eb41916e74ce233bd Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Thu, 29 Jun 2023 17:34:52 +0200 Subject: [PATCH] Fix pinhole visualization not working with camera extrinsics & intrinsics on the same path (#2568) ### What Our recent transform reform (as landed in 0.7) allows us to log pinhole and transform on the same entity. By convention we first apply transform3d then pinhole, as if transform3d were higher in the hierarchy. This was already the case, but the displayed pinhole frustum was broken. I used this to simplify the object hierarchy in both the rust and python objectron. While doing so I noticed that the Rust objectron didn't yet have the half-box fix we had already applied on the Python version. ![image](https://github.com/rerun-io/rerun/assets/1220815/e41c40eb-5ca6-4906-80c3-1a656f4cc0f8) ### Checklist * [x] I have read and agree to the [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md) * [x] I have tested https://demo.rerun.io/pr/2568 (if applicable) PR Build Summary: https://build.rerun.io/pr/2568 Docs preview: https://rerun.io/preview/3284fda/docs Examples preview: https://rerun.io/preview/3284fda/examples --- .../src/scene/parts/cameras.rs | 24 ++++++++++++------- examples/python/objectron/main.py | 8 +++---- examples/rust/objectron/src/main.rs | 15 +++++------- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/crates/re_space_view_spatial/src/scene/parts/cameras.rs b/crates/re_space_view_spatial/src/scene/parts/cameras.rs index 2223bf0c7368..280852fda03e 100644 --- a/crates/re_space_view_spatial/src/scene/parts/cameras.rs +++ b/crates/re_space_view_spatial/src/scene/parts/cameras.rs @@ -1,6 +1,6 @@ use re_components::{ coordinates::{Handedness, SignedAxis3}, - Component, InstanceKey, Pinhole, ViewCoordinates, + Component, InstanceKey, Pinhole, Transform3D, ViewCoordinates, }; use 
re_data_store::{EntityPath, EntityProperties}; use re_renderer::renderer::LineStripFlags; @@ -56,12 +56,14 @@ pub struct CamerasPart { } impl CamerasPart { + #[allow(clippy::too_many_arguments)] fn visit_instance( &mut self, scene_context: &SpatialSceneContext, ent_path: &EntityPath, props: &EntityProperties, pinhole: Pinhole, + transform_at_entity: Option, view_coordinates: ViewCoordinates, entity_highlight: &SpaceViewOutlineMasks, ) { @@ -70,13 +72,10 @@ impl CamerasPart { // The transform *at* this entity path already has the pinhole transformation we got passed in! // This makes sense, since if there's an image logged here one would expect that the transform applies. // We're however first interested in the rigid transform that led here, so query the parent transform. - // - // Note that currently a transform on an object can't have both a pinhole AND a rigid transform, - // which makes this rather well defined here. let parent_path = ent_path .parent() .expect("root path can't be part of scene query"); - let Some(world_from_parent) = scene_context.transforms.reference_from_entity(&parent_path) else { + let Some(mut world_from_camera) = scene_context.transforms.reference_from_entity(&parent_path) else { return; }; @@ -94,16 +93,24 @@ impl CamerasPart { return; } + // There's one wrinkle with using the parent transform though: + // The entity itself may have a 3D transform which (by convention!) we apply *before* the pinhole camera. + // Let's add that if it exists. + if let Some(transform_at_entity) = transform_at_entity { + world_from_camera = + world_from_camera * transform_at_entity.to_parent_from_child_transform(); + } + // If this transform is not representable an iso transform transform we can't display it yet. // This would happen if the camera is under another camera or under a transform with non-uniform scale. 
- let Some(world_from_camera) = macaw::IsoTransform::from_mat4(&world_from_parent.into()) else { + let Some(world_from_camera_iso) = macaw::IsoTransform::from_mat4(&world_from_camera.into()) else { return; }; self.space_cameras.push(SpaceCamera3D { ent_path: ent_path.clone(), view_coordinates, - world_from_camera, + world_from_camera: world_from_camera_iso, pinhole: Some(pinhole), picture_plane_distance: frustum_length, }); @@ -160,7 +167,7 @@ impl CamerasPart { let mut line_builder = scene_context.shared_render_builders.lines(); let mut batch = line_builder .batch("camera frustum") - .world_from_obj(world_from_parent) + .world_from_obj(world_from_camera) .outline_mask_ids(entity_highlight.overall) .picking_object_id(instance_layer_id.object); let lines = batch @@ -212,6 +219,7 @@ impl ScenePart for CamerasPart { ent_path, &props, pinhole, + store.query_latest_component::(ent_path, &query), view_coordinates, entity_highlight, ); diff --git a/examples/python/objectron/main.py b/examples/python/objectron/main.py index 357d33d34bfe..1f01b3ab0052 100755 --- a/examples/python/objectron/main.py +++ b/examples/python/objectron/main.py @@ -116,7 +116,7 @@ def log_ar_frames(samples: Iterable[SampleARFrame], seq: Sequence) -> None: rr.set_time_seconds("time", sample.timestamp) frame_times.append(sample.timestamp) - rr.log_image_file("world/camera/video", img_path=sample.image_path, img_format=rr.ImageFormat.JPEG) + rr.log_image_file("world/camera", img_path=sample.image_path, img_format=rr.ImageFormat.JPEG) log_camera(sample.frame.camera) log_point_cloud(sample.frame.raw_feature_points) @@ -149,7 +149,7 @@ def log_camera(cam: ARCamera) -> None: ) rr.log_view_coordinates("world/camera", xyz="RDF") # X=Right, Y=Down, Z=Forward rr.log_pinhole( - "world/camera/video", + "world/camera", child_from_parent=intrinsics, width=w, height=h, @@ -203,11 +203,11 @@ def log_frame_annotations(frame_times: list[float], frame_annotations: list[Fram keypoint_pos2s *= IMAGE_RESOLUTION if 
len(keypoint_pos2s) == 9: - log_projected_bbox(f"world/camera/video/estimates/box-{obj_ann.object_id}", keypoint_pos2s) + log_projected_bbox(f"world/camera/estimates/box-{obj_ann.object_id}", keypoint_pos2s) else: for id, pos2 in zip(keypoint_ids, keypoint_pos2s): rr.log_point( - f"world/camera/video/estimates/box-{obj_ann.object_id}/{id}", + f"world/camera/estimates/box-{obj_ann.object_id}/{id}", pos2, color=[130, 160, 250, 255], ) diff --git a/examples/rust/objectron/src/main.rs b/examples/rust/objectron/src/main.rs index 495cb5a7705a..383a3956a42d 100644 --- a/examples/rust/objectron/src/main.rs +++ b/examples/rust/objectron/src/main.rs @@ -123,7 +123,7 @@ fn log_baseline_objects( return None; } - let box3: Box3D = glam::Vec3::from_slice(&object.scale).into(); + let box3: Box3D = (glam::Vec3::from_slice(&object.scale) * 0.5).into(); let transform = { let translation = glam::Vec3::from_slice(&object.translation); // NOTE: the dataset is all row-major, transpose those matrices! @@ -153,7 +153,7 @@ fn log_video_frame(rec_stream: &RecordingStream, ar_frame: &ArFrame) -> anyhow:: let image_path = ar_frame.dir.join(format!("video/{}.jpg", ar_frame.index)); let tensor = rerun::components::Tensor::from_jpeg_file(&image_path)?; - MsgSender::new("world/camera/video") + MsgSender::new("world/camera") .with_timepoint(ar_frame.timepoint.clone()) .with_component(&[tensor])? .send(rec_stream)?; @@ -198,7 +198,7 @@ fn log_ar_camera( rot, ))])? 
.send(rec_stream)?; - MsgSender::new("world/camera/video") + MsgSender::new("world/camera") .with_timepoint(timepoint) .with_component(&[Pinhole { image_from_cam: intrinsics.into(), @@ -262,12 +262,9 @@ fn log_frame_annotations( }) .unzip(); - let mut msg = MsgSender::new(format!( - "world/camera/video/estimates/box-{}", - ann.object_id - )) - .with_timepoint(timepoint.clone()) - .with_splat(ColorRGBA::from_rgb(130, 160, 250))?; + let mut msg = MsgSender::new(format!("world/camera/estimates/box-{}", ann.object_id)) + .with_timepoint(timepoint.clone()) + .with_splat(ColorRGBA::from_rgb(130, 160, 250))?; if points.len() == 9 { // Build the preprojected bounding box out of 2D line segments.