Restructure; Add Parsers, Util Functions and Message Creators #4

Merged
merged 42 commits · Jun 24, 2024
Changes from 41 commits

Commits
b08d354
add util function for image message creation
jkbmrz May 29, 2024
f45074d
add util function for detections message creation
jkbmrz May 29, 2024
828cf60
populate message creation __init__ file
jkbmrz May 29, 2024
a88bddc
populate utils __init__ file
jkbmrz May 29, 2024
328a339
add util function for reverting normalization
jkbmrz May 29, 2024
eb2a411
adjust dncnn3 postprocessing node with new utils functions
jkbmrz May 29, 2024
940568e
adjust zero_dce postprocessing node with new utils functions
jkbmrz May 29, 2024
c96a563
add util function for monocular depth message creation
jkbmrz Jun 6, 2024
4c5dc99
adjust util function for detections message creation
jkbmrz Jun 6, 2024
80598a3
add util function for decoding detections
jkbmrz Jun 6, 2024
e9c4bc6
adjust yunet postprocessing node with new utils functions
jkbmrz Jun 6, 2024
0f29aa5
adjust depth_anything postprocessing node with new utils functions
jkbmrz Jun 6, 2024
71e8cca
fix imports
jkbmrz Jun 6, 2024
c8769e1
adjust utils __init__.py
jkbmrz Jun 6, 2024
ca9db68
fix imports
jkbmrz Jun 6, 2024
cc9b070
remove nms.py from utils
jkbmrz Jun 6, 2024
e5113f7
Merge branch 'main' into feature/add_node_utils
jkbmrz Jun 6, 2024
fe72fc4
make a generic monocular depth parser
jkbmrz Jun 6, 2024
9531ac1
add generic monocular depth parser
jkbmrz Jun 6, 2024
0b126c8
fix imports
jkbmrz Jun 6, 2024
7f23a0a
add generic image-to-image postprocessing node
jkbmrz Jun 19, 2024
6ae043d
fix naming
jkbmrz Jun 19, 2024
a090353
feat: add normalize arg to unnormalizate function
jkbmrz Jun 19, 2024
f965988
refactor: move image format detection to message creation step
jkbmrz Jun 19, 2024
92f2ea4
feature: add automatic detection of strides and input_size for YunetP…
jkbmrz Jun 19, 2024
4813db6
refactor: move automatic detection of number of cols and rows to deco…
jkbmrz Jun 19, 2024
2e455dd
refactor: remove unneeded np.expand of cls dim
jkbmrz Jun 19, 2024
5099601
refactor: vectorize detection decoding
jkbmrz Jun 19, 2024
3939204
test: check bbox format during detection message creation
jkbmrz Jun 19, 2024
edbb000
fix: correct bbox format check
jkbmrz Jun 19, 2024
17ce753
feature: merge the old depth message with monocular depth message cre…
jkbmrz Jun 19, 2024
fe1778a
feature: add depth_type setters to MonocularDepthParser
jkbmrz Jun 19, 2024
883f324
feature: allow empty detections in detections message creation
jkbmrz Jun 19, 2024
1fb7272
fix: remove old depth message imports
jkbmrz Jun 19, 2024
c1d15c7
refactor: rename ImageOutputParser file
jkbmrz Jun 20, 2024
5748d18
fix: ImageOutputParser output shape check to allow non-batched outputs
jkbmrz Jun 20, 2024
2c9970f
refactor: vectorize detection dicts construction
jkbmrz Jun 20, 2024
083d0f2
refactor: move and rename message creation functions dir
jkbmrz Jun 20, 2024
96064f2
fix: various imports
jkbmrz Jun 20, 2024
7b6ae31
refactor: rename (monocular) depth message
jkbmrz Jun 20, 2024
99f2803
style: remove empty file
jkbmrz Jun 20, 2024
0eaa547
refactor: rename creation_functions to creators
jkbmrz Jun 24, 2024
@@ -1,11 +1,13 @@
from .depth import create_depth_message
from .image import create_image_message
from .segmentation import create_segmentation_message
from .keypoints import create_hand_keypoints_message
from .detection import create_detection_message
from .depth import create_depth_message

__all__ = [
"create_depth_message",
"create_image_message",
"create_segmentation_message",
"create_hand_keypoints_message",
"create_detection_message",
]
"create_depth_message",
]
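
For reviewers, a sketch of how the creators re-exported by this __init__.py could be imported downstream. The package path is an assumption taken from the file paths in this PR (and is renamed to creators in a later commit):

# Assumed package path; downstream import of the message creators re-exported above.
from ml.messages.creation_functions import (
    create_depth_message,
    create_detection_message,
    create_hand_keypoints_message,
    create_image_message,
    create_segmentation_message,
)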
65 changes: 65 additions & 0 deletions ml/messages/creation_functions/depth.py
@@ -0,0 +1,65 @@
from typing import Literal
import depthai as dai
import numpy as np

UINT16_MAX_VALUE = 65535


def create_depth_message(
    depth_map: np.array, depth_type: Literal["relative", "metric"]
) -> dai.ImgFrame:
    """
    Creates a depth message in the form of an ImgFrame using the provided depth map and depth type.

    Args:
        depth_map (np.array): A NumPy array representing the depth map, in CHW or HWC layout.
        depth_type (Literal['relative', 'metric']): A string indicating the type of depth map.
            It can either be 'relative' or 'metric'.

    Returns:
        dai.ImgFrame: An ImgFrame object containing the depth information.

    """

    if not isinstance(depth_map, np.ndarray):
        raise ValueError(f"Expected numpy array, got {type(depth_map)}.")
    if len(depth_map.shape) != 3:
        raise ValueError(f"Expected 3D input, got {len(depth_map.shape)}D input.")

    if depth_map.shape[0] == 1:
        depth_map = depth_map[0, :, :]  # CHW to HW
    elif depth_map.shape[2] == 1:
        depth_map = depth_map[:, :, 0]  # HWC to HW
    else:
        raise ValueError(
            f"Unexpected image shape. Expected CHW or HWC, got {depth_map.shape}."
        )

    if depth_type == "relative":
        data_type = dai.ImgFrame.Type.RAW16

        # normalize depth map to the range [0, 65535]
        min_val = depth_map.min()
        max_val = depth_map.max()
        if min_val == max_val:  # avoid division by zero
            depth_map = np.zeros_like(depth_map)
        else:
            depth_map = (depth_map - min_val) / (max_val - min_val) * UINT16_MAX_VALUE
        depth_map = depth_map.astype(np.uint16)

    elif depth_type == "metric":
        raise NotImplementedError(
            "The message for 'metric' depth type is not yet implemented."
        )
    else:
        raise ValueError(
            f"Invalid depth type: {depth_type}. Only 'relative' and 'metric' are supported."
        )

    imgFrame = dai.ImgFrame()
    imgFrame.setFrame(depth_map)
    imgFrame.setWidth(depth_map.shape[1])
    imgFrame.setHeight(depth_map.shape[0])
    imgFrame.setType(data_type)

    return imgFrame
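
As a quick sanity check for reviewers, a minimal usage sketch of create_depth_message. It is illustrative only: it assumes depthai and numpy are installed and that the module is importable under the ml.messages.creation_functions path shown in this diff.

import numpy as np

from ml.messages.creation_functions.depth import create_depth_message  # assumed import path

# Synthetic relative depth map in CHW layout (1 channel, 480x640); values are arbitrary.
relative_depth = np.random.rand(1, 480, 640).astype(np.float32)

depth_frame = create_depth_message(relative_depth, depth_type="relative")

# The map is rescaled to the full uint16 range and wrapped in a RAW16 ImgFrame.
print(depth_frame.getWidth(), depth_frame.getHeight(), depth_frame.getType())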
115 changes: 115 additions & 0 deletions ml/messages/creation_functions/detection.py
@@ -0,0 +1,115 @@
import numpy as np
from typing import List, Tuple
import depthai as dai

from ...messages import (
    ImgDetectionWithKeypoints,
    ImgDetectionsWithKeypoints,
)


def create_detection_message(
    bboxes: np.ndarray,
    scores: np.ndarray,
    labels: List[int] = None,
    keypoints: List[List[Tuple[float, float]]] = None,
) -> dai.ImgDetections:
    """
    Create a detection message containing the bounding boxes, labels, and confidence scores of detected objects.
    If there are no labels (e.g. the model has only one class), labels can be set to None and every detection's label defaults to 0.

    Args:
        bboxes (np.ndarray): Detected bounding boxes of shape (N,4) meaning [...,[x_min, y_min, x_max, y_max],...].
        scores (np.ndarray): Confidence scores of detected objects of shape (N,).
        labels (List[int], optional): Labels of detected objects of shape (N,). Defaults to None.
        keypoints (List[List[Tuple[float, float]]], optional): Keypoints of detected objects, one list of (x, y) tuples per detection. Defaults to None.

    Returns:
        dai.ImgDetections OR ImgDetectionsWithKeypoints: Message containing the bounding boxes, labels, confidence scores, and keypoints of detected objects.
    """

    # checks for bboxes
    if not isinstance(bboxes, np.ndarray):
        raise ValueError(f"bboxes should be numpy array, got {type(bboxes)}.")
    if len(bboxes) != 0:
        if len(bboxes.shape) != 2:
            raise ValueError(
                f"bboxes should be of shape (N,4) meaning [...,[x_min, y_min, x_max, y_max],...], got {bboxes.shape}."
            )
        if bboxes.shape[1] != 4:
            raise ValueError(
                f"bboxes 2nd dimension should be of size 4 e.g. [x_min, y_min, x_max, y_max] got {bboxes.shape[1]}."
            )
        x_valid = bboxes[:, 0] < bboxes[:, 2]
        y_valid = bboxes[:, 1] < bboxes[:, 3]
        if not (np.all(x_valid) and np.all(y_valid)):
            raise ValueError(
                "bboxes should be in format [x_min, y_min, x_max, y_max] where x_min < x_max and y_min < y_max."
            )

    # checks for scores
    if not isinstance(scores, np.ndarray):
        raise ValueError(f"scores should be numpy array, got {type(scores)}.")
    if len(scores) != 0:
        if len(scores.shape) != 1:
            raise ValueError(f"scores should be of shape (N,), got {scores.shape}.")
        if scores.shape[0] != bboxes.shape[0]:
            raise ValueError(
                f"scores should have same length as bboxes, got {scores.shape[0]} and {bboxes.shape[0]}."
            )

    # checks for labels
    if labels is not None and len(labels) != 0:
        if not isinstance(labels, List):
            raise ValueError(f"labels should be list, got {type(labels)}.")
        for label in labels:
            if not isinstance(label, int):
                raise ValueError(
                    f"labels should be list of integers, got {type(label)}."
                )
        if len(labels) != bboxes.shape[0]:
            raise ValueError(
                f"labels should have same length as bboxes, got {len(labels)} and {bboxes.shape[0]}."
            )

    # checks for keypoints
    if keypoints is not None and len(keypoints) != 0:
        if not isinstance(keypoints, List):
            raise ValueError(f"keypoints should be list, got {type(keypoints)}.")
        for pointcloud in keypoints:
            for point in pointcloud:
                if not isinstance(point, Tuple):
                    raise ValueError(
                        f"keypoint pairs should be list of tuples, got {type(point)}."
                    )
        if len(keypoints) != bboxes.shape[0]:
            raise ValueError(
                f"keypoints should have same length as bboxes, got {len(keypoints)} and {bboxes.shape[0]}."
            )

    if keypoints is not None:
        img_detection = ImgDetectionWithKeypoints
        img_detections = ImgDetectionsWithKeypoints
    else:
        img_detection = dai.ImgDetection
        img_detections = dai.ImgDetections

    detections = []
    for i in range(bboxes.shape[0]):
        detection = img_detection()
        detection.xmin = bboxes[i][0]
        detection.ymin = bboxes[i][1]
        detection.xmax = bboxes[i][2]
        detection.ymax = bboxes[i][3]
        detection.confidence = scores[i]
        if labels is None:
            detection.label = 0
        else:
            detection.label = labels[i]
        if keypoints is not None:
            detection.keypoints = keypoints[i]
        detections.append(detection)

    detections_msg = img_detections()
    detections_msg.detections = detections
    return detections_msg
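
A minimal usage sketch for create_detection_message. Illustrative only: the boxes, scores, and labels below are made up, and the import path is assumed from this diff.

import numpy as np

from ml.messages.creation_functions.detection import create_detection_message  # assumed import path

# Two illustrative detections in [x_min, y_min, x_max, y_max] format with confidence scores and labels.
bboxes = np.array([[10.0, 20.0, 110.0, 220.0], [50.0, 60.0, 150.0, 260.0]])
scores = np.array([0.91, 0.78])
labels = [0, 1]

detections_msg = create_detection_message(bboxes, scores, labels=labels)

for detection in detections_msg.detections:
    print(detection.label, detection.confidence, detection.xmin, detection.ymin, detection.xmax, detection.ymax)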
48 changes: 48 additions & 0 deletions ml/messages/creation_functions/image.py
@@ -0,0 +1,48 @@
import depthai as dai
import numpy as np
import cv2


def create_image_message(
    image: np.array,
    is_bgr: bool = True,
) -> dai.ImgFrame:
    """
    Create a depthai message for an image array.

    @type image: np.array
    @param image: Image array in HWC or CHW format.

    @type is_bgr: bool
    @param is_bgr: If True, the image is in BGR format. If False, the image is in RGB format.
    """

    if image.shape[0] in [1, 3]:
        hwc = False
    elif image.shape[2] in [1, 3]:
        hwc = True
    else:
        raise ValueError(
            f"Unexpected image shape. Expected CHW or HWC, got {image.shape}."
        )

    if not hwc:
        image = np.transpose(image, (1, 2, 0))

    if image.shape[2] == 1:  # grayscale
        image = image[:, :, 0]
        img_frame_type = dai.ImgFrame.Type.GRAY8  # HW image
        height, width = image.shape
    else:
        if not is_bgr:
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        img_frame_type = dai.ImgFrame.Type.BGR888i  # HWC BGR image
        height, width, _ = image.shape

    imgFrame = dai.ImgFrame()
    imgFrame.setFrame(image)
    imgFrame.setWidth(width)
    imgFrame.setHeight(height)
    imgFrame.setType(img_frame_type)

    return imgFrame
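
A minimal usage sketch for create_image_message. Illustrative only: it assumes depthai, numpy, and opencv-python are installed and uses the import path shown in this diff.

import numpy as np

from ml.messages.creation_functions.image import create_image_message  # assumed import path

# Synthetic HWC BGR image: height 256, width 320, 3 channels.
bgr_image = np.random.randint(0, 256, (256, 320, 3), dtype=np.uint8)

image_frame = create_image_message(bgr_image, is_bgr=True)
print(image_frame.getWidth(), image_frame.getHeight(), image_frame.getType())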
@@ -1,7 +1,7 @@
import depthai as dai
import numpy as np
from typing import List
from ....messages import HandKeypoints
from ...messages import HandKeypoints

def create_hand_keypoints_message(hand_keypoints: np.ndarray, handedness: float, confidence: float, confidence_threshold: float) -> HandKeypoints:
"""
10 changes: 4 additions & 6 deletions ml/postprocessing/__init__.py
@@ -1,16 +1,14 @@
from .zero_dce import ZeroDCEParser
from .dncnn3 import DnCNN3Parser
from .depth_anything import DepthAnythingParser
from .image_output import ImageOutputParser
from .monocular_depth import MonocularDepthParser
from .yunet import YuNetParser
from .mediapipe_hand_detection import MPHandDetectionParser
from .mediapipe_hand_landmarker import MPHandLandmarkParser
from .scrfd import SCRFDParser
from .segmentation import SegmentationParser

__all__ = [
'ZeroDCEParser',
'DnCNN3Parser',
'DepthAnythingParser',
'ImageOutputParser',
'MonocularDepthParser',
'YuNetParser',
'MPHandDetectionParser',
'MPHandLandmarkParser',
46 changes: 0 additions & 46 deletions ml/postprocessing/depth_anything.py

This file was deleted.

41 changes: 0 additions & 41 deletions ml/postprocessing/dncnn3.py

This file was deleted.
