fix: input size setting #126

Merged
merged 12 commits on Nov 13, 2024
2 changes: 2 additions & 0 deletions depthai_nodes/ml/parsers/base_parser.py
@@ -57,6 +57,8 @@ def build(self, head_config: Dict[str, Any]) -> "BaseParser":
@param head_config: A dictionary containing configuration details relevant to
the parser, including parameters and settings required for output parsing.
@type head_config: Dict[str, Any]
@return: The parser object with the head configuration set.
@rtype: BaseParser
"""
pass

46 changes: 27 additions & 19 deletions depthai_nodes/ml/parsers/lane_detection.py
@@ -23,8 +23,8 @@ class LaneDetectionParser(BaseParser):
Griding number.
cls_num_per_lane : int
Number of points per lane.
input_shape : Tuple[int, int]
Input shape.
input_size : Tuple[int, int]
Input size (width, height).

Output Message/s
----------------
@@ -44,7 +44,7 @@ def __init__(
row_anchors: List[int] = None,
griding_num: int = None,
cls_num_per_lane: int = None,
input_shape: Tuple[int, int] = (288, 800),
input_size: Tuple[int, int] = None,
) -> None:
"""Initializes the lane detection parser node.

@@ -56,16 +56,16 @@ def __init__(
@type griding_num: int
@param cls_num_per_lane: Number of points per lane.
@type cls_num_per_lane: int
@param input_shape: Input shape.
@type input_shape: Tuple[int, int]
@param input_size: Input size (width, height).
@type input_size: Tuple[int, int]
"""
super().__init__()
self.output_layer_name = output_layer_name

self.row_anchors = row_anchors
self.griding_num = griding_num
self.cls_num_per_lane = cls_num_per_lane
self.input_shape = input_shape
self.input_size = input_size

def setOutputLayerName(self, output_layer_name: str) -> None:
"""Set the output layer name for the lane detection model.
@@ -109,32 +109,40 @@ def setClsNumPerLane(self, cls_num_per_lane: int) -> None:
raise ValueError("Number of points per lane must be an integer.")
self.cls_num_per_lane = cls_num_per_lane

def setInputShape(self, input_shape: Tuple[int, int]) -> None:
"""Set the input shape for the lane detection model.
def setInputSize(self, input_size: Tuple[int, int]) -> None:
"""Set the input size for the lane detection model.

@param input_shape: Input shape.
@type input_shape: Tuple[int, int]
@param input_size: Input size (width, height).
@type input_size: Tuple[int, int]
"""
if not isinstance(input_shape, tuple):
raise ValueError("Input shape must be a tuple.")
if len(input_shape) != 2:
raise ValueError("Input shape must be a tuple of two integers.")
if not all(isinstance(size, int) for size in input_shape):
raise ValueError("Input shape must be a tuple of integers.")
self.input_shape = input_shape
if not isinstance(input_size, tuple):
raise ValueError("Input size must be a tuple.")
if len(input_size) != 2:
raise ValueError("Input size must be a tuple of two integers.")
if not all(isinstance(size, int) for size in input_size):
raise ValueError("Input size must be a tuple of integers.")
self.input_size = input_size

def build(
self,
head_config: Dict[str, Any],
inputs_size: List[List[int]],
) -> "LaneDetectionParser":
"""Configures the parser.

@param head_config: The head configuration for the parser.
@type head_config: Dict[str, Any]
@param inputs_size: Sizes of the model inputs, one [width, height] entry per input.
@type inputs_size: List[List[int]]
@return: The parser object with the head configuration set.
@rtype: LaneDetectionParser
"""

if len(inputs_size) != 1:
raise ValueError(
f"Only one input supported for LaneDetectionParser, got {len(inputs_size)} inputs."
)
self.input_size = inputs_size[0]
output_layers = head_config.get("outputs", [])
if len(output_layers) != 1:
raise ValueError(
@@ -180,8 +188,8 @@ def run(self):
anchors=self.row_anchors,
griding_num=self.griding_num,
cls_num_per_lane=self.cls_num_per_lane,
input_width=self.input_shape[1],
input_height=self.input_shape[0],
input_width=self.input_size[0],
input_height=self.input_size[1],
y=y,
)

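As a quick, standalone illustration of the convention the renamed parameter follows (not part of the PR): input_size is ordered (width, height), so index 0 feeds the decoder's input_width and index 1 its input_height, matching the old default of height 288 and width 800.

# Standalone sketch of the (width, height) ordering assumed above.
input_size = (800, 288)  # (width, height); the old default input_shape was (288, 800)
input_width, input_height = input_size  # width=800, height=288
assert (input_width, input_height) == (800, 288)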
26 changes: 13 additions & 13 deletions depthai_nodes/ml/parsers/utils/yunet.py
@@ -20,23 +20,23 @@ def manual_product(*args):


def generate_anchors(
input_shape: Tuple[int, int],
input_size: Tuple[int, int],
min_sizes: List[List[int]] = None,
strides: List[int] = None,
):
"""Generate a set of default bounding boxes, known as anchors.
The code is taken from https://github.com/Kazuhito00/YuNet-ONNX-TFLite-Sample/tree/main

@param input_shape: A tuple representing the width and height of the input image.
@type input_shape: Tuple[int, int]
@param input_size: A tuple representing the width and height of the input image.
@type input_size: Tuple[int, int]
@param min_sizes: A list of lists, where each inner list contains the minimum sizes of the anchors for different feature maps.
@type min_sizes: List[List[int]]
@param strides: Strides for each feature map layer.
@type strides: List[int]
@return: Anchors.
@rtype: np.ndarray
"""
w, h = input_shape
w, h = input_size

if min_sizes is None:
min_sizes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]]
@@ -70,7 +70,7 @@ def generate_anchors(


def decode_detections(
input_shape: Tuple[int, int],
input_size: Tuple[int, int],
loc: np.ndarray,
conf: np.ndarray,
iou: np.ndarray,
@@ -80,8 +80,8 @@
Decodes the output of an object detection model by converting the model's predictions (localization, confidence, and IoU scores) into bounding boxes, keypoints, and scores.
The code is taken from https://github.com/Kazuhito00/YuNet-ONNX-TFLite-Sample/tree/main

@param input_shape: The shape of the input image (height, width).
@type input_shape: tuple
@param input_size: The size of the input image (width, height).
@type input_size: tuple
@param loc: The predicted locations (or offsets) of the bounding boxes.
@type loc: np.ndarray
@param conf: The predicted class confidence scores.
@@ -98,12 +98,12 @@

"""

w, h = input_shape
w, h = input_size

if variance is None:
variance = [0.1, 0.2]

anchors = generate_anchors(input_shape)
anchors = generate_anchors(input_size)

# Get scores
cls_scores = conf[:, 1]
@@ -168,7 +168,7 @@ def format_detections(
bboxes: np.ndarray,
keypoints: np.ndarray,
scores: np.ndarray,
input_shape: Tuple[int, int],
input_size: Tuple[int, int],
):
"""Format detections into a list of dictionaries.

@@ -178,16 +178,16 @@
@type np.ndarray
@param scores: A numpy array of shape (N,) containing the scores.
@type np.ndarray
@param input_shape: A tuple representing the height and width of the input image.
@type input_shape: tuple
@param input_size: A tuple representing the width and height of the input image.
@type input_size: tuple
@return: A tuple of bboxes, keypoints, and scores.
- bboxes: NumPy array of shape (N, 4) containing the decoded bounding boxes in the format [x_min, y_min, width, height].
- keypoints: A NumPy array of shape (N, 10) containing the decoded keypoint coordinates for each anchor.
- scores: A NumPy array of shape (N, 1) containing the combined scores for each anchor.
@rtype: Tuple[np.ndarray, np.ndarray, np.ndarray]
"""

w, h = input_shape
w, h = input_size

bboxes = normalize_bboxes(bboxes, height=h, width=w)

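For context, a hedged usage sketch of the renamed helper; the import path is assumed from the file layout above, and the concrete size is illustrative.

# Hedged sketch: generate_anchors now takes input_size as (width, height).
from depthai_nodes.ml.parsers.utils.yunet import generate_anchors  # path assumed from repo layout

anchors = generate_anchors(input_size=(320, 240))  # width=320, height=240
print(anchors.shape)  # np.ndarray of anchors; row count depends on strides and min_sizes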
20 changes: 12 additions & 8 deletions depthai_nodes/ml/parsers/yunet.py
@@ -1,4 +1,4 @@
from typing import Any, Dict, Tuple
from typing import Any, Dict, List, Tuple

import depthai as dai
import numpy as np
@@ -22,7 +22,7 @@ class YuNetParser(DetectionParser):
max_det : int
Maximum number of detections to keep.
input_size : Tuple[int, int]
Input size.
Input size (width, height).
loc_output_layer_name: str
Name of the output layer containing the location predictions.
conf_output_layer_name: str
@@ -55,7 +55,7 @@ def __init__(
@type iou_threshold: float
@param max_det: Maximum number of detections to keep.
@type max_det: int
@param input_size: Input shape of the model (width, height).
@param input_size: Input size of the model (width, height).
@type input_size: Tuple[int, int]
@param loc_output_layer_name: Output layer name for the location predictions.
@type loc_output_layer_name: str
@@ -120,22 +120,26 @@ def setOutputLayerIou(self, iou_output_layer_name: str) -> None:
def build(
self,
head_config: Dict[str, Any],
inputs_size: List[List[int]],
) -> "YuNetParser":
"""Configures the parser.

@param head_config: The head configuration for the parser.
@type head_config: Dict[str, Any]
@param inputs_size: Sizes of the model inputs, one [width, height] entry per input.
@type inputs_size: List[List[int]]
@return: The parser object with the head configuration set.
@rtype: YuNetParser
"""

super().build(head_config)
output_layers = head_config.get("outputs", [])
self.input_size = head_config.get("input_size", self.input_size)
if len(output_layers) != 3:
if len(inputs_size) != 1:
raise ValueError(
f"YuNetParser expects exactly 3 output layers, got {output_layers} layers."
f"Only one input supported for LaneDetectionParser, got {len(inputs_size)} inputs."
)
self.input_size = inputs_size[0]
for output_layer in output_layers:
if "loc" in output_layer:
self.loc_output_layer_name = output_layer
@@ -242,7 +246,7 @@ def run(self):

# decode detections
bboxes, keypoints, scores = decode_detections(
input_shape=self.input_size,
input_size=self.input_size,
loc=loc,
conf=conf,
iou=iou,
@@ -261,7 +265,7 @@
bboxes=bboxes,
keypoints=keypoints,
scores=scores,
input_shape=self.input_size,
input_size=self.input_size,
)

# run nms
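A standalone sketch of the single-input guard that build now applies; the values are illustrative, and in practice inputs_size comes from the parser generator.

# Standalone sketch of the new single-input check and size assignment.
inputs_size = [[320, 240]]  # one [width, height] entry per model input
if len(inputs_size) != 1:
    raise ValueError(
        f"Only one input supported for YuNetParser, got {len(inputs_size)} inputs."
    )
input_size = inputs_size[0]
assert input_size == [320, 240]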
19 changes: 18 additions & 1 deletion depthai_nodes/parser_generator.py
@@ -1,3 +1,4 @@
import inspect
from typing import Dict

import depthai as dai
@@ -53,7 +54,23 @@ def build(self, nn_archive: dai.NNArchive, head_index: int = None) -> Dict:
)

head = decode_head(head)
parsers[index] = pipeline.create(parser).build(head)
sig = inspect.signature(parser.build)
if "inputs_size" in sig.parameters:
inputs_size = []
for input in nn_archive.getConfig().model.inputs:
if input.layout == "NHWC":
_, height, width, _ = input.shape
elif input.layout == "NCHW":
_, _, height, width = input.shape
else:
raise ValueError(
f"Input layout {input.layout} not supported for input_size extraction."
)
inputs_size.append([width, height])
parsers[index] = pipeline.create(parser).build(head, inputs_size)
else:
parsers[index] = pipeline.create(parser).build(head)

return parsers
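The width/height extraction above depends only on the input tensor layout; below is a self-contained sketch of that logic (the helper name and example shapes are illustrative).

# Standalone sketch of the layout-based input-size extraction added above.
from typing import List, Tuple


def extract_input_size(shape: List[int], layout: str) -> Tuple[int, int]:
    """Return (width, height) from a 4D input tensor shape."""
    if layout == "NHWC":
        _, height, width, _ = shape
    elif layout == "NCHW":
        _, _, height, width = shape
    else:
        raise ValueError(
            f"Input layout {layout} not supported for input_size extraction."
        )
    return width, height


# A 1x3x288x800 NCHW tensor and a 1x288x800x3 NHWC tensor both map to (800, 288).
assert extract_input_size([1, 3, 288, 800], "NCHW") == (800, 288)
assert extract_input_size([1, 288, 800, 3], "NHWC") == (800, 288)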
