Examples compatible with parsers v2. (#117)

* Fix scores check.
* Correctly instantiate mask.
* Clip values.
* Fix examples.
* added nicer import for ParsingNeuralNetwork
* pre-commit fix
* [Automated] Updated coverage badge

---------

Co-authored-by: klemen1999 <klemenskrlj8@gmail.com>
Co-authored-by: GitHub Actions <actions@github.com>
luxonis · Oct 25, 2024 · 92a4d92 · 92a4d92
1 parent a90fc0c
commit 92a4d92
Show file tree

Hide file tree

Showing 20 changed files with 166 additions and 383 deletions.
diff --git a/.github/labeler.yaml b/.github/labeler.yaml
@@ -28,6 +28,4 @@ fix:
     - 'hotfix/*'
     - 'issue/*'
     - 'bugfix/*'
-    - 'patch/*'
-release:
-  - base-branch: 'main'
+    - 'patch/*'
diff --git a/depthai_nodes/__init__.py b/depthai_nodes/__init__.py
@@ -1,3 +1,5 @@
 from .ml.parsers import *
+from .parser_generator import ParserGenerator  # noqa: F401
+from .parsing_neural_network import ParsingNeuralNetwork  # noqa: F401
 
 __version__ = "0.0.2"
diff --git a/depthai_nodes/ml/messages/classification.py b/depthai_nodes/ml/messages/classification.py
@@ -69,6 +69,6 @@ def scores(self, value: NDArray[np.float32]):
             raise TypeError(f"Scores must be a np.ndarray, instead got {type(value)}.")
         if value.ndim != 1:
             raise ValueError("Scores must be a 1D a np.ndarray.")
-        if value.dtype != np.float32:
+        if value.size > 0 and value.dtype != np.float32:
             raise ValueError("Scores must be a np.ndarray of floats.")
         self._scores = value
diff --git a/depthai_nodes/ml/messages/img_detections.py b/depthai_nodes/ml/messages/img_detections.py
@@ -256,7 +256,7 @@ def __init__(self) -> None:
         """Initializes the ImgDetectionsExtended object."""
         super().__init__()
         self._detections: List[ImgDetectionExtended] = []
-        self._masks: SegmentationMask = np.array([])
+        self._masks: SegmentationMask = SegmentationMask()
 
     @property
     def detections(self) -> List[ImgDetectionExtended]:

diff --git a/depthai_nodes/ml/parsers/keypoints.py b/depthai_nodes/ml/parsers/keypoints.py
@@ -156,6 +156,8 @@ def run(self):
 
             keypoints /= self.scale_factor
 
+            keypoints = np.clip(keypoints, 0, 1)
+
             msg = create_keypoints_message(keypoints)
             msg.setTimestamp(output.getTimestamp())
 

diff --git a/depthai_nodes/ml/parsers/mediapipe_palm_detection.py b/depthai_nodes/ml/parsers/mediapipe_palm_detection.py
@@ -189,6 +189,9 @@ def run(self):
             points = points.astype(float) / self.scale
             bboxes = bboxes.astype(float) / self.scale
 
+            bboxes = np.clip(bboxes, 0, 1)
+            points = np.clip(points, 0, 1)
+
             detections_msg = create_detection_message(
                 bboxes=bboxes, scores=scores, angles=angles, keypoints=points
             )

diff --git a/depthai_nodes/ml/parsers/scrfd.py b/depthai_nodes/ml/parsers/scrfd.py
@@ -212,6 +212,7 @@ def run(self):
                 nms_threshold=self.iou_threshold,
             )
             bboxes = xyxy_to_xywh(bboxes)
+            bboxes = np.clip(bboxes, 0, 1)
             detection_msg = create_detection_message(
                 bboxes=bboxes, scores=scores, keypoints=keypoints
             )

diff --git a/depthai_nodes/ml/parsers/utils/ppdet.py b/depthai_nodes/ml/parsers/utils/ppdet.py
@@ -195,10 +195,11 @@ def parse_paddle_detection_outputs(
         angles.append(box[4])
 
     boxes = np.array(boxes)
-    boxes[:, 0] /= width
-    boxes[:, 1] /= height
-    boxes[:, 2] /= width
-    boxes[:, 3] /= height
-    corners = np.clip(corners, 0, 1)
+    if boxes.size > 0:
+        boxes[:, 0] /= width
+        boxes[:, 1] /= height
+        boxes[:, 2] /= width
+        boxes[:, 3] /= height
+        corners = np.clip(corners, 0, 1)
 
     return boxes, np.array(angles), np.array(corners_array), np.array(scores)
diff --git a/depthai_nodes/ml/parsers/yolo.py b/depthai_nodes/ml/parsers/yolo.py
@@ -332,6 +332,9 @@ def run(self):
                     # Fill the final mask with the instance values
                     final_mask[resized_mask > 0] = i
 
+            bboxes = np.array(bboxes)
+            bboxes = np.clip(bboxes, 0, 1)
+
             if mode == self._KPTS_MODE:
                 additional_output = np.array(additional_output)
                 keypoints = np.array([])
@@ -340,23 +343,24 @@ def run(self):
                     keypoints = additional_output[:, :, :2]
                     keypoints_scores = additional_output[:, :, 2]
 
+                keypoints = np.clip(keypoints, 0, 1)
                 detections_message = create_detection_message(
-                    bboxes=np.array(bboxes),
+                    bboxes=bboxes,
                     scores=np.array(scores),
                     labels=np.array(labels),
                     keypoints=keypoints,
                     keypoints_scores=keypoints_scores,
                 )
             elif mode == self._SEG_MODE:
                 detections_message = create_detection_message(
-                    bboxes=np.array(bboxes),
+                    bboxes=bboxes,
                     scores=np.array(scores),
                     labels=np.array(labels),
                     masks=final_mask,
                 )
             else:
                 detections_message = create_detection_message(
-                    bboxes=np.array(bboxes),
+                    bboxes=bboxes,
                     scores=np.array(scores),
                     labels=np.array(labels),
                 )

diff --git a/depthai_nodes/ml/parsers/yunet.py b/depthai_nodes/ml/parsers/yunet.py
@@ -1,6 +1,7 @@
 from typing import Any, Dict, Tuple
 
 import depthai as dai
+import numpy as np
 
 from ..messages.creators import create_detection_message
 from .detection import DetectionParser
@@ -277,6 +278,9 @@ def run(self):
             keypoints = keypoints[keep_indices]
             scores = scores[keep_indices]
 
+            bboxes = np.clip(bboxes, 0, 1)
+            keypoints = np.clip(keypoints, 0, 1)
+
             detections_message = create_detection_message(
                 bboxes=bboxes, scores=scores, keypoints=keypoints
             )

diff --git a/examples/main.py b/examples/main.py
@@ -1,10 +1,11 @@
 import depthai as dai
 from utils.arguments import initialize_argparser, parse_fps_limit, parse_model_slug
 from utils.model import get_input_shape, get_model_from_hub, get_parser
-from utils.parser import setup_parser
 from utils.xfeat import xfeat_mono, xfeat_stereo
 from visualization.visualize import visualize
 
+from depthai_nodes.parser_generator import ParserGenerator
+
 # Initialize the argument parser
 arg_parser, args = initialize_argparser()
 
@@ -66,8 +67,15 @@
                 nn_archive,
             )
 
-        parser = pipeline.create(parser_class)
-        setup_parser(parser, nn_archive, parser_name)
+        parsers = pipeline.create(ParserGenerator).build(nn_archive)
+
+        if len(parsers) == 0:
+            raise ValueError("No parsers were generated.")
+
+        if len(parsers) > 1:
+            raise ValueError("Only models with one parser are supported.")
+
+        parser = parsers[0]
 
         # Linking
         network.out.link(parser.input)

diff --git a/examples/utils/parser.py b/examples/utils/parser.py
diff --git a/examples/utils/xfeat.py b/examples/utils/xfeat.py
@@ -27,8 +27,8 @@ def xfeat_mono(nn_archive: dai.NNArchive, input_shape: List[int], fps_limit: int
 
         # Set up parser
         parser = XFeatMonoParser()
-        parser.setOriginalSize(input_shape)
-        parser.setInputSize(input_shape)
+        parser.setOriginalSize(tuple(input_shape))
+        parser.setInputSize(tuple(input_shape))
         parser.setMaxKeypoints(2048)
 
         # Linking
@@ -105,8 +105,8 @@ def xfeat_stereo(nn_archive: dai.NNArchive, input_shape: List[int], fps_limit: i
         right_network.setNumInferenceThreads(2)
 
         parser = pipeline.create(XFeatStereoParser)
-        parser.setOriginalSize(input_shape)
-        parser.setInputSize(input_shape)
+        parser.setOriginalSize(tuple(input_shape))
+        parser.setInputSize(tuple(input_shape))
         parser.setMaxKeypoints(512)
 
         left_network.out.link(parser.reference_input)