fix: input size setting #126

Merged
merged 12 commits on Nov 13, 2024
2 changes: 2 additions & 0 deletions depthai_nodes/ml/parsers/base_parser.py
@@ -57,6 +57,8 @@ def build(self, head_config: Dict[str, Any]) -> "BaseParser":
@param head_config: A dictionary containing configuration details relevant to
the parser, including parameters and settings required for output parsing.
@type head_config: Dict[str, Any]
@return: The parser object with the head configuration set.
@rtype: BaseParser
"""
pass

46 changes: 27 additions & 19 deletions depthai_nodes/ml/parsers/lane_detection.py
@@ -23,8 +23,8 @@ class LaneDetectionParser(BaseParser):
Griding number.
cls_num_per_lane : int
Number of points per lane.
input_shape : Tuple[int, int]
Input shape.
input_size : Tuple[int, int]
Input size (width, height).

Output Message/s
----------------
@@ -44,7 +44,7 @@ def __init__(
row_anchors: List[int] = None,
griding_num: int = None,
cls_num_per_lane: int = None,
input_shape: Tuple[int, int] = (288, 800),
input_size: Tuple[int, int] = None,
) -> None:
"""Initializes the lane detection parser node.

@@ -56,16 +56,16 @@ def __init__(
@type griding_num: int
@param cls_num_per_lane: Number of points per lane.
@type cls_num_per_lane: int
@param input_shape: Input shape.
@type input_shape: Tuple[int, int]
@param input_size: Input size (width, height).
@type input_size: Tuple[int, int]
"""
super().__init__()
self.output_layer_name = output_layer_name

self.row_anchors = row_anchors
self.griding_num = griding_num
self.cls_num_per_lane = cls_num_per_lane
self.input_shape = input_shape
self.input_size = input_size

def setOutputLayerName(self, output_layer_name: str) -> None:
"""Set the output layer name for the lane detection model.
@@ -109,32 +109,40 @@ def setClsNumPerLane(self, cls_num_per_lane: int) -> None:
raise ValueError("Number of points per lane must be an integer.")
self.cls_num_per_lane = cls_num_per_lane

def setInputShape(self, input_shape: Tuple[int, int]) -> None:
"""Set the input shape for the lane detection model.
def setInputSize(self, input_size: Tuple[int, int]) -> None:
"""Set the input size for the lane detection model.

@param input_shape: Input shape.
@type input_shape: Tuple[int, int]
@param input_size: Input size (width, height).
@type input_size: Tuple[int, int]
"""
if not isinstance(input_shape, tuple):
raise ValueError("Input shape must be a tuple.")
if len(input_shape) != 2:
raise ValueError("Input shape must be a tuple of two integers.")
if not all(isinstance(size, int) for size in input_shape):
raise ValueError("Input shape must be a tuple of integers.")
self.input_shape = input_shape
if not isinstance(input_size, tuple):
raise ValueError("Input size must be a tuple.")
if len(input_size) != 2:
raise ValueError("Input size must be a tuple of two integers.")
if not all(isinstance(size, int) for size in input_size):
raise ValueError("Input size must be a tuple of integers.")
self.input_size = input_size

def build(
self,
head_config: Dict[str, Any],
inputs_size: List[List[int]],
) -> "LaneDetectionParser":
"""Configures the parser.

@param head_config: The head configuration for the parser.
@type head_config: Dict[str, Any]
@param inputs_size: Sizes of the model inputs, one [width, height] entry per input.
@type inputs_size: List[List[int]]
@return: The parser object with the head configuration set.
@rtype: LaneDetectionParser
"""

if len(inputs_size) != 1:
raise ValueError(
f"Only one input supported for LaneDetectionParser, got {len(inputs_size)} inputs."
)
self.input_size = inputs_size[0]
output_layers = head_config.get("outputs", [])
if len(output_layers) != 1:
raise ValueError(
@@ -180,8 +188,8 @@ def run(self):
anchors=self.row_anchors,
griding_num=self.griding_num,
cls_num_per_lane=self.cls_num_per_lane,
input_width=self.input_shape[1],
input_height=self.input_shape[0],
input_width=self.input_size[0],
input_height=self.input_size[1],
y=y,
)

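As a quick, standalone illustration of the convention the renamed parameter follows (not part of the PR): input_size is ordered (width, height), so index 0 feeds the decoder's input_width and index 1 its input_height, matching the old default of height 288 and width 800.

# Standalone sketch of the (width, height) ordering assumed above.
input_size = (800, 288)  # (width, height); the old default input_shape was (288, 800)
input_width, input_height = input_size  # width=800, height=288
assert (input_width, input_height) == (800, 288)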
26 changes: 13 additions & 13 deletions depthai_nodes/ml/parsers/utils/yunet.py
@@ -20,23 +20,23 @@ def manual_product(*args):


def generate_anchors(
input_shape: Tuple[int, int],
input_size: Tuple[int, int],
min_sizes: List[List[int]] = None,
strides: List[int] = None,
):
"""Generate a set of default bounding boxes, known as anchors.
The code is taken from https://github.com/Kazuhito00/YuNet-ONNX-TFLite-Sample/tree/main

@param input_shape: A tuple representing the width and height of the input image.
@type input_shape: Tuple[int, int]
@param input_size: A tuple representing the width and height of the input image.
@type input_size: Tuple[int, int]
@param min_sizes: A list of lists, where each inner list contains the minimum sizes of the anchors for different feature maps.
@type min_sizes: List[List[int]]
@param strides: Strides for each feature map layer.
@type strides: List[int]
@return: Anchors.
@rtype: np.ndarray
"""
w, h = input_shape
w, h = input_size

if min_sizes is None:
min_sizes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]]
@@ -70,7 +70,7 @@ def generate_anchors(


def decode_detections(
input_shape: Tuple[int, int],
input_size: Tuple[int, int],
loc: np.ndarray,
conf: np.ndarray,
iou: np.ndarray,
@@ -80,8 +80,8 @@
Decodes the output of an object detection model by converting the model's predictions (localization, confidence, and IoU scores) into bounding boxes, keypoints, and scores.
The code is taken from https://github.com/Kazuhito00/YuNet-ONNX-TFLite-Sample/tree/main

@param input_shape: The shape of the input image (height, width).
@type input_shape: tuple
@param input_size: The size of the input image (width, height).
@type input_size: tuple
@param loc: The predicted locations (or offsets) of the bounding boxes.
@type loc: np.ndarray
@param conf: The predicted class confidence scores.
@@ -98,12 +98,12 @@

"""

w, h = input_shape
w, h = input_size

if variance is None:
variance = [0.1, 0.2]

anchors = generate_anchors(input_shape)
anchors = generate_anchors(input_size)

# Get scores
cls_scores = conf[:, 1]
@@ -168,7 +168,7 @@ def format_detections(
bboxes: np.ndarray,
keypoints: np.ndarray,
scores: np.ndarray,
input_shape: Tuple[int, int],
input_size: Tuple[int, int],
):
"""Format detections into a list of dictionaries.

@@ -178,16 +178,16 @@
@type np.ndarray
@param scores: A numpy array of shape (N,) containing the scores.
@type np.ndarray
@param input_shape: A tuple representing the height and width of the input image.
@type input_shape: tuple
@param input_size: A tuple representing the width and height of the input image.
@type input_size: tuple
@return: A tuple of bboxes, keypoints, and scores.
- bboxes: NumPy array of shape (N, 4) containing the decoded bounding boxes in the format [x_min, y_min, width, height].
- keypoints: A NumPy array of shape (N, 10) containing the decoded keypoint coordinates for each anchor.
- scores: A NumPy array of shape (N, 1) containing the combined scores for each anchor.
@rtype: Tuple[np.ndarray, np.ndarray, np.ndarray]
"""

w, h = input_shape
w, h = input_size

bboxes = normalize_bboxes(bboxes, height=h, width=w)

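For context, a hedged usage sketch of the renamed helper; the import path is assumed from the file layout above, and the concrete size is illustrative.

# Hedged sketch: generate_anchors now takes input_size as (width, height).
from depthai_nodes.ml.parsers.utils.yunet import generate_anchors  # path assumed from repo layout

anchors = generate_anchors(input_size=(320, 240))  # width=320, height=240
print(anchors.shape)  # np.ndarray of anchors; row count depends on strides and min_sizes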
20 changes: 12 additions & 8 deletions depthai_nodes/ml/parsers/yunet.py
@@ -1,4 +1,4 @@
from typing import Any, Dict, Tuple
from typing import Any, Dict, List, Tuple

import depthai as dai
import numpy as np
@@ -22,7 +22,7 @@ class YuNetParser(DetectionParser):
max_det : int
Maximum number of detections to keep.
input_size : Tuple[int, int]
Input size.
Input size (width, height).
loc_output_layer_name: str
Name of the output layer containing the location predictions.
conf_output_layer_name: str
@@ -55,7 +55,7 @@ def __init__(
@type iou_threshold: float
@param max_det: Maximum number of detections to keep.
@type max_det: int
@param input_size: Input shape of the model (width, height).
@param input_size: Input size of the model (width, height).
@type input_size: Tuple[int, int]
@param loc_output_layer_name: Output layer name for the location predictions.
@type loc_output_layer_name: str
@@ -120,22 +120,26 @@ def setOutputLayerIou(self, iou_output_layer_name: str) -> None:
def build(
self,
head_config: Dict[str, Any],
inputs_size: List[List[int]],
) -> "YuNetParser":
"""Configures the parser.

@param head_config: The head configuration for the parser.
@type head_config: Dict[str, Any]
@param inputs_size: Sizes of the model inputs, one [width, height] entry per input.
@type inputs_size: List[List[int]]
@return: The parser object with the head configuration set.
@rtype: YuNetParser
"""

super().build(head_config)
output_layers = head_config.get("outputs", [])
self.input_size = head_config.get("input_size", self.input_size)
if len(output_layers) != 3:
if len(inputs_size) != 1:
raise ValueError(
f"YuNetParser expects exactly 3 output layers, got {output_layers} layers."
f"Only one input supported for LaneDetectionParser, got {len(inputs_size)} inputs."
)
self.input_size = inputs_size[0]
for output_layer in output_layers:
if "loc" in output_layer:
self.loc_output_layer_name = output_layer
@@ -242,7 +246,7 @@ def run(self):

# decode detections
bboxes, keypoints, scores = decode_detections(
input_shape=self.input_size,
input_size=self.input_size,
loc=loc,
conf=conf,
iou=iou,
@@ -261,7 +265,7 @@
bboxes=bboxes,
keypoints=keypoints,
scores=scores,
input_shape=self.input_size,
input_size=self.input_size,
)

# run nms
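A standalone sketch of the single-input guard that build now applies; the values are illustrative, and in practice inputs_size comes from the parser generator.

# Standalone sketch of the new single-input check and size assignment.
inputs_size = [[320, 240]]  # one [width, height] entry per model input
if len(inputs_size) != 1:
    raise ValueError(
        f"Only one input supported for YuNetParser, got {len(inputs_size)} inputs."
    )
input_size = inputs_size[0]
assert input_size == [320, 240]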
19 changes: 18 additions & 1 deletion depthai_nodes/parser_generator.py
@@ -1,3 +1,4 @@
import inspect
from typing import Dict

import depthai as dai
@@ -53,7 +54,23 @@ def build(self, nn_archive: dai.NNArchive, head_index: int = None) -> Dict:
)

head = decode_head(head)
parsers[index] = pipeline.create(parser).build(head)
sig = inspect.signature(parser.build)
if "inputs_size" in sig.parameters:
inputs_size = []
for input in nn_archive.getConfig().model.inputs:
if input.layout == "NHWC":
_, height, width, _ = input.shape
elif input.layout == "NCHW":
_, _, height, width = input.shape
else:
raise ValueError(
f"Input layout {input.layout} not supported for input_size extraction."
)
inputs_size.append([width, height])
parsers[index] = pipeline.create(parser).build(head, inputs_size)
else:
parsers[index] = pipeline.create(parser).build(head)

return parsers
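The width/height extraction above depends only on the input tensor layout; below is a self-contained sketch of that logic (the helper name and example shapes are illustrative).

# Standalone sketch of the layout-based input-size extraction added above.
from typing import List, Tuple


def extract_input_size(shape: List[int], layout: str) -> Tuple[int, int]:
    """Return (width, height) from a 4D input tensor shape."""
    if layout == "NHWC":
        _, height, width, _ = shape
    elif layout == "NCHW":
        _, _, height, width = shape
    else:
        raise ValueError(
            f"Input layout {layout} not supported for input_size extraction."
        )
    return width, height


# A 1x3x288x800 NCHW tensor and a 1x288x800x3 NHWC tensor both map to (800, 288).
assert extract_input_size([1, 3, 288, 800], "NCHW") == (800, 288)
assert extract_input_size([1, 288, 800, 3], "NHWC") == (800, 288)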
