
Commit

fix: input size setting (#126)
* feat: remove default input_shape for LaneDetectionParser

* feat: add input size extraction to ParserGenerator

* feat: add automatic input_size detection to LaneDetectionParser

* feat: add automatic input_size detection to YuNetParser

* fix: BaseParser build method docstring

* fix: naming of input_shape to input_size

* fix: pre-commit

* fix: YuNet multiple inputs error description

* fix: typos

* feat: add input_shape and layout to head_config and remove input_size from parser arguments

* fix: pre-commit

* fix: head_config model inputs nesting
jkbmrz authored Nov 13, 2024
1 parent 37a2d92 commit 869ed96
Showing 5 changed files with 76 additions and 43 deletions.
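The thrust of the change: instead of taking input_size (or a defaulted input_shape) as a parser argument, the parsers now derive a (width, height) tuple from the model input's shape and layout recorded in the NN archive. A minimal sketch of that derivation — the helper name is illustrative, not part of the diff:

```python
from typing import List, Tuple


def size_from_input(shape: List[int], layout: str) -> Tuple[int, int]:
    """Derive (width, height) from a model input shape and its layout."""
    if layout == "NHWC":  # shape = [N, H, W, C]
        return (shape[2], shape[1])
    elif layout == "NCHW":  # shape = [N, C, H, W]
        return (shape[3], shape[2])
    raise ValueError(f"Input layout {layout} not supported for input_size extraction.")


# The old LaneDetectionParser default of (288, 800) was (height, width);
# an NCHW input of height 288 and width 800 now yields an explicit (width, height) tuple.
assert size_from_input([1, 3, 288, 800], "NCHW") == (800, 288)
```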
2 changes: 2 additions & 0 deletions depthai_nodes/ml/parsers/base_parser.py
@@ -57,6 +57,8 @@ def build(self, head_config: Dict[str, Any]) -> "BaseParser":
@param head_config: A dictionary containing configuration details relevant to
the parser, including parameters and settings required for output parsing.
@type head_config: Dict[str, Any]
+ @return: The parser object with the head configuration set.
+ @rtype: BaseParser
"""
pass

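The return contract documented here is what lets parsers be chained off pipeline.create(...). A toy stand-in (not the real BaseParser, which is a DepthAI host node) showing the pattern a subclass's build() follows with the new model_inputs entry; field names beyond those visible in this diff are assumptions:

```python
from typing import Any, Dict, List, Optional, Tuple


class ExampleParser:
    """Illustrative stand-in for a BaseParser subclass."""

    def __init__(self) -> None:
        self.output_layer_names: List[str] = []
        self.input_size: Optional[Tuple[int, int]] = None

    def build(self, head_config: Dict[str, Any]) -> "ExampleParser":
        # Configure the parser from the head config and return it so the call
        # can be chained, e.g. pipeline.create(Parser).build(head_config).
        self.output_layer_names = head_config.get("outputs", [])
        shape = head_config["model_inputs"][0]["shape"]  # e.g. [N, C, H, W]
        self.input_size = (shape[3], shape[2])            # (width, height) for NCHW
        return self


head_config = {
    "outputs": ["output"],
    "model_inputs": [{"shape": [1, 3, 288, 800], "layout": "NCHW"}],
}
parser = ExampleParser().build(head_config)
assert parser.input_size == (800, 288)
```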
54 changes: 35 additions & 19 deletions depthai_nodes/ml/parsers/lane_detection.py
@@ -23,8 +23,8 @@ class LaneDetectionParser(BaseParser):
Griding number.
cls_num_per_lane : int
Number of points per lane.
- input_shape : Tuple[int, int]
- Input shape.
+ input_size : Tuple[int, int]
+ Input size (width,height).
Output Message/s
----------------
@@ -44,7 +44,7 @@ def __init__(
row_anchors: List[int] = None,
griding_num: int = None,
cls_num_per_lane: int = None,
- input_shape: Tuple[int, int] = (288, 800),
+ input_size: Tuple[int, int] = None,
) -> None:
"""Initializes the lane detection parser node.
@@ -56,16 +56,16 @@ def __init__(
@type griding_num: int
@param cls_num_per_lane: Number of points per lane.
@type cls_num_per_lane: int
- @param input_shape: Input shape.
- @type input_shape: Tuple[int, int]
+ @param input_size: Input size (width,height).
+ @type input_size: Tuple[int, int]
"""
super().__init__()
self.output_layer_name = output_layer_name

self.row_anchors = row_anchors
self.griding_num = griding_num
self.cls_num_per_lane = cls_num_per_lane
- self.input_shape = input_shape
+ self.input_size = input_size

def setOutputLayerName(self, output_layer_name: str) -> None:
"""Set the output layer name for the lane detection model.
@@ -109,19 +109,19 @@ def setClsNumPerLane(self, cls_num_per_lane: int) -> None:
raise ValueError("Number of points per lane must be an integer.")
self.cls_num_per_lane = cls_num_per_lane

- def setInputShape(self, input_shape: Tuple[int, int]) -> None:
- """Set the input shape for the lane detection model.
+ def setInputSize(self, input_size: Tuple[int, int]) -> None:
+ """Set the input size for the lane detection model.
- @param input_shape: Input shape.
- @type input_shape: Tuple[int, int]
+ @param input_size: Input size (width,height).
+ @type input_size: Tuple[int, int]
"""
- if not isinstance(input_shape, tuple):
- raise ValueError("Input shape must be a tuple.")
- if len(input_shape) != 2:
- raise ValueError("Input shape must be a tuple of two integers.")
- if not all(isinstance(size, int) for size in input_shape):
- raise ValueError("Input shape must be a tuple of integers.")
- self.input_shape = input_shape
+ if not isinstance(input_size, tuple):
+ raise ValueError("Input size must be a tuple.")
+ if len(input_size) != 2:
+ raise ValueError("Input size must be a tuple of two integers.")
+ if not all(isinstance(size, int) for size in input_size):
+ raise ValueError("Input size must be a tuple of integers.")
+ self.input_size = input_size

def build(
self,
@@ -147,6 +147,22 @@ def build(
"cls_num_per_lane", self.cls_num_per_lane
)

+ inputs = head_config["model_inputs"]
+ if len(inputs) != 1:
+ raise ValueError(
+ f"Only one input supported for LaneDetectionParser, got {len(inputs)} inputs."
+ )
+ self.input_shape = inputs[0].get("shape")
+ self.layout = inputs[0].get("layout")
+ if self.layout == "NHWC":
+ self.input_size = (self.input_shape[2], self.input_shape[1])
+ elif self.layout == "NCHW":
+ self.input_size = (self.input_shape[3], self.input_shape[2])
+ else:
+ raise ValueError(
+ f"Input layout {self.layout} not supported for input_size extraction."
+ )

return self

def run(self):
@@ -180,8 +196,8 @@ def run(self):
anchors=self.row_anchors,
griding_num=self.griding_num,
cls_num_per_lane=self.cls_num_per_lane,
- input_width=self.input_shape[1],
- input_height=self.input_shape[0],
+ input_width=self.input_size[0],
+ input_height=self.input_size[1],
y=y,
)

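Note the ordering change in run(): input_width is now read from input_size[0] and input_height from input_size[1], whereas the old input_shape was indexed as (height, width). A small illustration of the (width, height) convention, with made-up lane points:

```python
import numpy as np

input_size = (800, 288)  # (width, height), as the parser now stores it

# Hypothetical lane points in pixel coordinates for an 800x288 input.
points_px = np.array([[400.0, 144.0], [800.0, 288.0]])

# Normalize x by width (index 0) and y by height (index 1).
points_norm = points_px / np.array(input_size)
assert np.allclose(points_norm, [[0.5, 0.5], [1.0, 1.0]])
```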
26 changes: 13 additions & 13 deletions depthai_nodes/ml/parsers/utils/yunet.py
@@ -20,23 +20,23 @@ def manual_product(*args):


def generate_anchors(
- input_shape: Tuple[int, int],
+ input_size: Tuple[int, int],
min_sizes: List[List[int]] = None,
strides: List[int] = None,
):
"""Generate a set of default bounding boxes, known as anchors.
The code is taken from https://github.com/Kazuhito00/YuNet-ONNX-TFLite-Sample/tree/main
- @param input_shape: A tuple representing the width and height of the input image.
- @type input_shape: Tuple[int, int]
+ @param input_size: A tuple representing the width and height of the input image.
+ @type input_size: Tuple[int, int]
@param min_sizes: A list of lists, where each inner list contains the minimum sizes of the anchors for different feature maps.
@type min_sizes List[List[int]]
@param strides: Strides for each feature map layer.
@type strides: List[int]
@return: Anchors.
@rtype: np.ndarray
"""
- w, h = input_shape
+ w, h = input_size

if min_sizes is None:
min_sizes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]]
@@ -70,7 +70,7 @@ def generate_anchors(


def decode_detections(
- input_shape: Tuple[int, int],
+ input_size: Tuple[int, int],
loc: np.ndarray,
conf: np.ndarray,
iou: np.ndarray,
@@ -80,8 +80,8 @@
Decodes the output of an object detection model by converting the model's predictions (localization, confidence, and IoU scores) into bounding boxes, keypoints, and scores.
The code is taken from https://github.com/Kazuhito00/YuNet-ONNX-TFLite-Sample/tree/main
- @param input_shape: The shape of the input image (height, width).
- @type input_shape: tuple
+ @param input_size: The size of the input image (height, width).
+ @type input_size: tuple
@param loc: The predicted locations (or offsets) of the bounding boxes.
@type loc: np.ndarray
@param conf: The predicted class confidence scores.
@@ -98,12 +98,12 @@
"""

- w, h = input_shape
+ w, h = input_size

if variance is None:
variance = [0.1, 0.2]

- anchors = generate_anchors(input_shape)
+ anchors = generate_anchors(input_size)

# Get scores
cls_scores = conf[:, 1]
@@ -168,7 +168,7 @@ def format_detections(
bboxes: np.ndarray,
keypoints: np.ndarray,
scores: np.ndarray,
- input_shape: Tuple[int, int],
+ input_size: Tuple[int, int],
):
"""Format detections into a list of dictionaries.
@@ -178,16 +178,16 @@ def format_detections(
@type np.ndarray
@param scores: A numpy array of shape (N,) containing the scores.
@type np.ndarray
- @param input_shape: A tuple representing the height and width of the input image.
- @type input_shape: tuple
+ @param input_size: A tuple representing the width and height of the input image.
+ @type input_size: tuple
@return: A tuple of bboxes, keypoints, and scores.
- bboxes: NumPy array of shape (N, 4) containing the decoded bounding boxes in the format [x_min, y_min, width, height].
- keypoints: A NumPy array of shape (N, 10) containing the decoded keypoint coordinates for each anchor.
- scores: A NumPy array of shape (N, 1) containing the combined scores for each anchor.
@rtype: Tuple[np.ndarray, np.ndarray, np.ndarray]
"""

- w, h = input_shape
+ w, h = input_size

bboxes = normalize_bboxes(bboxes, height=h, width=w)

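format_detections unpacks the tuple as w, h = input_size and normalizes boxes against image width and height. The normalize_bboxes implementation isn't shown in this diff; the snippet below only illustrates the convention the rename makes explicit, with made-up numbers:

```python
import numpy as np

input_size = (640, 480)  # (width, height)
w, h = input_size

# One pixel-space box in [x_min, y_min, width, height].
bboxes_px = np.array([[64.0, 48.0, 320.0, 240.0]])

# x-coordinates and widths scale by image width, y-coordinates and heights by image height.
bboxes_norm = bboxes_px / np.array([w, h, w, h])
assert np.allclose(bboxes_norm, [[0.1, 0.1, 0.5, 0.5]])
```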
28 changes: 19 additions & 9 deletions depthai_nodes/ml/parsers/yunet.py
@@ -22,7 +22,7 @@ class YuNetParser(DetectionParser):
max_det : int
Maximum number of detections to keep.
input_size : Tuple[int, int]
- Input size.
+ Input size (width, height).
loc_output_layer_name: str
Name of the output layer containing the location predictions.
conf_output_layer_name: str
@@ -55,7 +55,7 @@ def __init__(
@type iou_threshold: float
@param max_det: Maximum number of detections to keep.
@type max_det: int
- @param input_size: Input shape of the model (width, height).
+ @param input_size: Input size of the model (width, height).
@type input_size: Tuple[int, int]
@param loc_output_layer_name: Output layer name for the location predictions.
@type loc_output_layer_name: str
@@ -131,11 +131,6 @@ def build(

super().build(head_config)
output_layers = head_config.get("outputs", [])
- self.input_size = head_config.get("input_size", self.input_size)
- if len(output_layers) != 3:
- raise ValueError(
- f"YuNetParser expects exactly 3 output layers, got {output_layers} layers."
- )
for output_layer in output_layers:
if "loc" in output_layer:
self.loc_output_layer_name = output_layer
@@ -147,6 +142,21 @@
raise ValueError(
f"Unexpected output layer {output_layer}. Only loc, conf, and iou output layers are supported."
)
+ inputs = head_config["model_inputs"]
+ if len(inputs) != 1:
+ raise ValueError(
+ f"Only one input supported for YuNetParser, got {len(inputs)} inputs."
+ )
+ self.input_shape = inputs[0].get("shape")
+ self.layout = inputs[0].get("layout")
+ if self.layout == "NHWC":
+ self.input_size = (self.input_shape[2], self.input_shape[1])
+ elif self.layout == "NCHW":
+ self.input_size = (self.input_shape[3], self.input_shape[2])
+ else:
+ raise ValueError(
+ f"Input layout {self.layout} not supported for input_size extraction."
+ )

return self

@@ -242,7 +252,7 @@ def run(self):

# decode detections
bboxes, keypoints, scores = decode_detections(
- input_shape=self.input_size,
+ input_size=self.input_size,
loc=loc,
conf=conf,
iou=iou,
@@ -261,7 +271,7 @@
bboxes=bboxes,
keypoints=keypoints,
scores=scores,
- input_shape=self.input_size,
+ input_size=self.input_size,
)

# run nms
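YuNetParser.build() now routes the three output layers (loc, conf, iou) by substring match and takes input_size from the single model input, rejecting archives with more than one input. A standalone mirror of the routing step, for illustration only:

```python
from typing import Dict, List


def route_output_layers(output_layers: List[str]) -> Dict[str, str]:
    """Mirror of the substring-based routing in YuNetParser.build()."""
    routed: Dict[str, str] = {}
    for name in output_layers:
        if "loc" in name:
            routed["loc"] = name
        elif "conf" in name:
            routed["conf"] = name
        elif "iou" in name:
            routed["iou"] = name
        else:
            raise ValueError(
                f"Unexpected output layer {name}. Only loc, conf, and iou "
                "output layers are supported."
            )
    return routed


assert route_output_layers(["loc_8", "conf_8", "iou_8"]) == {
    "loc": "loc_8",
    "conf": "conf_8",
    "iou": "iou_8",
}
```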
9 changes: 7 additions & 2 deletions depthai_nodes/parser_generator.py
@@ -59,8 +59,13 @@ def build(self, nn_archive: dai.NNArchive, head_index: int = None) -> Dict:
f"Parser {parser_name} does not inherit from BaseParser class."
)

- head = decode_head(head)
- parsers[index] = pipeline.create(parser).build(head)
+ head_config = decode_head(head)
+ head_config["model_inputs"] = []
+ for input in nn_archive.getConfig().model.inputs:
+ head_config["model_inputs"].append(
+ {"shape": input.shape, "layout": input.layout}
+ )
+ parsers[index] = pipeline.create(parser).build(head_config)

return parsers

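ParserGenerator is what assembles head_config["model_inputs"] from the archive before calling each parser's build(). A hedged sketch of the shape of that data without a real dai.NNArchive — the stand-in class below is illustrative only:

```python
from typing import List


class FakeModelInput:
    """Stand-in for an nn_archive.getConfig().model.inputs entry (illustrative)."""

    def __init__(self, shape: List[int], layout: str) -> None:
        self.shape = shape
        self.layout = layout


model_inputs = [FakeModelInput([1, 3, 288, 800], "NCHW")]

head_config = {"outputs": ["output"]}  # stand-in for whatever decode_head(head) returned
head_config["model_inputs"] = [
    {"shape": inp.shape, "layout": inp.layout} for inp in model_inputs
]

print(head_config["model_inputs"])
# [{'shape': [1, 3, 288, 800], 'layout': 'NCHW'}]
```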
