Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restructure; Add Parsers, Util Functions and Message Creators #4

Merged
merged 42 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
b08d354
add util function for image message creation
jkbmrz May 29, 2024
f45074d
add util function for detections message creation
jkbmrz May 29, 2024
828cf60
populate message creation __init__ file
jkbmrz May 29, 2024
a88bddc
populate utils __init__ file
jkbmrz May 29, 2024
328a339
add util function for reverting normalization
jkbmrz May 29, 2024
eb2a411
adjust dncnn3 postprocessing node with new utils functions
jkbmrz May 29, 2024
940568e
adjust zero_dce postprocessing node with new utils functions
jkbmrz May 29, 2024
c96a563
add util function for monocular depth message creation
jkbmrz Jun 6, 2024
4c5dc99
adjust util function for detections message creation
jkbmrz Jun 6, 2024
80598a3
add util function for decoding detections
jkbmrz Jun 6, 2024
e9c4bc6
adjust yunet postprocessing node with new utils functions
jkbmrz Jun 6, 2024
0f29aa5
adjust depth_anything postprocessing node with new utils functions
jkbmrz Jun 6, 2024
71e8cca
fix imports
jkbmrz Jun 6, 2024
c8769e1
adjust utils __init__.py
jkbmrz Jun 6, 2024
ca9db68
fix imports
jkbmrz Jun 6, 2024
cc9b070
remove nms.py from utils
jkbmrz Jun 6, 2024
e5113f7
Merge branch 'main' into feature/add_node_utils
jkbmrz Jun 6, 2024
fe72fc4
make a generic monocular depth parser
jkbmrz Jun 6, 2024
9531ac1
add generic monocular depth parser
jkbmrz Jun 6, 2024
0b126c8
fix imports
jkbmrz Jun 6, 2024
7f23a0a
add generic image-to-image postprocessing node
jkbmrz Jun 19, 2024
6ae043d
fix naming
jkbmrz Jun 19, 2024
a090353
feat: add normalize arg to unnormalizate function
jkbmrz Jun 19, 2024
f965988
refactor: move image format detection to message creation step
jkbmrz Jun 19, 2024
92f2ea4
feature: add automatic detection of strides and input_size for YunetP…
jkbmrz Jun 19, 2024
4813db6
refactor: move automatic detection of number of cols and rows to deco…
jkbmrz Jun 19, 2024
2e455dd
refactor: remove unneeded np.expand of cls dim
jkbmrz Jun 19, 2024
5099601
refactor: vectorize detection decoding
jkbmrz Jun 19, 2024
3939204
test: check bbox format during detection message creation
jkbmrz Jun 19, 2024
edbb000
fix: correct bbox format check
jkbmrz Jun 19, 2024
17ce753
feature: merge the old depth message with monocular depth message cre…
jkbmrz Jun 19, 2024
fe1778a
feature: add depth_type setters to MonocularDepthParser
jkbmrz Jun 19, 2024
883f324
feature: allow empty detections in detections message creation
jkbmrz Jun 19, 2024
1fb7272
fix: remove old depth message imports
jkbmrz Jun 19, 2024
c1d15c7
refactor: rename ImageOutputParser file
jkbmrz Jun 20, 2024
5748d18
fix: ImageOutputParser output shape check to allow non-batched outputs
jkbmrz Jun 20, 2024
2c9970f
refactor: vectorize detection dicts construction
jkbmrz Jun 20, 2024
083d0f2
refactor: move and rename message creation functions dir
jkbmrz Jun 20, 2024
96064f2
fix: various imports
jkbmrz Jun 20, 2024
7b6ae31
refactor: rename (monocular) depth message
jkbmrz Jun 20, 2024
99f2803
style: remove empty file
jkbmrz Jun 20, 2024
0eaa547
refactor: rename creation_functions to creators
jkbmrz Jun 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions ml/postprocessing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
from .zero_dce import ZeroDCEParser
from .dncnn3 import DnCNN3Parser
from .depth_anything import DepthAnythingParser
from .image_to_image import ImageOutputParser
from .monocular_depth import MonocularDepthParser
from .yunet import YuNetParser
from .mediapipe_hand_detection import MPHandDetectionParser
from .mediapipe_hand_landmarker import MPHandLandmarkParser
from .scrfd import SCRFDParser
from .segmentation import SegmentationParser

__all__ = [
'ZeroDCEParser',
'DnCNN3Parser',
'DepthAnythingParser',
'ImageOutputParser',
'MonocularDepthParser',
'YuNetParser',
'MPHandDetectionParser',
'MPHandLandmarkParser',
Expand Down
46 changes: 0 additions & 46 deletions ml/postprocessing/depth_anything.py

This file was deleted.

41 changes: 0 additions & 41 deletions ml/postprocessing/dncnn3.py

This file was deleted.

52 changes: 52 additions & 0 deletions ml/postprocessing/image_to_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import depthai as dai

from .utils import unnormalize_image
from .utils.message_creation import create_image_message


class ImageOutputParser(dai.node.ThreadedHostNode):
    """Postprocessing node for image-to-image models (e.g. DnCNN3, zero-dce).

    Reads dai.NNData messages with exactly one output layer and emits the
    decoded image as a dai.ImgFrame.
    """

    def __init__(self, output_is_bgr=False):
        """
        Args:
            output_is_bgr (bool, optional): Set to True if the model output
                channels are BGR-ordered. Defaults to False.
        """
        dai.node.ThreadedHostNode.__init__(self)
        self.input = dai.Node.Input(self)
        self.out = dai.Node.Output(self)

        self.output_is_bgr = output_is_bgr

    def setBGROutput(self):
        """Declare that the model output is BGR-ordered."""
        self.output_is_bgr = True

    def run(self):
        """
        Postprocessing logic for image-to-image models (e.g. DnCNN3, zero-dce etc.).

        Returns:
            dai.ImgFrame: uint8, grayscale HW / colorscale HWC BGR image.
        """

        while self.isRunning():

            try:
                output: dai.NNData = self.input.get()
            except dai.MessageQueue.QueueException:
                break  # Pipeline was stopped

            output_layer_names = output.getAllLayerNames()
            if len(output_layer_names) != 1:
                raise ValueError(
                    f"Expected 1 output layer, got {len(output_layer_names)}."
                )
            output = output.getTensor(output_layer_names[0])

            # Accept both batched (e.g. NHWC) and non-batched outputs.
            if len(output.shape) == 4:
                image = output[0]  # strip the batch dimension
            elif len(output.shape) == 3:
                image = output
            else:
                # BUGFIX: the original message read "Unexpected 4-dimensional
                # output" although 4 dimensions were the expected case.
                raise ValueError(
                    f"Expected 3- or 4-dimensional output, got {len(output.shape)}-dimensional.",
                )

            image = unnormalize_image(image)

            image_message = create_image_message(
                image=image,
                is_bgr=self.output_is_bgr,
            )

            self.out.send(image_message)
48 changes: 48 additions & 0 deletions ml/postprocessing/monocular_depth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import depthai as dai

from .utils.message_creation import create_monocular_depth_message


class MonocularDepthParser(dai.node.ThreadedHostNode):
    """Postprocessing node for monocular depth models (e.g. Depth Anything).

    Consumes dai.NNData with a single output layer and emits the decoded
    depth map as a dai.ImgFrame message.
    """

    def __init__(self, depth_type="relative"):
        """
        Args:
            depth_type (str, optional): Either "relative" or "metric".
                Defaults to "relative".
        """
        dai.node.ThreadedHostNode.__init__(self)
        self.input = dai.Node.Input(self)
        self.out = dai.Node.Output(self)

        self.depth_type = depth_type

    def setRelativeDepthType(self):
        """Mark the produced depth maps as relative depth."""
        self.depth_type = "relative"

    def setMetricDepthType(self):
        """Mark the produced depth maps as metric depth."""
        self.depth_type = "metric"

    def run(self):
        """
        Postprocessing logic for a model with monocular depth output (e.g. Depth Anything model).

        Returns:
            dai.ImgFrame: uint16, HW depth map.
        """

        while self.isRunning():
            try:
                nn_data: dai.NNData = self.input.get()
            except dai.MessageQueue.QueueException:
                # Pipeline was stopped.
                break

            layer_names = nn_data.getAllLayerNames()
            if len(layer_names) != 1:
                raise ValueError(
                    f"Expected 1 output layer, got {len(layer_names)}."
                )

            # Single-layer tensor; drop the batch dimension.
            depth_map = nn_data.getTensor(layer_names[0])[0]

            message = create_monocular_depth_message(
                depth_map=depth_map,
                depth_type=self.depth_type,
            )
            self.out.send(message)
2 changes: 2 additions & 0 deletions ml/postprocessing/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .denormalize import unnormalize_image
from .decode_detections import decode_detections
89 changes: 89 additions & 0 deletions ml/postprocessing/utils/decode_detections.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import time
from typing import Any, Dict, List, Tuple

import numpy as np


def decode_detections(
    input_size: Tuple[int, int],
    stride: int,
    score_threshold: float,
    cls: np.ndarray,
    obj: np.ndarray,
    bbox: np.ndarray,
    kps: np.ndarray,
) -> List[Dict[str, Any]]:
    """
    Decode the detections from neural network output tensors.

    Args:
        input_size (Tuple[int, int]): The input size of the model that produced the detections, (width, height).
        stride (int): The stride used in the detection grid.
        score_threshold (float): Minimum score threshold for a detection to be considered valid.
        cls (np.ndarray): 2D array of class scores for each grid cell, shape (grid_size, num_classes).
        obj (np.ndarray): 1D array of objectness scores for each grid cell, shape (grid_size,).
        bbox (np.ndarray): 2D array of bounding box predictions, shape (grid_size, 4).
        kps (np.ndarray): 2D array of keypoint predictions, shape (grid_size, num_keypoints * 2).

    Returns:
        List[Dict[str, Any]]: A list of detections, where each detection is a dictionary containing:
            - "bbox": [x1, y1, width, height] (relative bounding box coordinates)
            - "label": int (class label)
            - "keypoints": List[Tuple[float, float]] (relative keypoint coordinates)
            - "score": float (detection score)
    """

    input_width, input_height = input_size
    # Grid dimensions are the input resolution downscaled by the stride.
    cols = int(input_width / stride)
    rows = int(input_height / stride)

    # Row/column index of each grid cell and its flat index into the tensors.
    r, c = np.meshgrid(np.arange(rows), np.arange(cols), indexing='ij')
    idx = r * cols + c

    # Decode scores: geometric mean of the best class score and objectness.
    cls_scores = np.clip(cls[idx], 0, 1)
    obj_scores = np.clip(obj[idx], 0, 1)
    max_cls_scores = np.max(cls_scores, axis=-1)
    scores = np.sqrt(max_cls_scores * obj_scores)

    # Get the labels with the highest score
    labels = np.argmax(cls_scores, axis=-1)

    # Decode bounding boxes: center offsets are relative to the grid cell,
    # width/height are exponentiated, all scaled back by the stride.
    cx = (c + bbox[idx, 0]) * stride
    cy = (r + bbox[idx, 1]) * stride
    w = np.exp(bbox[idx, 2]) * stride
    h = np.exp(bbox[idx, 3]) * stride
    x1 = cx - w / 2
    y1 = cy - h / 2

    # Decode keypoints to coordinates relative to the input size.
    lx = (kps[idx, ::2] + c[:, :, None]) * stride
    ly = (kps[idx, 1::2] + r[:, :, None]) * stride
    keypoints = np.stack((lx / input_width, ly / input_height), axis=-1)

    # Keep only grid cells above the score threshold. np.argwhere yields
    # (i, j) pairs in row-major order, matching a nested rows/cols loop.
    detections = []
    for i, j in np.argwhere(scores > score_threshold):
        detections.append(
            {
                "bbox": [
                    x1[i, j] / input_width,
                    y1[i, j] / input_height,
                    w[i, j] / input_width,
                    h[i, j] / input_height,
                ],
                "label": int(labels[i, j]),
                "keypoints": [(x, y) for x, y in keypoints[i, j]],
                "score": float(scores[i, j]),
            }
        )

    return detections
26 changes: 26 additions & 0 deletions ml/postprocessing/utils/denormalize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import numpy as np


def unnormalize_image(image, normalize=True):
    """
    Un-normalize an image tensor by scaling it to the [0, 255] range.

    Args:
        image (np.ndarray): The normalized image tensor of shape (H, W, C) or (C, H, W).
        normalize (bool, optional): Whether to min-max normalize the image tensor
            to [0, 1] before scaling. Defaults to True.

    Returns:
        np.ndarray: The un-normalized uint8 image.
    """
    # Normalize the image tensor to the range [0, 1]
    if normalize:
        min_val = image.min()
        value_range = image.max() - min_val
        # BUGFIX: guard against division by zero for constant-valued images,
        # which previously produced NaNs; such images now map to all zeros.
        if value_range > 0:
            image = (image - min_val) / value_range
        else:
            image = image - min_val

    # Scale to [0, 255] and clip the values to be in the proper range
    image = image * 255.0
    image = np.clip(image, 0, 255)

    # Convert to uint8
    image = image.astype(np.uint8)

    return image
8 changes: 5 additions & 3 deletions ml/postprocessing/utils/message_creation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from .depth import create_depth_message
from .image import create_image_message
from .segmentation import create_segmentation_message
from .keypoints import create_hand_keypoints_message
from .detection import create_detection_message
from .monocular_depth import create_monocular_depth_message

__all__ = [
"create_depth_message",
"create_image_message",
"create_segmentation_message",
"create_hand_keypoints_message",
"create_detection_message",
]
"create_monocular_depth_message",
]
28 changes: 0 additions & 28 deletions ml/postprocessing/utils/message_creation/depth.py

This file was deleted.

Loading