Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New model parsers. #5

Merged
merged 14 commits into from
Jun 28, 2024
5 changes: 5 additions & 0 deletions ml/messages/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from .img_detections import ImgDetectionWithKeypoints, ImgDetectionsWithKeypoints
from .keypoints import HandKeypoints, Keypoints
from .matched_points import MatchedPoints
from .lines import Line, Lines

__all__ = [
"ImgDetectionWithKeypoints",
"ImgDetectionsWithKeypoints",
"HandKeypoints",
"Keypoints",
"MatchedPoints",
"Line",
"Lines",
]
8 changes: 6 additions & 2 deletions ml/messages/creators/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from .image import create_image_message
from .segmentation import create_segmentation_message
from .keypoints import create_hand_keypoints_message
from .detection import create_detection_message
from .keypoints import create_hand_keypoints_message, create_keypoints_message
from .detection import create_detection_message, create_line_detection_message
from .matched_points import create_matched_points_message
from .depth import create_depth_message

__all__ = [
Expand All @@ -10,4 +11,7 @@
"create_hand_keypoints_message",
"create_detection_message",
"create_depth_message",
"create_line_detection_message",
"create_matched_points_message",
"create_keypoints_message",
]
50 changes: 50 additions & 0 deletions ml/messages/creators/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from ...messages import (
ImgDetectionWithKeypoints,
ImgDetectionsWithKeypoints,
Line,
Lines,
)


Expand Down Expand Up @@ -113,3 +115,51 @@ def create_detection_message(
detections_msg = img_detections()
detections_msg.detections = detections
return detections_msg

def create_line_detection_message(lines: np.ndarray, scores: np.ndarray):
    """
    Create a message for the line detection. The message contains the lines and confidence scores of detected lines.

    Args:
        lines (np.ndarray): Detected lines of shape (N,4) meaning [...,[x_start, y_start, x_end, y_end],...].
        scores (np.ndarray): Confidence scores of detected lines of shape (N,).

    Returns:
        Lines: Message containing the lines and confidence scores of detected lines.

    Raises:
        ValueError: If an input is not a numpy array, has an unexpected shape,
            or the number of scores differs from the number of lines.
    """

    # checks for lines (an empty array of any shape is accepted as "no detections")
    if not isinstance(lines, np.ndarray):
        raise ValueError(f"lines should be numpy array, got {type(lines)}.")
    if len(lines) != 0:
        if lines.ndim != 2:
            raise ValueError(
                f"lines should be of shape (N,4) meaning [...,[x_start, y_start, x_end, y_end],...], got {lines.shape}."
            )
        if lines.shape[1] != 4:
            raise ValueError(
                f"lines 2nd dimension should be of size 4 e.g. [x_start, y_start, x_end, y_end] got {lines.shape[1]}."
            )

    # checks for scores
    if not isinstance(scores, np.ndarray):
        raise ValueError(f"scores should be numpy array, got {type(scores)}.")
    if len(scores) != 0 and scores.ndim != 1:
        raise ValueError(
            f"scores should be of shape (N,) meaning [...,score,...], got {scores.shape}."
        )
    # Compare lengths unconditionally: empty scores paired with non-empty lines
    # must be rejected here rather than failing later with an IndexError.
    if len(scores) != len(lines):
        raise ValueError(
            f"scores should have same length as lines, got {len(scores)} and {len(lines)}."
        )

    line_detections = []
    for line, score in zip(lines, scores):
        line_detection = Line()
        line_detection.start_point = dai.Point2f(line[0], line[1])
        line_detection.end_point = dai.Point2f(line[2], line[3])
        # Line.confidence only accepts a Python float, so convert the numpy scalar.
        line_detection.confidence = float(score)
        line_detections.append(line_detection)

    lines_msg = Lines()
    lines_msg.lines = line_detections
    return lines_msg
55 changes: 53 additions & 2 deletions ml/messages/creators/keypoints.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import depthai as dai
import numpy as np
from typing import List
from ...messages import HandKeypoints
from ...messages import HandKeypoints, Keypoints

def create_hand_keypoints_message(hand_keypoints: np.ndarray, handedness: float, confidence: float, confidence_threshold: float) -> HandKeypoints:
"""
Expand Down Expand Up @@ -41,4 +41,55 @@ def create_hand_keypoints_message(hand_keypoints: np.ndarray, handedness: float,
points.append(pt)
hand_keypoints_msg.keypoints = points

return hand_keypoints_msg
return hand_keypoints_msg

def create_keypoints_message(keypoints: np.ndarray, scores: np.ndarray = None, confidence_threshold: float = None) -> "Keypoints":
    """
    Create a message for the keypoints. The message contains 2D or 3D coordinates of the detected keypoints.

    Args:
        keypoints (np.ndarray): Detected keypoints of shape (N,2 or 3) meaning [...,[x, y],...] or [...,[x, y, z],...].
        scores (np.ndarray): Confidence scores of the detected keypoints. Must be given together with confidence_threshold.
        confidence_threshold (float): Confidence threshold for the keypoints. Must be given together with scores.

    Returns:
        Keypoints: Message containing 2D or 3D coordinates of the detected keypoints.

    Raises:
        ValueError: If an input has the wrong type or shape, or if only one of
            scores / confidence_threshold is provided.
    """

    # NOTE(review): depending on the model output the raw tensor may need
    # reshaping to (N, 2) or (N, 3) first; per the review discussion that
    # reshape is handled in the parser, not here.
    if not isinstance(keypoints, np.ndarray):
        raise ValueError(f"keypoints should be numpy array, got {type(keypoints)}.")
    if len(keypoints.shape) != 2:
        raise ValueError(f"keypoints should be of shape (N,2 or 3) got {keypoints.shape}.")
    if keypoints.shape[1] not in [2, 3]:
        raise ValueError(f"keypoints 2nd dimension should be of size 2 or 3 e.g. [x, y] or [x, y, z], got {keypoints.shape[1]}.")

    if scores is not None:
        if not isinstance(scores, np.ndarray):
            raise ValueError(f"scores should be numpy array, got {type(scores)}.")
        if len(scores.shape) != 1:
            raise ValueError(f"scores should be of shape (N,) meaning [...,score,...], got {scores.shape}.")
        if keypoints.shape[0] != scores.shape[0]:
            raise ValueError(f"keypoints and scores should have the same length, got {keypoints.shape[0]} and {scores.shape[0]}.")
        if confidence_threshold is None:
            raise ValueError("confidence_threshold should be provided when scores are provided.")

    if confidence_threshold is not None:
        if not isinstance(confidence_threshold, float):
            raise ValueError(f"confidence_threshold should be float, got {type(confidence_threshold)}.")
        if scores is None:
            # A threshold without scores cannot be applied to anything.
            raise ValueError("scores should be provided when confidence_threshold is provided.")

    use_3d = keypoints.shape[1] == 3

    keypoints_msg = Keypoints()
    points = []
    for i, keypoint in enumerate(keypoints):
        # Drop keypoints whose score falls below the threshold.
        if scores is not None and scores[i] < confidence_threshold:
            continue
        pt = dai.Point3f()
        pt.x = keypoint[0]
        pt.y = keypoint[1]
        # 2D keypoints are stored as 3D points with z fixed to 0.
        pt.z = keypoint[2] if use_3d else 0
        points.append(pt)

    keypoints_msg.keypoints = points
    return keypoints_msg
34 changes: 34 additions & 0 deletions ml/messages/creators/matched_points.py

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we could instead do something like: https://docs.luxonis.com/software/depthai-components/nodes/feature_tracker.

We would likely be matching features across time, so perhaps using MatchedFeatures from above would make the most sense?

An alternative option is to provide two possible decoders, one for feature matching and one for feature tracking.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay. Does that mean I should store TrackedFeature objects in the reference_points and target_points lists inside MatchedPoints, or add full support for a TrackedFeatures msg consisting of TrackedFeature objects? If the latter, how can I "connect" (match) two features? By setting the same ID and increasing the age?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, you would need to set the same ID on the new point and increment the age. I would add full support for TrackedFeatures so that DAI can use this directly, or so we can see how it can be used downstream.

Let's clarify with DAI team internally if we have some further questions on this.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in e50e75a.

Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import numpy as np
from ...messages import MatchedPoints

def create_matched_points_message(reference_points: np.ndarray, target_points: np.ndarray) -> "MatchedPoints":
    """
    Create a message for the matched points. The message contains the reference and target points.

    Args:
        reference_points (np.ndarray): Reference points of shape (N,2) meaning [...,[x, y],...].
        target_points (np.ndarray): Target points of shape (N,2) meaning [...,[x, y],...].

    Returns:
        MatchedPoints: Message containing the reference and target points.

    Raises:
        ValueError: If an input is not a numpy array, is not of shape (N,2),
            or the two inputs contain a different number of points.
    """

    if not isinstance(reference_points, np.ndarray):
        raise ValueError(f"reference_points should be numpy array, got {type(reference_points)}.")
    if len(reference_points.shape) != 2:
        raise ValueError(f"reference_points should be of shape (N,2) meaning [...,[x, y],...], got {reference_points.shape}.")
    if reference_points.shape[1] != 2:
        raise ValueError(f"reference_points 2nd dimension should be of size 2 e.g. [x, y], got {reference_points.shape[1]}.")
    if not isinstance(target_points, np.ndarray):
        raise ValueError(f"target_points should be numpy array, got {type(target_points)}.")
    if len(target_points.shape) != 2:
        raise ValueError(f"target_points should be of shape (N,2) meaning [...,[x, y],...], got {target_points.shape}.")
    if target_points.shape[1] != 2:
        raise ValueError(f"target_points 2nd dimension should be of size 2 e.g. [x, y], got {target_points.shape[1]}.")
    # Points are matched pairwise by index, so both arrays must have the same N.
    if reference_points.shape[0] != target_points.shape[0]:
        raise ValueError(
            f"reference_points and target_points should have the same length, got {reference_points.shape[0]} and {target_points.shape[0]}."
        )

    matched_points_msg = MatchedPoints()
    # MatchedPoints only accepts Python floats; casting first keeps integer
    # arrays from being rejected by the message's type validation.
    matched_points_msg.reference_points = reference_points.astype(float).tolist()
    matched_points_msg.target_points = target_points.astype(float).tolist()

    return matched_points_msg
58 changes: 58 additions & 0 deletions ml/messages/lines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import depthai as dai
from typing import List

class Line(dai.Buffer):
    """Buffer message describing one detected line: two end points and a confidence."""

    def __init__(self):
        super().__init__()
        # All fields stay None until assigned through the validating setters.
        self._start_point: dai.Point2f = None
        self._end_point: dai.Point2f = None
        self._confidence: float = None

    @property
    def start_point(self) -> dai.Point2f:
        """Start point of the line."""
        return self._start_point

    @start_point.setter
    def start_point(self, value: dai.Point2f):
        if isinstance(value, dai.Point2f):
            self._start_point = value
            return
        raise TypeError(f"start_point must be of type Point2f, instead got {type(value)}.")

    @property
    def end_point(self) -> dai.Point2f:
        """End point of the line."""
        return self._end_point

    @end_point.setter
    def end_point(self, value: dai.Point2f):
        if isinstance(value, dai.Point2f):
            self._end_point = value
            return
        raise TypeError(f"end_point must be of type Point2f, instead got {type(value)}.")

    @property
    def confidence(self) -> float:
        """Confidence score of the line."""
        return self._confidence

    @confidence.setter
    def confidence(self, value: float):
        if isinstance(value, float):
            self._confidence = value
            return
        raise TypeError(f"confidence must be of type float, instead got {type(value)}.")


class Lines(dai.Buffer):
    """Buffer message holding a list of Line messages."""

    def __init__(self):
        super().__init__()
        self._lines: List[Line] = []

    @property
    def lines(self) -> List[Line]:
        """The stored list of Line messages."""
        return self._lines

    @lines.setter
    def lines(self, value: List[Line]):
        """
        Replace the stored lines.

        Raises:
            TypeError: If value is not a list or contains an item that is not a Line.
        """
        # Check against the builtin `list` rather than the typing alias,
        # matching the validation style used by MatchedPoints.
        if not isinstance(value, list):
            raise TypeError(f"lines must be of type List[Line], instead got {type(value)}.")
        for line in value:
            if not isinstance(line, Line):
                # Report the offending item's type; the container is known to be a list here.
                raise TypeError(f"lines must be of type List[Line], instead got an item of type {type(line)}.")
        self._lines = value
46 changes: 46 additions & 0 deletions ml/messages/matched_points.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import depthai as dai
from typing import List

class MatchedPoints(dai.Buffer):
    """Buffer message holding two lists of [x, y] points: reference points and target points."""

    def __init__(self):
        super().__init__()
        self._reference_points: List[List[float]] = []
        self._target_points: List[List[float]] = []

    @staticmethod
    def _checked_points(value, name: str):
        """Validate that value is a list of 2-element lists of floats and return it unchanged."""
        if not isinstance(value, list):
            raise TypeError(f"{name} must be a list.")
        # The checks run as three separate passes over the whole list, so the
        # kind of error reported for mixed-invalid input depends on the check
        # order, not on the position of the bad item.
        if not all(isinstance(point, list) for point in value):
            spaced_name = name.replace("_", " ")
            raise TypeError(f"{spaced_name} should be List[List[float]].")
        if any(len(point) != 2 for point in value):
            raise ValueError(f"Each item in {name} must be of length 2.")
        if not all(isinstance(coord, float) for point in value for coord in point):
            raise TypeError(f"All items in {name} must be of type float.")
        return value

    @property
    def reference_points(self) -> List[List[float]]:
        """The stored reference points."""
        return self._reference_points

    @reference_points.setter
    def reference_points(self, value: List[List[float]]):
        self._reference_points = self._checked_points(value, "reference_points")

    @property
    def target_points(self) -> List[List[float]]:
        """The stored target points."""
        return self._target_points

    @target_points.setter
    def target_points(self, value: List[List[float]]):
        self._target_points = self._checked_points(value, "target_points")
8 changes: 7 additions & 1 deletion ml/postprocessing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
from .mediapipe_hand_landmarker import MPHandLandmarkParser
from .scrfd import SCRFDParser
from .segmentation import SegmentationParser
from .superanimal_landmarker import SuperAnimalParser
from .mediapipe_face_landmarker import MPFaceLandmarkerParser
from .mlsd import MLSDParser

__all__ = [
'ImageOutputParser',
Expand All @@ -14,4 +17,7 @@
'MPHandLandmarkParser',
'SCRFDParser',
'SegmentationParser',
]
'SuperAnimalParser',
'MPFaceLandmarkerParser',
'MLSDParser',
]
43 changes: 43 additions & 0 deletions ml/postprocessing/mediapipe_face_landmarker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import depthai as dai
import numpy as np

from ..messages.creators import create_keypoints_message

class MPFaceLandmarkerParser(dai.node.ThreadedHostNode):
    """
    Host node that converts the raw Mediapipe face mesh output into a keypoints message.

    NOTE(review): reviewers asked whether this could be a generic keypoint
    parser. Per the review discussion, a general keypoint parser was added
    separately (commit 2eefa6e), SuperAnimal is expected to keep its own parser
    because it needs additional postprocessing, and the batch size is assumed
    to be 1.
    """

    def __init__(
        self,
        scale_factor=192,
    ):
        dai.node.ThreadedHostNode.__init__(self)
        self.input = dai.Node.Input(self)
        self.out = dai.Node.Output(self)

        # Divisor applied to every landmark coordinate in run().
        self.scale_factor = scale_factor

    def setScaleFactor(self, scale_factor):
        """Set the divisor applied to the landmark coordinates."""
        self.scale_factor = scale_factor

    def run(self):
        """
        Postprocessing logic for Mediapipe face mesh model.

        For each NNData message read from `self.input`, sends one keypoints
        message with 468 3D keypoints on `self.out`. The loop ends when the
        node stops running or the input queue raises a QueueException.
        """
        tensor_name = "conv2d_21_1"

        while self.isRunning():
            try:
                output: dai.NNData = self.input.get()
            except dai.MessageQueue.QueueException:
                break  # Pipeline was stopped

            tensor_info = output.getTensorInfo(tensor_name)
            # NOTE(review): the face_landmark blob reportedly returns
            # [1,1,1,1404], i.e. 468 * 3 values, hence the fixed reshape.
            landmarks = output.getTensor(tensor_name).reshape(468, 3).astype(np.float32)
            # Dequantize using the zero point and scale from the tensor info.
            landmarks = (landmarks - tensor_info.qpZp) * tensor_info.qpScale

            landmarks /= self.scale_factor

            msg = create_keypoints_message(landmarks)

            self.out.send(msg)
Loading