diff --git a/depthai_nodes/parsing_neural_network.py b/depthai_nodes/parsing_neural_network.py index 1b414d38..db757b74 100644 --- a/depthai_nodes/parsing_neural_network.py +++ b/depthai_nodes/parsing_neural_network.py @@ -51,10 +51,16 @@ def build( ) -> "ParsingNeuralNetwork": ... + @overload + def build( + self, input: dai.Node.Output, nn_source: str, fps: int + ) -> "ParsingNeuralNetwork": + ... + def build( self, input: dai.Node.Output, - nn_source: Union[dai.NNModelDescription, dai.NNArchive], + nn_source: Union[dai.NNModelDescription, dai.NNArchive, str], fps: int = None, ) -> "ParsingNeuralNetwork": """Builds the underlying NeuralNetwork node and creates parser nodes for each @@ -63,9 +69,9 @@ def build( @param input: Node's input. It is a linking point to which the NeuralNetwork is linked. It accepts the output of a Camera node. @type input: Node.Input - @param nn_source: NNModelDescription object containing the HubAI model - descriptors, or NNArchive object of the model. - @type nn_source: Union[dai.NNModelDescription, dai.NNArchive] + + @param nn_source: NNModelDescription object containing the HubAI model descriptors, NNArchive object of the model, or HubAI model slug in form of : or ::. + @type nn_source: Union[dai.NNModelDescription, dai.NNArchive, str] @param fps: FPS limit for the model runtime. @type fps: int @return: Returns the ParsingNeuralNetwork object. @@ -74,21 +80,23 @@ def build( object. """ - if isinstance(nn_source, dai.NNModelDescription): + platform = self.getParentPipeline().getDefaultDevice().getPlatformAsString() + + if isinstance(nn_source, str): + nn_source = dai.NNModelDescription(nn_source) + if isinstance(nn_source, (dai.NNModelDescription, str)): if not nn_source.platform: - nn_source.platform = ( - self.getParentPipeline().getDefaultDevice().getPlatformAsString() - ) + nn_source.platform = platform self._nn_archive = dai.NNArchive(dai.getModelFromZoo(nn_source)) elif isinstance(nn_source, dai.NNArchive): self._nn_archive = nn_source else: raise ValueError( - "nn_source must be either a NNModelDescription or NNArchive" + "nn_source must be either a NNModelDescription, NNArchive, or a string representing HubAI model slug." ) kwargs = {"fps": fps} if fps else {} - self._nn.build(input, nn_source, **kwargs) + self._nn.build(input, self._nn_archive, **kwargs) self._updateParsers(self._nn_archive) return self diff --git a/examples/README.md b/examples/README.md index 506669ac..ccf9bbe9 100644 --- a/examples/README.md +++ b/examples/README.md @@ -13,13 +13,13 @@ Prepare the model slug and run: ``` cd examples -python main.py -s +python main.py -m ``` For example: ``` -python main.py -s yolov6-nano:coco-416x416 +python main.py -m yolov6-nano:coco-416x416 ``` Note that for running the examples you need RVC2 device connected. @@ -29,7 +29,7 @@ If using OAK-D Lite, make sure to also set the FPS limit under 28.5. For example: ``` -python main.py -s yolov6-nano:coco-416x416 -fps 28 +python main.py -m yolov6-nano:coco-416x416 -fps 28 ``` Some models have small input sizes and requesting small image size from `Camera` is problematic so we request 4x bigger frame and resize it back down. During visualization image frame is resized back so some image quality is lost - only for visualization. diff --git a/examples/main.py b/examples/main.py index 76dde265..b72272de 100644 --- a/examples/main.py +++ b/examples/main.py @@ -1,23 +1,21 @@ import depthai as dai -from utils.arguments import initialize_argparser, parse_fps_limit, parse_model_slug -from utils.model import get_input_shape, get_model_from_hub, get_parser +from utils.arguments import initialize_argparser +from utils.model import get_input_shape, get_nn_archive_from_hub, get_parser from utils.xfeat import xfeat_mono, xfeat_stereo from visualization.visualize import visualize -from depthai_nodes.parser_generator import ParserGenerator +from depthai_nodes import ParsingNeuralNetwork # Initialize the argument parser arg_parser, args = initialize_argparser() -# Parse the model slug -model_slug, model_version_slug = parse_model_slug(args) -fps_limit = parse_fps_limit(args) - -# Get the model from the HubAI -nn_archive = get_model_from_hub(model_slug, model_version_slug) +# Parse the arguments +model = args.model +fps_limit = args.fps_limit # Get the parser -parser_class, parser_name = get_parser(nn_archive) +nn_archive = get_nn_archive_from_hub(model) +_, parser_name = get_parser(nn_archive) input_shape = get_input_shape(nn_archive) if parser_name == "XFeatMonoParser": @@ -31,58 +29,34 @@ with dai.Pipeline() as pipeline: cam = pipeline.create(dai.node.Camera).build() - # YOLO and MobileNet-SSD have native parsers in DAI - no need to create a separate parser - if parser_name == "YOLO" or parser_name == "SSD": - network = pipeline.create(dai.node.DetectionNetwork).build( - cam.requestOutput( - input_shape, type=dai.ImgFrame.Type.BGR888p, fps=fps_limit - ), - nn_archive, - ) - parser_queue = network.out.createOutputQueue() + if "gray" in model: + image_type = dai.ImgFrame.Type.GRAY8 else: image_type = dai.ImgFrame.Type.BGR888p - if "gray" in model_version_slug: - image_type = dai.ImgFrame.Type.GRAY8 - - if input_shape[0] < 128 or input_shape[1] < 128: - print( - "Input shape is too small so we are requesting a larger image and resizing it." - ) - print( - "During visualization we resize small image back to large, so image quality is lower." - ) - manip = pipeline.create(dai.node.ImageManip) - manip.initialConfig.setResize(input_shape) - large_input_shape = (input_shape[0] * 4, input_shape[1] * 4) - cam.requestOutput(large_input_shape, type=image_type, fps=fps_limit).link( - manip.inputImage - ) - network = pipeline.create(dai.node.NeuralNetwork).build( - manip.out, nn_archive - ) - else: - network = pipeline.create(dai.node.NeuralNetwork).build( - cam.requestOutput(input_shape, type=image_type, fps=fps_limit), - nn_archive, - ) - - parsers = pipeline.create(ParserGenerator).build(nn_archive) - - if len(parsers) == 0: - raise ValueError("No parsers were generated.") - - if len(parsers) > 1: - raise ValueError("Only models with one parser are supported.") - parser = parsers[0] - - # Linking - network.out.link(parser.input) + if input_shape[0] < 128 or input_shape[1] < 128: + print( + "Input shape is too small so we are requesting a larger image and resizing it." + ) + print( + "During visualization we resize small image back to large, so image quality is lower." + ) + manip = pipeline.create(dai.node.ImageManip) + manip.initialConfig.setResize(input_shape) + large_input_shape = (input_shape[0] * 4, input_shape[1] * 4) + cam.requestOutput(large_input_shape, type=image_type, fps=fps_limit).link( + manip.inputImage + ) + nn = pipeline.create(ParsingNeuralNetwork).build(manip.out, model) + else: + nn = pipeline.create(ParsingNeuralNetwork).build( + cam.requestOutput(input_shape, type=image_type, fps=fps_limit), + model, + ) - parser_queue = parser.out.createOutputQueue() + parser_queue = nn.out.createOutputQueue() - camera_queue = network.passthrough.createOutputQueue() + camera_queue = nn.passthrough.createOutputQueue() pipeline.start() diff --git a/examples/utils/arguments.py b/examples/utils/arguments.py index 444300ec..5e16351b 100644 --- a/examples/utils/arguments.py +++ b/examples/utils/arguments.py @@ -1,5 +1,4 @@ import argparse -from typing import Tuple def initialize_argparser(): @@ -11,9 +10,9 @@ def initialize_argparser(): Currently, only RVC2 devices are supported. If using OAK-D Lite, please set the FPS limit to 28." parser.add_argument( - "-s", - "--model_slug", - help="slug of the model in HubAI.", + "-m", + "--model", + help="HubAI model slug.", required=True, type=str, ) @@ -30,33 +29,3 @@ def initialize_argparser(): args = parser.parse_args() return parser, args - - -def parse_model_slug(args: argparse.Namespace) -> Tuple[str, str]: - """Parse the model slug from the arguments. - - Returns the model slug and model version slug. - """ - model_slug = args.model_slug - - # parse the model slug - if ":" not in model_slug: - raise NameError( - "Please provide the model slug in the format of 'model_slug:model_version_slug'" - ) - - model_slug_parts = model_slug.split(":") - model_slug = model_slug_parts[0] - model_version_slug = model_slug_parts[1] - - return model_slug, model_version_slug - - -def parse_fps_limit(args: argparse.Namespace) -> int: - """Parse the FPS limit from the arguments. - - Returns the FPS limit. - """ - fps_limit = args.fps_limit - - return fps_limit diff --git a/examples/utils/model.py b/examples/utils/model.py index a3c05612..1cb5a5bf 100644 --- a/examples/utils/model.py +++ b/examples/utils/model.py @@ -5,18 +5,13 @@ from depthai_nodes.ml.parsers import * -def get_model_from_hub(model_slug: str, model_version_slug: str) -> dai.NNArchive: - """Get the model from the HubAI and return the NN archive.""" - print( - f"Downloading model {model_slug} with version {model_version_slug} from HubAI..." - ) - modelDescription = dai.NNModelDescription( - modelSlug=model_slug, modelVersionSlug=model_version_slug, platform="RVC2" - ) +def get_nn_archive_from_hub(model: str) -> dai.NNArchive: + """Get NN archive from the HubAI based on model slug.""" + print(f"Downloading model {model} from HubAI...") + modelDescription = dai.NNModelDescription(model=model, platform="RVC2") archivePath = dai.getModelFromZoo(modelDescription) print("Download successful!") nn_archive = dai.NNArchive(archivePath) - return nn_archive