qdrant · joein · May 31, 2024 · May 21, 2024 · May 23, 2024 · May 23, 2024
diff --git a/fastembed/image/onnx_embedding.py b/fastembed/image/onnx_embedding.py
@@ -17,6 +17,16 @@
             "hf": "Qdrant/clip-ViT-B-32-vision",
         },
         "model_file": "model.onnx",
+    },
+    {
+        "model": "AndrewOgn/resnet_onnx",
+        "dim": 2048,
+        "description": "ResNet-50 from `Deep Residual Learning for Image Recognition <https://arxiv.org/abs/1512.03385>`__.",
+        "size_in_GB": 0.1,
+        "sources": {
+            "hf": "AndrewOgn/resnet_onnx",
+        },
+        "model_file": "model.onnx",
     }
 ]
 

diff --git a/fastembed/image/onnx_image_model.py b/fastembed/image/onnx_image_model.py
@@ -47,11 +47,18 @@ def load_onnx_model(
         )
         self.processor = load_preprocessor(model_dir=model_dir)
 
+    def _build_onnx_input(self, encoded: np.ndarray) -> Dict[str, np.ndarray]:
+        """Feed the same encoded batch to every input the loaded model declares."""
+        # Names come from the model itself, so the mapping does not rely on a
+        # hard-coded input name such as "pixel_values".
+        input_names = (node.name for node in self.model.get_inputs())
+        return dict.fromkeys(input_names, encoded)
+
     def onnx_embed(self, images: List[PathInput]) -> np.ndarray:
         with contextlib.ExitStack():
             image_files = [Image.open(image) for image in images]
             encoded = self.processor(image_files)
-        onnx_input = {"pixel_values": encoded}
+        onnx_input = self._build_onnx_input(encoded)
         onnx_input = self._preprocess_onnx_input(onnx_input)
 
         model_output = self.model.run(None, onnx_input)

diff --git a/fastembed/image/transform/operators.py b/fastembed/image/transform/operators.py
@@ -59,6 +59,25 @@ def __init__(self, scale: float = 1 / 255):
     def __call__(self, images: List[np.ndarray]) -> List[np.ndarray]:
         return [rescale(image, scale=self.scale) for image in images]
 
+
+class PILtoNDarray(Transform):
+    """Convert PIL images to channel-first ``(C, H, W)`` NumPy arrays.
+
+    Items that are already ``np.ndarray`` instances are returned unchanged.
+    """
+
+    def __call__(self, images: List[np.ndarray]) -> List[np.ndarray]:
+        # np.asarray() on a multi-channel PIL image yields (H, W, C), so the
+        # channel axis has to move to the front; swapping only the last two
+        # axes would produce (H, C, W).
+        # NOTE(review): assumes later transforms expect (C, H, W) - confirm.
+        return [
+            image
+            if isinstance(image, np.ndarray)
+            else np.asarray(image).transpose((2, 0, 1))
+            for image in images
+        ]
+
 
 class Compose:
     def __init__(self, transforms: List[Transform]):
@@ -96,6 +115,8 @@ def from_config(cls, config: Dict[str, Any]) -> "Compose":
             else:
                 raise ValueError(f"Invalid crop size: {crop_size}")
             transforms.append(CenterCrop(size=crop_size))
+        if config.get("PIL_to_ndarray", False):
+            transforms.append(PILtoNDarray())
         if config.get("do_rescale", True):
             rescale_factor = config.get("rescale_factor", 1 / 255)
             transforms.append(Rescale(scale=rescale_factor))