qdrant · joein · May 31, 2024 · May 21, 2024 · May 23, 2024 · May 23, 2024
diff --git a/experiments/Example. Convert Resnet50 to ONNX.ipynb b/experiments/Example. Convert Resnet50 to ONNX.ipynb
@@ -0,0 +1,104 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "4bdb2a91-fa2a-4cee-ad5a-176cc957394d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Feature vector shape: (2, 2048, 1, 1)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "import torch.onnx\n",
+    "import torchvision.models as models\n",
+    "import torchvision.transforms as transforms\n",
+    "from PIL import Image\n",
+    "import numpy as np\n",
+    "from tests.config import TEST_MISC_DIR\n",
+    "\n",
+    "# Load pre-trained ResNet-50 model\n",
+    "resnet = models.resnet50(pretrained=True)\n",
+    "resnet = torch.nn.Sequential(*(list(resnet.children())[:-1]))  # Remove the last fully connected layer\n",
+    "resnet.eval()\n",
+    "\n",
+    "# Define preprocessing transform\n",
+    "preprocess = transforms.Compose([\n",
+    "    transforms.Resize(256),\n",
+    "    transforms.CenterCrop(224),\n",
+    "    transforms.ToTensor(),\n",
+    "    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),\n",
+    "])\n",
+    "\n",
+    "# Load and preprocess the image\n",
+    "def preprocess_image(image_path):\n",
+    "    input_image = Image.open(image_path)\n",
+    "    input_tensor = preprocess(input_image)\n",
+    "    input_batch = input_tensor.unsqueeze(0)  # Add batch dimension\n",
+    "    return input_batch\n",
+    "\n",
+    "# Example input for exporting\n",
+    "input_image = preprocess_image('example.jpg')\n",
+    "\n",
+    "# Export the model to ONNX with dynamic axes\n",
+    "torch.onnx.export(\n",
+    "    resnet, \n",
+    "    input_image, \n",
+    "    \"model.onnx\", \n",
+    "    export_params=True, \n",
+    "    opset_version=9, \n",
+    "    input_names=['input'], \n",
+    "    output_names=['output'],\n",
+    "    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}\n",
+    ")\n",
+    "\n",
+    "# Load ONNX model\n",
+    "import onnx\n",
+    "import onnxruntime as ort\n",
+    "\n",
+    "onnx_model = onnx.load(\"model.onnx\")\n",
+    "ort_session = ort.InferenceSession(\"model.onnx\")\n",
+    "\n",
+    "# Run inference and extract feature vectors\n",
+    "def extract_feature_vectors(image_paths):\n",
+    "    input_images = [preprocess_image(image_path) for image_path in image_paths]\n",
+    "    input_batch = torch.cat(input_images, dim=0)  # Combine images into a single batch\n",
+    "    ort_inputs = {ort_session.get_inputs()[0].name: input_batch.numpy()}\n",
+    "    ort_outs = ort_session.run(None, ort_inputs)\n",
+    "    return ort_outs[0]\n",
+    "\n",
+    "# Example usage\n",
+    "images = [TEST_MISC_DIR / \"image.jpeg\", str(TEST_MISC_DIR / \"small_image.jpeg\")]  # Replace with your image paths\n",
+    "feature_vectors = extract_feature_vectors(images)\n",
+    "print(\"Feature vector shape:\", feature_vectors.shape)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/fastembed/image/onnx_embedding.py b/fastembed/image/onnx_embedding.py
@@ -17,6 +17,16 @@
             "hf": "Qdrant/clip-ViT-B-32-vision",
         },
         "model_file": "model.onnx",
+    },
+    {
+        "model": "AndrewOgn/resnet_onnx",
+        "dim": 2048,
+        "description": "ResNet-50 from `Deep Residual Learning for Image Recognition <https://arxiv.org/abs/1512.03385>`__.",
+        "size_in_GB": 0.1,
+        "sources": {
+            "hf": "AndrewOgn/resnet_onnx",
+        },
+        "model_file": "model.onnx",
     }
 ]
 

diff --git a/fastembed/image/onnx_image_model.py b/fastembed/image/onnx_image_model.py
@@ -47,15 +47,17 @@ def load_onnx_model(
         )
         self.processor = load_preprocessor(model_dir=model_dir)
 
+    def _build_onnx_input(self, encoded: np.ndarray) -> Dict[str, np.ndarray]:
+        return dict.fromkeys((inp.name for inp in self.model.get_inputs()), encoded)  # every declared model input receives the same batch
+
     def onnx_embed(self, images: List[PathInput]) -> np.ndarray:
         with contextlib.ExitStack():
             image_files = [Image.open(image) for image in images]
             encoded = self.processor(image_files)
-        onnx_input = {"pixel_values": encoded}
+        onnx_input = self._build_onnx_input(encoded)  # key the batch by the model's own input name(s) rather than a fixed "pixel_values"
         onnx_input = self._preprocess_onnx_input(onnx_input)
-
         model_output = self.model.run(None, onnx_input)
-        embeddings = model_output[0]
+        embeddings = model_output[0].reshape(len(images), -1)  # one flat row per image, e.g. pooled output (N, C, 1, 1) -> (N, C)
         return embeddings
 
     def _embed_images(

diff --git a/fastembed/image/transform/operators.py b/fastembed/image/transform/operators.py
@@ -59,6 +59,9 @@ def __init__(self, scale: float = 1 / 255):
     def __call__(self, images: List[np.ndarray]) -> List[np.ndarray]:
         return [rescale(image, scale=self.scale) for image in images]
 
+class PILtoNDarray:  # multi-band PIL image (H, W, C) -> channel-first array (C, H, W); ndarray inputs pass through as-is
+    def __call__(self, images: List[Union[Image.Image, np.ndarray]]) -> List[np.ndarray]:
+        return [np.asarray(image).transpose(2, 0, 1) if isinstance(image, Image.Image) else image for image in images]
 
 class Compose:
     def __init__(self, transforms: List[Transform]):
@@ -96,6 +99,9 @@ def from_config(cls, config: Dict[str, Any]) -> "Compose":
             else:
                 raise ValueError(f"Invalid crop size: {crop_size}")
             transforms.append(CenterCrop(size=crop_size))
+
+        transforms.append(PILtoNDarray())
+
         if config.get("do_rescale", True):
             rescale_factor = config.get("rescale_factor", 1 / 255)
             transforms.append(Rescale(scale=rescale_factor))

diff --git a/tests/test_image_onnx_embeddings.py b/tests/test_image_onnx_embeddings.py
@@ -8,13 +8,15 @@
 
 CANONICAL_VECTOR_VALUES = {
     "Qdrant/clip-ViT-B-32-vision": np.array([-0.0098, 0.0128, -0.0274, 0.002, -0.0059]),
+    "AndrewOgn/resnet_onnx": np.array([0.0322, 0.0027, 0.0144, 0.0243, 0.0119]),  # expected embeddings[0, :5]
 }
 
 
 def test_embedding():
     is_ci = os.getenv("CI")
 
     for model_desc in ImageEmbedding.list_supported_models():
+        print(model_desc)
         if not is_ci and model_desc["size_in_GB"] > 1:
             continue
 
@@ -28,6 +30,7 @@ def test_embedding():
         assert embeddings.shape == (2, dim)
 
         canonical_vector = CANONICAL_VECTOR_VALUES[model_desc["model"]]
+        print(embeddings[0, : canonical_vector.shape[0]])
         assert np.allclose(
             embeddings[0, : canonical_vector.shape[0]], canonical_vector, atol=1e-3
         ), model_desc["model"]