* Nomic embeddings support
* Jina models moved to pooled-normalized embeddings
* Canonical vector for nomic-ai/nomic-embed-text-v1.5-Q
* Moved all nomic models to pooled_embeddings

Co-authored-by: d.rudenko <dimitriyrudenk@gmail.com>
Showing 6 changed files with 119 additions and 114 deletions.
@@ -0,0 +1,87 @@
from typing import Any, Dict, Iterable, List, Type

import numpy as np

from fastembed.common.onnx_model import OnnxOutputContext
from fastembed.common.utils import normalize
from fastembed.text.onnx_embedding import OnnxTextEmbedding, OnnxTextEmbeddingWorker
from fastembed.text.onnx_text_model import TextEmbeddingWorker

supported_pooled_models = [
    {
        "model": "nomic-ai/nomic-embed-text-v1.5",
        "dim": 768,
        "description": "8192 context length english model",
        "size_in_GB": 0.52,
        "sources": {
            "hf": "nomic-ai/nomic-embed-text-v1.5",
        },
        "model_file": "onnx/model.onnx",
    },
    {
        "model": "nomic-ai/nomic-embed-text-v1.5-Q",
        "dim": 768,
        "description": "Quantized 8192 context length english model",
        "size_in_GB": 0.13,
        "sources": {
            "hf": "nomic-ai/nomic-embed-text-v1.5",
        },
        "model_file": "onnx/model_quantized.onnx",
    },
    {
        "model": "nomic-ai/nomic-embed-text-v1",
        "dim": 768,
        "description": "8192 context length english model",
        "size_in_GB": 0.52,
        "sources": {
            "hf": "nomic-ai/nomic-embed-text-v1",
        },
        "model_file": "onnx/model.onnx",
    },
]


class PooledEmbedding(OnnxTextEmbedding):
    @classmethod
    def _get_worker_class(cls) -> Type[TextEmbeddingWorker]:
        return PooledEmbeddingWorker

    @classmethod
    def mean_pooling(
        cls, model_output: np.ndarray, attention_mask: np.ndarray
    ) -> np.ndarray:
        token_embeddings = model_output
        input_mask_expanded = np.expand_dims(attention_mask, axis=-1)
        input_mask_expanded = np.tile(
            input_mask_expanded, (1, 1, token_embeddings.shape[-1])
        )
        input_mask_expanded = input_mask_expanded.astype(float)
        sum_embeddings = np.sum(token_embeddings * input_mask_expanded, axis=1)
        sum_mask = np.sum(input_mask_expanded, axis=1)
        pooled_embeddings = sum_embeddings / np.maximum(sum_mask, 1e-9)
        return pooled_embeddings

    @classmethod
    def list_supported_models(cls) -> List[Dict[str, Any]]:
        """Lists the supported models.

        Returns:
            List[Dict[str, Any]]: A list of dictionaries containing the model information.
        """
        return supported_pooled_models

    def _post_process_onnx_output(
        self, output: OnnxOutputContext
    ) -> Iterable[np.ndarray]:
        embeddings = output.model_output
        attn_mask = output.attention_mask
        return self.mean_pooling(embeddings, attn_mask).astype(np.float32)


class PooledEmbeddingWorker(OnnxTextEmbeddingWorker):
    def init_embedding(
        self, model_name: str, cache_dir: str, **kwargs
    ) -> OnnxTextEmbedding:
        return PooledEmbedding(
            model_name=model_name, cache_dir=cache_dir, threads=1, **kwargs
        )