|
4 | 4 |
|
5 | 5 | Chroma and LlamaIndex both offer embedding functions which are wrappers on top of popular embedding models.
|
6 | 6 |
|
7 |
| -Unfortunately Chroma and LI's embedding functions are not compatible with each other. Below we offer an adapters to convert LI embedding function to Chroma one. |
| 7 | +Unfortunately Chroma and LI's embedding functions are not compatible with each other. Below we offer an adapter to |
| 8 | +convert a LlamaIndex embedding function to a Chroma one. |
8 | 9 |
|
9 | 10 | ```python
|
10 |
| -from llama_index.embeddings.base import BaseEmbedding |
11 |
| -from chromadb.api.types import EmbeddingFunction |
| 11 | +from llama_index.core.schema import TextNode |
| 12 | +from llama_index.core.base.embeddings.base import BaseEmbedding |
| 13 | +from chromadb import EmbeddingFunction, Documents, Embeddings |
| 14 | + |
12 | 15 |
|
13 | 16 | class LlamaIndexEmbeddingAdapter(EmbeddingFunction):
|
14 |
| - def __init__(self,ef:BaseEmbedding): |
15 |
| - self.ef = ef |
| 17 | + def __init__(self, ef: BaseEmbedding): |
| 18 | + self.ef = ef |
16 | 19 |
|
17 |
| - def __call__(self, input: Documents) -> Embeddings: |
18 |
| - return [node.embedding for node in self.ef(input)] |
| 20 | + def __call__(self, input: Documents) -> Embeddings: |
| 21 | + return [node.embedding for node in self.ef([TextNode(text=doc) for doc in input])] |
19 | 22 |
|
20 | 23 | ```
|
21 | 24 |
|
| 25 | +!!! warning "Text modality" |
| 26 | + |
| 27 | + The above adapter assumes that the input documents are text. If you are using a different modality, |
| 28 | + you will need to modify the adapter accordingly. |
| 29 | + |
22 | 30 | An example of how to use the above with LlamaIndex:
|
23 | 31 |
|
24 |
| -> Note: Make sure you have `OPENAI_API_KEY` as env var. |
| 32 | +!!! note "Prerequisites for example" |
| 33 | + |
| 34 | + Run `pip install llama-index chromadb llama-index-embeddings-fastembed fastembed` |
25 | 35 |
|
26 | 36 | ```python
|
27 |
| -from llama_index.embeddings import OpenAIEmbedding |
28 |
| -from llama_index import ServiceContext, set_global_service_context |
29 | 37 | import chromadb
|
| 38 | +from llama_index.embeddings.fastembed import FastEmbedEmbedding |
30 | 39 |
|
31 |
| -embed_model = OpenAIEmbedding(embed_batch_size=10) |
| 40 | +# make sure to include the above adapter and imports |
| 41 | +embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5") |
32 | 42 |
|
33 | 43 | client = chromadb.Client()
|
34 | 44 |
|
35 |
| -col = client.get_or_create_collection("test_collection",embedding_function=LlamaIndexEmbeddingAdapter(embed_model)) |
| 45 | +col = client.get_or_create_collection("test_collection", embedding_function=LlamaIndexEmbeddingAdapter(embed_model)) |
36 | 46 |
|
37 |
| -col.add(ids=["1"],documents=["this is a test document"]) |
38 |
| -# your embeddings should be of 1536 dimensions (OpenAI's ADA model) |
39 |
| -``` |
| 47 | +col.add(ids=["1"], documents=["this is a test document"]) |
| 48 | +``` |
0 commit comments