From 1b8af27b0ab1b6dddeb78a60aa7b860855ab993a Mon Sep 17 00:00:00 2001 From: Christian Kasim Loan Date: Mon, 2 Oct 2023 01:06:38 +0200 Subject: [PATCH 1/5] johnsnowlabs embeddings support --- .../integrations/providers/johnsnowlabs.mdx | 117 ++++++++++++ .../johnsnowlabs_embedding.ipynb | 178 ++++++++++++++++++ .../langchain/embeddings/__init__.py | 3 + .../langchain/embeddings/johnsnowlabs.py | 90 +++++++++ 4 files changed, 388 insertions(+) create mode 100644 docs/extras/integrations/providers/johnsnowlabs.mdx create mode 100644 docs/extras/integrations/text_embedding/johnsnowlabs_embedding.ipynb create mode 100644 libs/langchain/langchain/embeddings/johnsnowlabs.py diff --git a/docs/extras/integrations/providers/johnsnowlabs.mdx b/docs/extras/integrations/providers/johnsnowlabs.mdx new file mode 100644 index 0000000000000..39f3ea494cbf5 --- /dev/null +++ b/docs/extras/integrations/providers/johnsnowlabs.mdx @@ -0,0 +1,117 @@ +# Johnsnowlabs + +Gain access to the [johnsnowlabs](https://www.johnsnowlabs.com/) ecosystem of enterprise NLP libraries +with over 21.000 enterprise NLP models in over 200 languages with the open source `johnsnowlabs` library. +For all 24.000+ models, see the [John Snow Labs Models Hub](https://nlp.johnsnowlabs.com/models) + +## Installation and Setup + + +```bash +pip install johnsnowlabs +``` + +To [install enterprise features](https://nlp.johnsnowlabs.com/docs/en/jsl/install_licensed_quick), run: +```python +# for more details see https://nlp.johnsnowlabs.com/docs/en/jsl/install_licensed_quick +nlp.install() +``` + + +You can embed your queries and documents with `gpu`, `cpu`, `apple_silicon`, or `aarch` based optimized binaries. +By default, CPU binaries are used. +Once a session is started, you must restart your notebook to switch between GPU and CPU, or changes will not take effect. 
+ +## Embed Query with CPU: +```python +document = "foo bar" +embedding = JohnSnowLabsEmbeddings('embed_sentence.bert') +output = embedding.embed_query(document) +``` + + +## Embed Query with GPU: + + +```python +document = "foo bar" +embedding = JohnSnowLabsEmbeddings('embed_sentence.bert','gpu') +output = embedding.embed_query(document) +``` + + + + +## Embed Query with Apple Silicon (M1, M2, etc.): + +```python +document = "foo bar" +embedding = JohnSnowLabsEmbeddings('embed_sentence.bert','apple_silicon') +output = embedding.embed_query(document) +``` + + + +## Embed Query with AARCH: + +```python +document = "foo bar" +embedding = JohnSnowLabsEmbeddings('embed_sentence.bert','aarch') +output = embedding.embed_query(document) +``` + + + + + + +## Embed Document with CPU: +```python +documents = ["foo bar", 'bar foo'] +embedding = JohnSnowLabsEmbeddings('embed_sentence.bert','cpu') +output = embedding.embed_documents(documents) +``` + + + +## Embed Document with GPU: + +```python +documents = ["foo bar", 'bar foo'] +embedding = JohnSnowLabsEmbeddings('embed_sentence.bert','gpu') +output = embedding.embed_documents(documents) +``` + + + + + +## Embed Document with Apple Silicon (M1, M2, etc.): + + + +```python +documents = ["foo bar", 'bar foo'] +embedding = JohnSnowLabsEmbeddings('embed_sentence.bert','apple_silicon') +output = embedding.embed_documents(documents) +``` + + + +## Embed Document with AARCH: + + + +```python +documents = ["foo bar", 'bar foo'] +embedding = JohnSnowLabsEmbeddings('embed_sentence.bert','aarch') +output = embedding.embed_documents(documents) +``` + + + + +Models are loaded with [nlp.load](https://nlp.johnsnowlabs.com/docs/en/jsl/load_api) and a Spark session is started with [nlp.start()](https://nlp.johnsnowlabs.com/docs/en/jsl/start-a-sparksession) under the hood. 
+ + + diff --git a/docs/extras/integrations/text_embedding/johnsnowlabs_embedding.ipynb b/docs/extras/integrations/text_embedding/johnsnowlabs_embedding.ipynb new file mode 100644 index 0000000000000..c76464305bb7e --- /dev/null +++ b/docs/extras/integrations/text_embedding/johnsnowlabs_embedding.ipynb @@ -0,0 +1,178 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Johnsnowlabs Embedding\n", + "\n", + "### Loading the Johnsnowlabs embedding class to generate and query embeddings\n", + "\n", + "Models are loaded with [nlp.load](https://nlp.johnsnowlabs.com/docs/en/jsl/load_api) and a Spark session is started with [nlp.start()](https://nlp.johnsnowlabs.com/docs/en/jsl/start-a-sparksession) under the hood.\n", + "For all 24.000+ models, see the [John Snow Labs Models Hub](https://nlp.johnsnowlabs.com/models)\n" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "! pip install johnsnowlabs\n" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "# If you have an enterprise license, you can run this to install enterprise features\n", + "# from johnsnowlabs import nlp\n", + "# nlp.install()" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "source": [ + "#### Import the necessary classes" + ], + "metadata": { + "collapsed": false + }, + "execution_count": 1, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found existing installation: langchain 0.0.189\n", + "Uninstalling langchain-0.0.189:\n", + " Successfully uninstalled langchain-0.0.189\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "from langchain.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings\n" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "#### Initialize Johnsnowlabs Embeddings and 
Spark Session" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "\n", + "embedder = JohnSnowLabsEmbeddings('en.embed_sentence.biobert.clinical_base_cased')\n" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "#### Define some example texts . These could be any documents that you want to analyze - for example, news articles, social media posts, or product reviews." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "texts = [\"Cancer is caused by smoking\", \"Antibiotics aren't painkiller\"]" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "#### Generate and print embeddings for the texts . The HuggingfaceEmbeddings class generates an embedding for each document, which is a numerical representation of the document's content. These embeddings can be used for various natural language processing tasks, such as document similarity comparison or text classification." + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "embeddings = embedder.embed_documents(texts)\n", + "for i, embedding in enumerate(embeddings):\n", + " print(f\"Embedding for document {i+1}: {embedding}\")" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "#### Generate and print an embedding for a single piece of text. You can also generate an embedding for a single piece of text, such as a search query. This can be useful for tasks like information retrieval, where you want to find documents that are similar to a given query." 
+ ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "query = \"Cancer is caused by smoking\"\n", + "query_embedding = embedder.embed_query(query)\n", + "print(f\"Embedding for query: {query_embedding}\")" + ], + "metadata": { + "collapsed": false + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/langchain/langchain/embeddings/__init__.py b/libs/langchain/langchain/embeddings/__init__.py index 87cb5e90d5a0b..cbf8789640164 100644 --- a/libs/langchain/langchain/embeddings/__init__.py +++ b/libs/langchain/langchain/embeddings/__init__.py @@ -37,6 +37,8 @@ HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings, ) +from langchain.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings + from langchain.embeddings.huggingface_hub import HuggingFaceHubEmbeddings from langchain.embeddings.jina import JinaEmbeddings from langchain.embeddings.llamacpp import LlamaCppEmbeddings @@ -103,6 +105,7 @@ "AwaEmbeddings", "HuggingFaceBgeEmbeddings", "ErnieEmbeddings", + "JohnSnowLabsEmbeddings", ] diff --git a/libs/langchain/langchain/embeddings/johnsnowlabs.py b/libs/langchain/langchain/embeddings/johnsnowlabs.py new file mode 100644 index 0000000000000..df9c5b62eb111 --- /dev/null +++ b/libs/langchain/langchain/embeddings/johnsnowlabs.py @@ -0,0 +1,90 @@ +import os +import sys +from typing import Any, List + +from langchain.embeddings.base import Embeddings +from langchain.pydantic_v1 import BaseModel, Extra + + +class JohnSnowLabsEmbeddings(BaseModel, Embeddings): + """JohnSnowLabs embedding models + + To use, you 
should have the ``johnsnowlabs`` python package installed. + Example: .. code-block:: python + from langchain.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings + document = "foo bar" + embedding = JohnSnowLabsEmbeddings('embed_sentence.bert') + output = embedding.embed_query(document) + """ + + async def aembed_documents(self, texts: List[str]) -> List[List[float]]: + raise NotImplementedError("JohnSnowLabsEmbeddings does not support async yet") + + async def aembed_query(self, text: str) -> List[float]: + raise NotImplementedError("JohnSnowLabsEmbeddings does not support async yet") + + model: Any + + def __init__(self, model='embed_sentence.bert', hardware_target='cpu', **kwargs: Any): + """Initialize the johnsnowlabs model.""" + super().__init__(**kwargs) + # 1) Check imports + try: + from johnsnowlabs import nlp + from nlu.pipe.pipeline import NLUPipeline + except ImportError as exc: + raise ImportError( + "Could not import johnsnowlabs python package. " + "Please install it with `pip install johnsnowlabs`.") from exc + + # 2) Start a Spark Session + try: + os.environ['PYSPARK_PYTHON'] = sys.executable + os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable + nlp.start(hardware_target=hardware_target) + except Exception as exc: + raise Exception('Failure starting Spark Session') from exc + # 3) Load the model + try: + if isinstance(model, str): + self.model = nlp.load(model) + elif isinstance(model, NLUPipeline): + self.model = model + else: + self.model = nlp.to_nlu_pipe(model) + except Exception as exc: + raise Exception('Failure loading model') from exc + + class Config: + """Configuration for this pydantic object.""" + extra = Extra.forbid + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Compute doc embeddings using a JohnSnowLabs transformer model. + + Args: texts: The list of texts to embed. + Returns: List of embeddings, one for each text. 
""" + + return _select_embed_document(self.model.predict(texts, output_level='document')) + + def embed_query(self, text: str) -> List[float]: + """Compute query embeddings using a JohnSnowLabs transformer model. + Args: text: The text to embed. + Returns: Embeddings for the text. """ + return _select_embed_query(self.model.predict(text, output_level='document')) + + +def _select_embed_document(df): + emb_col = None + for c in df.columns: + if 'embedding' in c: + emb_col = c + return [vec.tolist() for vec in df[emb_col].tolist()] + + +def _select_embed_query(df): + emb_col = None + for c in df.columns: + if 'embedding' in c: + emb_col = c + return [vec.tolist() for vec in df[emb_col].tolist()][0] From 48453cc75f1784189915288f200e9b5290482d0a Mon Sep 17 00:00:00 2001 From: Christian Kasim Loan Date: Sat, 7 Oct 2023 02:59:51 +0200 Subject: [PATCH 2/5] make format and make lint applied --- .../langchain/embeddings/__init__.py | 3 +- .../langchain/embeddings/johnsnowlabs.py | 30 +++++++++++-------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/libs/langchain/langchain/embeddings/__init__.py b/libs/langchain/langchain/embeddings/__init__.py index a5451ea7a985a..7c9acdb5e488e 100644 --- a/libs/langchain/langchain/embeddings/__init__.py +++ b/libs/langchain/langchain/embeddings/__init__.py @@ -40,11 +40,10 @@ HuggingFaceInferenceAPIEmbeddings, HuggingFaceInstructEmbeddings, ) -from langchain.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings - from langchain.embeddings.huggingface_hub import HuggingFaceHubEmbeddings from langchain.embeddings.javelin_ai_gateway import JavelinAIGatewayEmbeddings from langchain.embeddings.jina import JinaEmbeddings +from langchain.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings from langchain.embeddings.llamacpp import LlamaCppEmbeddings from langchain.embeddings.localai import LocalAIEmbeddings from langchain.embeddings.minimax import MiniMaxEmbeddings diff --git 
a/libs/langchain/langchain/embeddings/johnsnowlabs.py b/libs/langchain/langchain/embeddings/johnsnowlabs.py index df9c5b62eb111..dc6bc6ba2bba9 100644 --- a/libs/langchain/langchain/embeddings/johnsnowlabs.py +++ b/libs/langchain/langchain/embeddings/johnsnowlabs.py @@ -25,7 +25,9 @@ async def aembed_query(self, text: str) -> List[float]: model: Any - def __init__(self, model='embed_sentence.bert', hardware_target='cpu', **kwargs: Any): + def __init__( + self, model="embed_sentence.bert", hardware_target="cpu", **kwargs: Any + ): """Initialize the johnsnowlabs model.""" super().__init__(**kwargs) # 1) Check imports @@ -35,15 +37,16 @@ def __init__(self, model='embed_sentence.bert', hardware_target='cpu', **kwargs: except ImportError as exc: raise ImportError( "Could not import johnsnowlabs python package. " - "Please install it with `pip install johnsnowlabs`.") from exc + "Please install it with `pip install johnsnowlabs`." + ) from exc # 2) Start a Spark Session try: - os.environ['PYSPARK_PYTHON'] = sys.executable - os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable + os.environ["PYSPARK_PYTHON"] = sys.executable + os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable nlp.start(hardware_target=hardware_target) except Exception as exc: - raise Exception('Failure starting Spark Session') from exc + raise Exception("Failure starting Spark Session") from exc # 3) Load the model try: if isinstance(model, str): @@ -53,31 +56,34 @@ def __init__(self, model='embed_sentence.bert', hardware_target='cpu', **kwargs: else: self.model = nlp.to_nlu_pipe(model) except Exception as exc: - raise Exception('Failure loading model') from exc + raise Exception("Failure loading model") from exc class Config: """Configuration for this pydantic object.""" + extra = Extra.forbid def embed_documents(self, texts: List[str]) -> List[List[float]]: """Compute doc embeddings using a JohnSnowLabs transformer model. Args: texts: The list of texts to embed. 
- Returns: List of embeddings, one for each text. """ + Returns: List of embeddings, one for each text.""" - return _select_embed_document(self.model.predict(texts, output_level='document')) + return _select_embed_document( + self.model.predict(texts, output_level="document") + ) def embed_query(self, text: str) -> List[float]: """Compute query embeddings using a JohnSnowLabs transformer model. Args: text: The text to embed. - Returns: Embeddings for the text. """ - return _select_embed_query(self.model.predict(text, output_level='document')) + Returns: Embeddings for the text.""" + return _select_embed_query(self.model.predict(text, output_level="document")) def _select_embed_document(df): emb_col = None for c in df.columns: - if 'embedding' in c: + if "embedding" in c: emb_col = c return [vec.tolist() for vec in df[emb_col].tolist()] @@ -85,6 +91,6 @@ def _select_embed_document(df): def _select_embed_query(df): emb_col = None for c in df.columns: - if 'embedding' in c: + if "embedding" in c: emb_col = c return [vec.tolist() for vec in df[emb_col].tolist()][0] From 796dbf051156d74605b14e59d74b7cad9598bbb9 Mon Sep 17 00:00:00 2001 From: Christian Kasim Loan Date: Mon, 9 Oct 2023 16:08:18 +0200 Subject: [PATCH 3/5] added tests --- .../embeddings/test_johnsnowlabs.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 libs/langchain/tests/integration_tests/embeddings/test_johnsnowlabs.py diff --git a/libs/langchain/tests/integration_tests/embeddings/test_johnsnowlabs.py b/libs/langchain/tests/integration_tests/embeddings/test_johnsnowlabs.py new file mode 100644 index 0000000000000..3def60b56e770 --- /dev/null +++ b/libs/langchain/tests/integration_tests/embeddings/test_johnsnowlabs.py @@ -0,0 +1,20 @@ +"""Test johnsnowlabs embeddings.""" + +from langchain.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings + + +def test_johnsnowlabs_embed_document() -> None: + """Test johnsnowlabs embeddings.""" + documents = ["foo bar", "bar foo"] + 
embedding = JohnSnowLabsEmbeddings() + output = embedding.embed_documents(documents) + assert len(output) == 2 + assert len(output[0]) == 128 + + +def test_johnsnowlabs_embed_query() -> None: + """Test johnsnowlabs embeddings.""" + document = "foo bar" + embedding = JohnSnowLabsEmbeddings() + output = embedding.embed_query(document) + assert len(output) == 128 From a94dd692e27a86e7838c5adee9aaa9608207f520 Mon Sep 17 00:00:00 2001 From: Christian Kasim Loan Date: Sat, 14 Oct 2023 15:52:27 +0200 Subject: [PATCH 4/5] update docs --- .../text_embedding/johnsnowlabs_embedding.ipynb | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/extras/integrations/text_embedding/johnsnowlabs_embedding.ipynb b/docs/extras/integrations/text_embedding/johnsnowlabs_embedding.ipynb index c76464305bb7e..0fdc7ac3b0a5b 100644 --- a/docs/extras/integrations/text_embedding/johnsnowlabs_embedding.ipynb +++ b/docs/extras/integrations/text_embedding/johnsnowlabs_embedding.ipynb @@ -57,12 +57,19 @@ } ] }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "collapsed": false + } + }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ - "from langchain.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings\n" + "from langchain.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings" ], "metadata": { "collapsed": false @@ -82,8 +89,7 @@ "execution_count": null, "outputs": [], "source": [ - "\n", - "embedder = JohnSnowLabsEmbeddings('en.embed_sentence.biobert.clinical_base_cased')\n" + "embedder = JohnSnowLabsEmbeddings('en.embed_sentence.biobert.clinical_base_cased')" ], "metadata": { "collapsed": false @@ -112,7 +118,7 @@ { "cell_type": "markdown", "source": [ - "#### Generate and print embeddings for the texts . The HuggingfaceEmbeddings class generates an embedding for each document, which is a numerical representation of the document's content. 
These embeddings can be used for various natural language processing tasks, such as document similarity comparison or text classification." + "#### Generate and print embeddings for the texts . The JohnSnowLabsEmbeddings class generates an embedding for each document, which is a numerical representation of the document's content. These embeddings can be used for various natural language processing tasks, such as document similarity comparison or text classification." ], "metadata": { "collapsed": false From 6d67a237ea1e98cdd2066faf54af747332c20c89 Mon Sep 17 00:00:00 2001 From: Bagatur Date: Thu, 26 Oct 2023 20:12:11 -0700 Subject: [PATCH 5/5] cr --- .../langchain/embeddings/johnsnowlabs.py | 68 +++++++++---------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/libs/langchain/langchain/embeddings/johnsnowlabs.py b/libs/langchain/langchain/embeddings/johnsnowlabs.py index dc6bc6ba2bba9..ead57e52283a4 100644 --- a/libs/langchain/langchain/embeddings/johnsnowlabs.py +++ b/libs/langchain/langchain/embeddings/johnsnowlabs.py @@ -10,23 +10,22 @@ class JohnSnowLabsEmbeddings(BaseModel, Embeddings): """JohnSnowLabs embedding models To use, you should have the ``johnsnowlabs`` python package installed. - Example: .. code-block:: python - from langchain.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings - document = "foo bar" - embedding = JohnSnowLabsEmbeddings('embed_sentence.bert') - output = embedding.embed_query(document) - """ + Example: + .. 
code-block:: python - async def aembed_documents(self, texts: List[str]) -> List[List[float]]: - raise NotImplementedError("JohnSnowLabsEmbeddings does not support async yet") + from langchain.embeddings.johnsnowlabs import JohnSnowLabsEmbeddings - async def aembed_query(self, text: str) -> List[float]: - raise NotImplementedError("JohnSnowLabsEmbeddings does not support async yet") + embedding = JohnSnowLabsEmbeddings(model='embed_sentence.bert') + output = embedding.embed_query("foo bar") + """ - model: Any + model: Any = "embed_sentence.bert" def __init__( - self, model="embed_sentence.bert", hardware_target="cpu", **kwargs: Any + self, + model: Any = "embed_sentence.bert", + hardware_target: str = "cpu", + **kwargs: Any ): """Initialize the johnsnowlabs model.""" super().__init__(**kwargs) @@ -40,14 +39,15 @@ def __init__( "Please install it with `pip install johnsnowlabs`." ) from exc - # 2) Start a Spark Session + # 2) Start a Spark Session try: os.environ["PYSPARK_PYTHON"] = sys.executable os.environ["PYSPARK_DRIVER_PYTHON"] = sys.executable nlp.start(hardware_target=hardware_target) except Exception as exc: raise Exception("Failure starting Spark Session") from exc - # 3) Load the model + + # 3) Load the model try: if isinstance(model, str): self.model = nlp.load(model) @@ -66,31 +66,27 @@ class Config: def embed_documents(self, texts: List[str]) -> List[List[float]]: """Compute doc embeddings using a JohnSnowLabs transformer model. - Args: texts: The list of texts to embed. - Returns: List of embeddings, one for each text.""" + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. 
+ """ - return _select_embed_document( - self.model.predict(texts, output_level="document") - ) + df = self.model.predict(texts, output_level="document") + emb_col = None + for c in df.columns: + if "embedding" in c: + emb_col = c + return [vec.tolist() for vec in df[emb_col].tolist()] def embed_query(self, text: str) -> List[float]: """Compute query embeddings using a JohnSnowLabs transformer model. - Args: text: The text to embed. - Returns: Embeddings for the text.""" - return _select_embed_query(self.model.predict(text, output_level="document")) - - -def _select_embed_document(df): - emb_col = None - for c in df.columns: - if "embedding" in c: - emb_col = c - return [vec.tolist() for vec in df[emb_col].tolist()] + Args: + text: The text to embed. -def _select_embed_query(df): - emb_col = None - for c in df.columns: - if "embedding" in c: - emb_col = c - return [vec.tolist() for vec in df[emb_col].tolist()][0] + Returns: + Embeddings for the text. + """ + return self.embed_documents([text])[0]