Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for ElasticSearch as a vector store #982

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions packages/components/credentials/ElasticsearchAPI.credential.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { INodeParams, INodeCredential } from '../src/Interface'

/**
 * Credential definition for connecting to Elasticsearch with an endpoint URL and an API key.
 *
 * NOTE(review): the class name is misspelled ("Electricsearch"). It is kept as-is because the
 * module's export statement references the class by this exact name.
 */
class ElectricsearchAPI implements INodeCredential {
    /** Human-readable name of the credential. */
    label: string
    /** Identifier of the credential type. */
    name: string
    /** Version of this credential definition. */
    version: number
    /** HTML help text pointing to the official API key guide. */
    description: string
    /** Fields the user fills in when creating the credential. */
    inputs: INodeParams[]

    constructor() {
        this.label = 'Elasticsearch API'
        this.name = 'elasticsearchApi'
        this.version = 1.0
        this.description =
            'Refer to <a target="_blank" href="https://www.elastic.co/guide/en/kibana/current/api-keys.html">official guide</a> on how to get an API Key from ElasticSearch'
        this.inputs = [
            {
                label: 'Elasticsearch Endpoint',
                name: 'endpoint',
                type: 'string'
            },
            {
                // Labelled "API Key" rather than "API ID": the value is stored under `apiKey`
                // and the description above tells the user to obtain an API Key.
                label: 'Elasticsearch API Key',
                name: 'apiKey',
                type: 'password'
            }
        ]
    }
}

// CommonJS export: exposes the credential class under the `credClass` key.
module.exports = { credClass: ElectricsearchAPI }
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { INodeParams, INodeCredential } from '../src/Interface'

/**
 * Credential definition for connecting to Elasticsearch with a Cloud ID plus
 * a username/password pair.
 */
class ElasticSearchUserPassword implements INodeCredential {
    /** Human-readable name of the credential. */
    label: string = 'ElasticSearch User Password'

    /** Identifier of the credential type. */
    name: string = 'elasticSearchUserPassword'

    /** Version of this credential definition. */
    version: number = 1.0

    /** HTML help text pointing to the official guide. */
    description: string =
        'Refer to <a target="_blank" href="https://www.elastic.co/guide/en/kibana/current/tutorial-secure-access-to-kibana.html">official guide</a> on how to get User Password from ElasticSearch'

    /** Fields the user fills in: cloud id, user name and (masked) password. */
    inputs: INodeParams[] = [
        { label: 'Cloud ID', name: 'cloudId', type: 'string' },
        { label: 'ElasticSearch User', name: 'username', type: 'string' },
        { label: 'ElasticSearch Password', name: 'password', type: 'password' }
    ]
}

// CommonJS export: exposes the credential class under the `credClass` key.
module.exports = { credClass: ElasticSearchUserPassword }
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
import {
getBaseClasses,
getCredentialData,
getCredentialParam,
ICommonObject,
INodeData,
INodeOutputsValue,
INodeParams
} from '../../../src'
import { Client, ClientOptions } from '@elastic/elasticsearch'
import { ElasticClientArgs, ElasticVectorSearch } from 'langchain/vectorstores/elasticsearch'
import { Embeddings } from 'langchain/embeddings/base'
import { VectorStore } from 'langchain/vectorstores/base'
import { Document } from 'langchain/document'

/**
 * Shared base for the Elasticsearch vector store nodes.
 *
 * It declares the node metadata common to all Elasticsearch nodes (credential, inputs, outputs),
 * builds the Elasticsearch client from the selected credential, and delegates the creation of
 * the actual vector store to subclasses via {@link constructVectorStore}.
 */
export abstract class ElasticSearchBase {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]
    credential: INodeParams
    outputs: INodeOutputsValue[]

    /**
     * Populates the metadata shared by every Elasticsearch node. Subclasses are expected to set
     * `label`, `name`, `version` and `description` themselves after calling `super()`.
     */
    protected constructor() {
        this.type = 'Elasticsearch'
        this.icon = 'elasticsearch.png'
        this.category = 'Vector Stores'
        this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever']
        this.credential = {
            label: 'Connect Credential',
            name: 'credential',
            type: 'credential',
            credentialNames: ['elasticsearchApi', 'elasticSearchUserPassword']
        }
        this.inputs = [
            {
                label: 'Embeddings',
                name: 'embeddings',
                type: 'Embeddings'
            },
            {
                label: 'Index Name',
                name: 'indexName',
                placeholder: '<INDEX_NAME>',
                type: 'string'
            },
            {
                label: 'Top K',
                name: 'topK',
                description: 'Number of top results to fetch. Default to 4',
                placeholder: '4',
                type: 'number',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Similarity',
                name: 'similarity',
                description: 'Similarity measure used in Elasticsearch.',
                type: 'options',
                default: 'l2_norm',
                options: [
                    {
                        label: 'l2_norm',
                        name: 'l2_norm'
                    },
                    {
                        label: 'dot_product',
                        name: 'dot_product'
                    },
                    {
                        label: 'cosine',
                        name: 'cosine'
                    }
                ],
                additionalParams: true,
                optional: true
            }
        ]
        this.outputs = [
            {
                label: 'Elasticsearch Retriever',
                name: 'retriever',
                baseClasses: this.baseClasses
            },
            {
                label: 'Elasticsearch Vector Store',
                name: 'vectorStore',
                baseClasses: [this.type, ...getBaseClasses(ElasticVectorSearch)]
            }
        ]
    }

    /**
     * Creates the vector store for this node.
     *
     * @param embeddings - Embeddings instance taken from the node inputs.
     * @param elasticSearchClientArgs - Client, index name and vector search options.
     * @param docs - Documents handed over by the subclass's `init`, or `undefined` when none.
     * @returns The vector store to expose as a retriever or as a vector store.
     */
    abstract constructVectorStore(
        embeddings: Embeddings,
        elasticSearchClientArgs: ElasticClientArgs,
        docs: Document<Record<string, any>>[] | undefined
    ): Promise<VectorStore>

    /**
     * Resolves credential and node inputs, builds the vector store and returns the requested output.
     *
     * @param nodeData - Node configuration (inputs, outputs, credential id).
     * @param _ - Unused.
     * @param options - Passed through to `getCredentialData`.
     * @param docs - Documents forwarded unchanged to {@link constructVectorStore}.
     * @returns A retriever when the selected output is `retriever`, otherwise the vector store.
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject, docs: Document<Record<string, any>>[] | undefined): Promise<any> {
        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
        const endPoint = getCredentialParam('endpoint', credentialData, nodeData)
        const cloudId = getCredentialParam('cloudId', credentialData, nodeData)
        const indexName = nodeData.inputs?.indexName as string
        const embeddings = nodeData.inputs?.embeddings as Embeddings
        const topK = nodeData.inputs?.topK as string
        // The input is declared above with name `similarity`; reading `similarityMeasure` alone
        // never picked up the user's choice. The old key is kept only as a fallback.
        const similarityMeasure = (nodeData.inputs?.similarity ?? nodeData.inputs?.similarityMeasure) as string
        // Fall back to the documented default of 4 when topK is missing or not a positive number,
        // instead of handing NaN to the retriever.
        const parsedTopK = topK ? parseFloat(topK) : 4
        const k = Number.isFinite(parsedTopK) && parsedTopK > 0 ? parsedTopK : 4
        const output = nodeData.outputs?.output as string

        const elasticSearchClientArgs = this.prepareClientArgs(endPoint, cloudId, credentialData, nodeData, similarityMeasure, indexName)

        const vectorStore = await this.constructVectorStore(embeddings, elasticSearchClientArgs, docs)

        if (output === 'retriever') {
            return vectorStore.asRetriever(k)
        } else if (output === 'vectorStore') {
            // `k` is not a declared property of VectorStore, hence the cast.
            ;(vectorStore as any).k = k
            return vectorStore
        }
        return vectorStore
    }

    /**
     * Builds the Elasticsearch client options from the credential.
     *
     * An `endpoint` selects API-key authentication against that node; otherwise a `cloudId`
     * selects username/password authentication against Elastic Cloud. When neither is set an
     * empty options object is returned.
     *
     * @param endPoint - Endpoint URL from the credential, if any.
     * @param cloudId - Cloud ID from the credential, if any.
     * @param credentialData - Credential data to read the secrets from.
     * @param nodeData - Node configuration, passed through to `getCredentialParam`.
     * @returns Options for the Elasticsearch `Client` constructor.
     */
    protected prepareConnectionOptions(
        endPoint: string | undefined,
        cloudId: string | undefined,
        credentialData: ICommonObject,
        nodeData: INodeData
    ): ClientOptions {
        if (endPoint) {
            const apiKey = getCredentialParam('apiKey', credentialData, nodeData)
            return {
                node: endPoint,
                auth: {
                    apiKey: apiKey
                }
            }
        }
        if (cloudId) {
            const username = getCredentialParam('username', credentialData, nodeData)
            const password = getCredentialParam('password', credentialData, nodeData)
            return {
                cloud: {
                    id: cloudId
                },
                auth: {
                    username: username,
                    password: password
                }
            }
        }
        return {}
    }

    /**
     * Assembles the arguments for the Elasticsearch vector store: client, index name and
     * similarity setting.
     *
     * @param endPoint - Endpoint URL from the credential, if any.
     * @param cloudId - Cloud ID from the credential, if any.
     * @param credentialData - Credential data to read the secrets from.
     * @param nodeData - Node configuration, passed through to `getCredentialParam`.
     * @param similarityMeasure - Requested similarity; anything other than `dot_product` or
     *   `cosine` (including `undefined`) results in `l2_norm`.
     * @param indexName - Name of the Elasticsearch index.
     * @returns Arguments containing a newly created client.
     */
    protected prepareClientArgs(
        endPoint: string | undefined,
        cloudId: string | undefined,
        credentialData: ICommonObject,
        nodeData: INodeData,
        similarityMeasure: string,
        indexName: string
    ): ElasticClientArgs {
        const elasticSearchClientOptions = this.prepareConnectionOptions(endPoint, cloudId, credentialData, nodeData)
        const similarity: 'l2_norm' | 'dot_product' | 'cosine' =
            similarityMeasure === 'dot_product' || similarityMeasure === 'cosine' ? similarityMeasure : 'l2_norm'
        const elasticSearchClientArgs: ElasticClientArgs = {
            client: new Client(elasticSearchClientOptions),
            indexName: indexName,
            vectorSearchOptions: { similarity }
        }
        return elasticSearchClientArgs
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { ICommonObject, INode, INodeData } from '../../../src/Interface'
import { Embeddings } from 'langchain/embeddings/base'

import { ElasticClientArgs, ElasticVectorSearch } from 'langchain/vectorstores/elasticsearch'
import { ElasticSearchBase } from './ElasticSearchBase'
import { VectorStore } from 'langchain/vectorstores/base'
import { Document } from 'langchain/document'

/**
 * Node that attaches to an Elasticsearch index which already contains documents.
 *
 * NOTE(review): the class name is misspelled ("Elasic"); it is referenced by the module's
 * export statement and therefore left untouched here.
 */
class ElasicsearchExisting_VectorStores extends ElasticSearchBase implements INode {
    constructor() {
        super()
        // Node-specific metadata; everything else comes from the base class.
        Object.assign(this, {
            label: 'Elasticsearch Load Existing Index',
            name: 'ElasticsearchIndex',
            version: 1.0,
            description: 'Load existing index from Elasticsearch (i.e: Document has been upserted)'
        })
    }

    /**
     * Opens the existing index. `docs` is part of the shared signature but is not used here.
     */
    async constructVectorStore(
        embeddings: Embeddings,
        elasticSearchClientArgs: ElasticClientArgs,
        docs: Document<Record<string, any>>[] | undefined
    ): Promise<VectorStore> {
        const existingStore = await ElasticVectorSearch.fromExistingIndex(embeddings, elasticSearchClientArgs)
        return existingStore
    }

    /**
     * Runs the base initialisation without any documents.
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const noDocuments = undefined
        return super.init(nodeData, _, options, noDocuments)
    }
}

// CommonJS export: exposes the node class under the `nodeClass` key.
module.exports = { nodeClass: ElasicsearchExisting_VectorStores }
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import { ICommonObject, INode, INodeData } from '../../../src/Interface'
import { Embeddings } from 'langchain/embeddings/base'
import { Document } from 'langchain/document'

import { ElasticClientArgs, ElasticVectorSearch } from 'langchain/vectorstores/elasticsearch'
import { flatten } from 'lodash'
import { ElasticSearchBase } from './ElasticSearchBase'
import { VectorStore } from 'langchain/vectorstores/base'

/**
 * Node that upserts the connected documents into an Elasticsearch index and then exposes the
 * resulting vector store / retriever.
 *
 * NOTE(review): the class name is misspelled ("Elasic"); it is referenced by the module's
 * export statement and therefore left untouched here.
 */
class ElasicsearchUpsert_VectorStores extends ElasticSearchBase implements INode {
    constructor() {
        super()
        this.label = 'Elasticsearch Upsert Document'
        this.name = 'ElasticsearchUpsert'
        this.version = 1.0
        this.description = 'Upsert documents to Elasticsearch'
        // The document input goes first, ahead of the inputs declared by the base class.
        this.inputs.unshift({
            label: 'Document',
            name: 'document',
            type: 'Document',
            list: true
        })
    }

    /**
     * Creates the vector store and adds the given documents to it.
     *
     * @param embeddings - Embeddings instance used by the store.
     * @param elasticSearchClientArgs - Client, index name and vector search options.
     * @param docs - Documents to add.
     * @returns The vector store after the documents have been added.
     */
    async constructVectorStore(
        embeddings: Embeddings,
        elasticSearchClientArgs: ElasticClientArgs,
        docs: Document<Record<string, any>>[]
    ): Promise<VectorStore> {
        const vectorStore = new ElasticVectorSearch(embeddings, elasticSearchClientArgs)
        await vectorStore.addDocuments(docs)
        return vectorStore
    }

    /**
     * Flattens and sanitises the `document` input, then runs the base initialisation with it.
     *
     * @param nodeData - Node configuration; `inputs.document` holds the (possibly nested) documents.
     * @param _ - Unused; forwarded to the base class.
     * @param options - Forwarded to the base class.
     * @returns Whatever the base `init` returns for the selected output.
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const docs = nodeData.inputs?.document as Document[]

        const flattenDocs = docs && docs.length ? flatten(docs) : []
        const finalDocs: Document[] = []
        for (const doc of flattenDocs) {
            // Skip empty slots so that one missing entry does not abort the whole upsert.
            if (!doc) continue

            // The following code is a workaround for a bug (Langchain Issue #1589) in the underlying library.
            // Store does not support object in metadata and fail silently.
            // The metadata is copied first so the incoming document objects are not modified.
            const metadata = { ...doc.metadata }
            delete metadata.pdf
            delete metadata.loc
            // end of workaround

            finalDocs.push(new Document({ ...doc, metadata }))
        }

        // Pass the sanitised documents (not the raw flattened input) to the base class.
        return super.init(nodeData, _, options, finalDocs)
    }
}

// CommonJS export: exposes the node class under the `nodeClass` key.
module.exports = { nodeClass: ElasicsearchUpsert_VectorStores }
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions packages/components/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"dependencies": {
"@aws-sdk/client-dynamodb": "^3.360.0",
"@dqbd/tiktoken": "^1.0.7",
"@elastic/elasticsearch": "^8.9.0",
"@getzep/zep-js": "^0.6.3",
"@gomomento/sdk": "^1.40.2",
"@google-ai/generativelanguage": "^0.2.1",
Expand Down