Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/Add support for state-based metadata filter to Retriever Tool #3501

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 151 additions & 8 deletions packages/components/nodes/tools/RetrieverTool/RetrieverTool.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,121 @@
import { z } from 'zod'
import { DynamicStructuredTool } from '@langchain/core/tools'
import { CallbackManagerForToolRun } from '@langchain/core/callbacks/manager'
import { DynamicTool } from '@langchain/core/tools'
import { CallbackManager, CallbackManagerForToolRun, Callbacks, parseCallbackConfigArg } from '@langchain/core/callbacks/manager'
import { BaseDynamicToolInput, DynamicTool, StructuredTool, ToolInputParsingException } from '@langchain/core/tools'
import { BaseRetriever } from '@langchain/core/retrievers'
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { getBaseClasses } from '../../../src/utils'
import { SOURCE_DOCUMENTS_PREFIX } from '../../../src/agents'
import { RunnableConfig } from '@langchain/core/runnables'
import { customGet } from '../../sequentialagents/commonUtils'
import { VectorStoreRetriever } from '@langchain/core/vectorstores'

/**
 * Help text for the "Additional Metadata Filter" input (used as its `hint.value`).
 * Lists the `$flow.*` placeholders that may appear as filter values.
 * The trailing empty entry keeps the final newline of the rendered text.
 */
const howToUse = [
    'Add additional filters to vector store. You can also filter with flow config, including the current "state":',
    '- `$flow.sessionId`',
    '- `$flow.chatId`',
    '- `$flow.chatflowId`',
    '- `$flow.input`',
    '- `$flow.state`',
    ''
].join('\n')

/** Shorthand for a Zod object schema with unconstrained type parameters. */
type ZodObjectAny = z.ZodObject<any, any, any, any>

/** Per-call flow values that can be handed to a tool alongside its parsed input. */
type IFlowConfig = {
    sessionId?: string
    chatId?: string
    input?: string
    state?: ICommonObject
}

/**
 * Constructor options for the flow-aware structured tool defined in this file.
 * `schema` describes the tool input; `func` receives the parsed input, the
 * optional run manager, and an optional third flow-related argument.
 */
interface DynamicStructuredToolInput<T extends ZodObjectAny = ZodObjectAny> extends BaseDynamicToolInput {
    schema: T
    func?: (input: z.infer<T>, runManager?: CallbackManagerForToolRun, flowConfig?: IFlowConfig) => Promise<string>
}

/**
 * Structured tool whose wrapped function also receives flow context.
 *
 * Differences from a plain structured tool, as implemented below:
 * - `call` accepts a fourth `flowConfig` argument and forwards it to `_call`.
 * - `_call` passes the wrapped function a third argument: an object holding
 *   every tool argument under a `$`-prefixed key, plus a `$flow` entry merged
 *   from the object stored via `setFlowObject` and the per-call `flowConfig`.
 */
class DynamicStructuredTool<T extends z.ZodObject<any, any, any, any> = z.ZodObject<any, any, any, any>> extends StructuredTool<
    T extends ZodObjectAny ? T : ZodObjectAny
> {
    static lc_name() {
        return 'DynamicStructuredTool'
    }

    name: string

    description: string

    func: DynamicStructuredToolInput['func']

    // @ts-ignore
    schema: T

    /** Flow-level values set through `setFlowObject`; merged under `$flow` in `_call`. */
    private flowObj: any

    /**
     * @param fields Tool name, description, input schema, wrapped function and
     * optional `returnDirect` flag (falls back to the inherited value).
     */
    constructor(fields: DynamicStructuredToolInput<T>) {
        super(fields)
        this.name = fields.name
        this.description = fields.description
        this.func = fields.func
        this.returnDirect = fields.returnDirect ?? this.returnDirect
        this.schema = fields.schema
    }

    /**
     * Validates `arg` against the schema, runs the tool and reports the run to callbacks.
     *
     * @param arg Raw tool input; must satisfy `this.schema`.
     * @param configArg Runnable config or callbacks for this run.
     * @param tags Tags used when the config does not carry its own.
     * @param flowConfig Per-call flow values forwarded to `_call`.
     * @returns The tool output; truthy non-string results are JSON-stringified.
     * @throws ToolInputParsingException when `arg` does not match the schema.
     * @throws Whatever the wrapped function throws, after `handleToolError` has been notified.
     */
    async call(arg: any, configArg?: RunnableConfig | Callbacks, tags?: string[], flowConfig?: IFlowConfig): Promise<string> {
        const config = parseCallbackConfigArg(configArg)
        if (config.runName === undefined) {
            config.runName = this.name
        }
        let parsed
        try {
            parsed = await this.schema.parseAsync(arg)
        } catch (e) {
            throw new ToolInputParsingException(`Received tool input did not match expected schema`, JSON.stringify(arg))
        }
        const callbackManager_ = await CallbackManager.configure(
            config.callbacks,
            this.callbacks,
            config.tags || tags,
            this.tags,
            config.metadata,
            this.metadata,
            { verbose: this.verbose }
        )
        const runManager = await callbackManager_?.handleToolStart(
            this.toJSON(),
            typeof parsed === 'string' ? parsed : JSON.stringify(parsed),
            undefined,
            undefined,
            undefined,
            undefined,
            config.runName
        )
        let result
        try {
            result = await this._call(parsed, runManager, flowConfig)
        } catch (e) {
            await runManager?.handleToolError(e)
            throw e
        }
        if (result && typeof result !== 'string') {
            result = JSON.stringify(result)
        }
        await runManager?.handleToolEnd(result)
        return result
    }

    /**
     * Builds the flow-aware argument object and invokes the wrapped function.
     *
     * @param arg Parsed tool input.
     * @param runManager Callback manager for this tool run, if any.
     * @param flowConfig Per-call flow values; they override same-named keys of the stored flow object.
     * @returns The wrapped function's result.
     * @throws Error when the tool was constructed without a `func`.
     */
    // @ts-ignore
    protected async _call(arg: any, runManager?: CallbackManagerForToolRun, flowConfig?: IFlowConfig): Promise<string> {
        const fn = this.func
        if (typeof fn !== 'function') {
            // `func` is optional in the constructor input; fail with a readable message
            // instead of an opaque "is not a function" TypeError.
            throw new Error(`Tool "${this.name}" was created without a func to execute`)
        }

        const flowConfiguration: ICommonObject = {}
        // `typeof null` is 'object', so exclude null explicitly before enumerating keys.
        if (arg !== null && typeof arg === 'object') {
            for (const [key, value] of Object.entries(arg)) {
                flowConfiguration[`$${key}`] = value
            }
        }

        // inject flow properties; per-call values are kept even when no flow object was set
        if (this.flowObj || flowConfig) {
            flowConfiguration['$flow'] = { ...this.flowObj, ...flowConfig }
        }

        return fn(arg as any, runManager, flowConfiguration)
    }

    /** Stores flow-level values that `_call` merges into the `$flow` entry. */
    setFlowObject(flow: any) {
        this.flowObj = flow
    }
}

class Retriever_Tools implements INode {
label: string
Expand All @@ -22,7 +132,7 @@ class Retriever_Tools implements INode {
constructor() {
this.label = 'Retriever Tool'
this.name = 'retrieverTool'
this.version = 2.0
this.version = 3.0
this.type = 'RetrieverTool'
this.icon = 'retrievertool.svg'
this.category = 'Tools'
Expand Down Expand Up @@ -53,23 +163,55 @@ class Retriever_Tools implements INode {
name: 'returnSourceDocuments',
type: 'boolean',
optional: true
},
{
label: 'Additional Metadata Filter',
name: 'retrieverToolMetadataFilter',
type: 'json',
description: 'Add additional metadata filter on top of the existing filter from vector store',
optional: true,
additionalParams: true,
hint: {
label: 'What can you filter?',
value: howToUse
}
}
]
}

async init(nodeData: INodeData): Promise<any> {
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
const name = nodeData.inputs?.name as string
const description = nodeData.inputs?.description as string
const retriever = nodeData.inputs?.retriever as BaseRetriever
const returnSourceDocuments = nodeData.inputs?.returnSourceDocuments as boolean
const retrieverToolMetadataFilter = nodeData.inputs?.retrieverToolMetadataFilter

const input = {
name,
description
}

const func = async ({ input }: { input: string }, runManager?: CallbackManagerForToolRun) => {
const docs = await retriever.getRelevantDocuments(input, runManager?.getChild('retriever'))
const flow = { chatflowId: options.chatflowid }

const func = async ({ input }: { input: string }, _?: CallbackManagerForToolRun, flowConfig?: IFlowConfig) => {
// Resolve the user-supplied metadata filter and install it on the vector store
// before retrieving. The filter may arrive as an object or as a JSON string.
if (retrieverToolMetadataFilter) {
    const metadataFilter =
        typeof retrieverToolMetadataFilter === 'object' ? retrieverToolMetadataFilter : JSON.parse(retrieverToolMetadataFilter)

    const resolvedFilter: any = {}
    for (const [key, rawValue] of Object.entries(metadataFilter ?? {})) {
        // Only string values can be `$flow...` placeholders. Numbers, booleans,
        // nested objects and null are valid JSON filter values and are passed
        // through unchanged (calling `startsWith` on them would throw).
        const isFlowPlaceholder = typeof rawValue === 'string' && rawValue.startsWith('$flow')
        // `customGet` is defined elsewhere; presumably it resolves the dotted
        // placeholder path against `flowConfig` — confirm against commonUtils.
        resolvedFilter[key] = isFlowPlaceholder ? customGet(flowConfig, rawValue) : rawValue
    }

    // NOTE(review): this assignment replaces `vectorStore.filter` rather than merging
    // with any filter already configured on the store, and the change persists on
    // the shared store object after this call — confirm this is the intended behavior.
    const vectorStore = (retriever as VectorStoreRetriever<any>).vectorStore
    vectorStore.filter = resolvedFilter
}
const docs = await retriever.invoke(input)
const content = docs.map((doc) => doc.pageContent).join('\n\n')
const sourceDocuments = JSON.stringify(docs)
return returnSourceDocuments ? content + SOURCE_DOCUMENTS_PREFIX + sourceDocuments : content
Expand All @@ -80,6 +222,7 @@ class Retriever_Tools implements INode {
}) as any

const tool = new DynamicStructuredTool({ ...input, func, schema })
tool.setFlowObject(flow)
return tool
}
}
Expand Down
Loading