Skip to content

Commit 1248eb3

Browse files
authored
Merge pull request #4 from FlowiseAI/main
Feature/Add ability to create new doc store on upsert (FlowiseAI#3965)
2 parents 7d62918 + 20a797d commit 1248eb3

File tree

5 files changed

+122
-3
lines changed

5 files changed

+122
-3
lines changed

packages/api-documentation/src/yml/swagger.yml

+56
Original file line numberDiff line numberDiff line change
@@ -679,6 +679,11 @@ paths:
679679
type: string
680680
format: binary
681681
description: Files to be uploaded
682+
docId:
683+
type: string
684+
nullable: true
685+
example: '603a7b51-ae7c-4b0a-8865-e454ed2f6766'
686+
description: Document ID to use existing configuration
682687
loader:
683688
type: string
684689
nullable: true
@@ -704,6 +709,32 @@ paths:
704709
nullable: true
705710
example: '{"name":"postgresRecordManager"}'
706711
description: Record Manager configurations
712+
metadata:
713+
type: object
714+
nullable: true
715+
description: Metadata associated with the document
716+
example: { 'foo': 'bar' }
717+
replaceExisting:
718+
type: boolean
719+
nullable: true
720+
description: Whether to replace existing document loader with the new upserted chunks. However this does not delete the existing embeddings in the vector store
721+
createNewDocStore:
722+
type: boolean
723+
nullable: true
724+
description: Whether to create a new document store
725+
docStore:
726+
type: object
727+
nullable: true
728+
description: Only when createNewDocStore is true, pass in the new document store configuration
729+
properties:
730+
name:
731+
type: string
732+
example: plainText
733+
description: Name of the new document store to be created
734+
description:
735+
type: string
736+
example: plainText
737+
description: Description of the new document store to be created
707738
required:
708739
- files
709740
required: true
@@ -2350,16 +2381,37 @@ components:
23502381
docId:
23512382
type: string
23522383
format: uuid
2384+
nullable: true
23532385
description: Document ID within the store. If provided, existing configuration from the document will be used for the new document
23542386
metadata:
23552387
type: object
2388+
nullable: true
23562389
description: Metadata associated with the document
23572390
example: { 'foo': 'bar' }
23582391
replaceExisting:
23592392
type: boolean
2393+
nullable: true
23602394
description: Whether to replace existing document loader with the new upserted chunks. However this does not delete the existing embeddings in the vector store
2395+
createNewDocStore:
2396+
type: boolean
2397+
nullable: true
2398+
description: Whether to create a new document store
2399+
docStore:
2400+
type: object
2401+
nullable: true
2402+
description: Only when createNewDocStore is true, pass in the new document store configuration
2403+
properties:
2404+
name:
2405+
type: string
2406+
example: plainText
2407+
description: Name of the new document store to be created
2408+
description:
2409+
type: string
2410+
example: plainText
2411+
description: Description of the new document store to be created
23612412
loader:
23622413
type: object
2414+
nullable: true
23632415
properties:
23642416
name:
23652417
type: string
@@ -2370,6 +2422,7 @@ components:
23702422
description: Configuration for the loader
23712423
splitter:
23722424
type: object
2425+
nullable: true
23732426
properties:
23742427
name:
23752428
type: string
@@ -2380,6 +2433,7 @@ components:
23802433
description: Configuration for the text splitter
23812434
embedding:
23822435
type: object
2436+
nullable: true
23832437
properties:
23842438
name:
23852439
type: string
@@ -2390,6 +2444,7 @@ components:
23902444
description: Configuration for the embedding generator
23912445
vectorStore:
23922446
type: object
2447+
nullable: true
23932448
properties:
23942449
name:
23952450
type: string
@@ -2400,6 +2455,7 @@ components:
24002455
description: Configuration for the vector store
24012456
recordManager:
24022457
type: object
2458+
nullable: true
24032459
properties:
24042460
name:
24052461
type: string

packages/components/nodes/documentloaders/FireCrawl/FireCrawl.ts

+37-1
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ class FireCrawl_DocumentLoaders implements INode {
266266
this.name = 'fireCrawl'
267267
this.type = 'Document'
268268
this.icon = 'firecrawl.png'
269-
this.version = 2.0
269+
this.version = 2.1
270270
this.category = 'Document Loaders'
271271
this.description = 'Load data from URL using FireCrawl'
272272
this.baseClasses = [this.type]
@@ -307,6 +307,42 @@ class FireCrawl_DocumentLoaders implements INode {
307307
}
308308
],
309309
default: 'crawl'
310+
},
311+
{
312+
// maxCrawlPages
313+
label: 'Max Crawl Pages',
314+
name: 'maxCrawlPages',
315+
type: 'string',
316+
description: 'Maximum number of pages to crawl',
317+
optional: true,
318+
additionalParams: true
319+
},
320+
{
321+
// generateImgAltText
322+
label: 'Generate Image Alt Text',
323+
name: 'generateImgAltText',
324+
type: 'boolean',
325+
description: 'Generate alt text for images',
326+
optional: true,
327+
additionalParams: true
328+
},
329+
{
330+
// returnOnlyUrls
331+
label: 'Return Only URLs',
332+
name: 'returnOnlyUrls',
333+
type: 'boolean',
334+
description: 'Return only URLs of the crawled pages',
335+
optional: true,
336+
additionalParams: true
337+
},
338+
{
339+
// onlyMainContent
340+
label: 'Only Main Content',
341+
name: 'onlyMainContent',
342+
type: 'boolean',
343+
description: 'Extract only the main content of the page',
344+
optional: true,
345+
additionalParams: true
310346
}
311347
// ... (other input parameters)
312348
]

packages/server/src/Interface.DocumentStore.ts

+2
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ export interface IDocumentStoreUpsertData {
7676
docId: string
7777
metadata?: string | object
7878
replaceExisting?: boolean
79+
createNewDocStore?: boolean
80+
docStore?: IDocumentStore
7981
loader?: {
8082
name: string
8183
config: ICommonObject

packages/server/src/services/documentstore/index.ts

+13-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ import {
3232
INodeData,
3333
MODE,
3434
IOverrideConfig,
35-
IExecutePreviewLoader
35+
IExecutePreviewLoader,
36+
DocumentStoreDTO
3637
} from '../../Interface'
3738
import { DocumentStoreFileChunk } from '../../database/entities/DocumentStoreFileChunk'
3839
import { v4 as uuidv4 } from 'uuid'
@@ -1464,6 +1465,7 @@ const upsertDocStore = async (
14641465
}
14651466
}
14661467
const replaceExisting = data.replaceExisting ?? false
1468+
const createNewDocStore = data.createNewDocStore ?? false
14671469
const newLoader = typeof data.loader === 'string' ? JSON.parse(data.loader) : data.loader
14681470
const newSplitter = typeof data.splitter === 'string' ? JSON.parse(data.splitter) : data.splitter
14691471
const newVectorStore = typeof data.vectorStore === 'string' ? JSON.parse(data.vectorStore) : data.vectorStore
@@ -1533,6 +1535,15 @@ const upsertDocStore = async (
15331535
recordManagerConfig = JSON.parse(entity.recordManagerConfig || '{}')?.config
15341536
}
15351537

1538+
if (createNewDocStore) {
1539+
const docStoreBody = typeof data.docStore === 'string' ? JSON.parse(data.docStore) : data.docStore
1540+
const newDocumentStore = docStoreBody ?? { name: `Document Store ${Date.now().toString()}` }
1541+
const docStore = DocumentStoreDTO.toEntity(newDocumentStore)
1542+
const documentStore = appDataSource.getRepository(DocumentStore).create(docStore)
1543+
const dbResponse = await appDataSource.getRepository(DocumentStore).save(documentStore)
1544+
storeId = dbResponse.id
1545+
}
1546+
15361547
// Step 2: Replace with new values
15371548
loaderName = newLoader?.name ? getComponentLabelFromName(newLoader?.name) : loaderName
15381549
loaderId = newLoader?.name || loaderId
@@ -1687,6 +1698,7 @@ const upsertDocStore = async (
16871698
isVectorStoreInsert: true
16881699
})
16891700
res.docId = newDocId
1701+
if (createNewDocStore) res.storeId = storeId
16901702

16911703
return res
16921704
} catch (error) {

packages/ui/src/views/docstore/DocStoreAPIDialog.jsx

+14-1
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,13 @@ body_data = {
4141
"docId": "${dialogProps.loaderId}",
4242
"metadata": {}, # Add additional metadata to the document chunks
4343
"replaceExisting": True, # Replace existing document with the new upserted chunks
44+
"createNewDocStore": False, # Create a new document store
4445
"splitter": json.dumps({"config":{"chunkSize":20000}}) # Override existing configuration
4546
# "loader": "",
4647
# "vectorStore": "",
4748
# "embedding": "",
4849
# "recordManager": "",
50+
# "docStore": ""
4951
}
5052
5153
headers = {
@@ -71,11 +73,14 @@ formData.append("splitter", JSON.stringify({"config":{"chunkSize":20000}}));
7173
formData.append("metadata", "{}");
7274
// Replace existing document with the new upserted chunks
7375
formData.append("replaceExisting", "true");
76+
// Create a new document store
77+
formData.append("createNewDocStore", "false");
7478
// Override existing configuration
7579
// formData.append("loader", "");
7680
// formData.append("embedding", "");
7781
// formData.append("vectorStore", "");
7882
// formData.append("recordManager", "");
83+
// formData.append("docStore", "");
7984
8085
async function query(formData) {
8186
const response = await fetch(
@@ -105,11 +110,13 @@ curl -X POST http://localhost:3000/api/v1/document-store/upsert/${dialogProps.st
105110
-F 'splitter={"config":{"chunkSize":20000}}' \\
106111
-F "metadata={}" \\
107112
-F "replaceExisting=true" \\
113+
-F "createNewDocStore=false" \\
108114
# Override existing configuration:
109115
# -F "loader=" \\
110116
# -F "embedding=" \\
111117
# -F "vectorStore=" \\
112-
# -F "recordManager="
118+
# -F "recordManager=" \\
119+
# -F "docStore="
113120
\`\`\`
114121
`
115122
}
@@ -135,6 +142,7 @@ output = query({
135142
"docId": "${dialogProps.loaderId}",
136143
"metadata": "{}", # Add additional metadata to the document chunks
137144
"replaceExisting": True, # Replace existing document with the new upserted chunks
145+
"createNewDocStore": False, # Create a new document store
138146
# Override existing configuration
139147
"loader": {
140148
"config": {
@@ -149,6 +157,7 @@ output = query({
149157
# embedding: {},
150158
# vectorStore: {},
151159
# recordManager: {}
160+
# docStore: {}
152161
})
153162
print(output)
154163
\`\`\`
@@ -174,6 +183,7 @@ query({
174183
"docId": "${dialogProps.loaderId}",
175184
"metadata": "{}", // Add additional metadata to the document chunks
176185
"replaceExisting": true, // Replace existing document with the new upserted chunks
186+
"createNewDocStore": false, // Create a new document store
177187
// Override existing configuration
178188
"loader": {
179189
"config": {
@@ -188,6 +198,7 @@ query({
188198
// embedding: {},
189199
// vectorStore: {},
190200
// recordManager: {}
201+
// docStore: {}
191202
}).then((response) => {
192203
console.log(response);
193204
});
@@ -201,6 +212,7 @@ curl -X POST http://localhost:3000/api/v1/document-store/upsert/${dialogProps.st
201212
"docId": "${dialogProps.loaderId}",
202213
"metadata": "{}",
203214
"replaceExisting": true,
215+
"createNewDocStore": false,
204216
"loader": {
205217
"config": {
206218
"text": "This is a new text"
@@ -215,6 +227,7 @@ curl -X POST http://localhost:3000/api/v1/document-store/upsert/${dialogProps.st
215227
// "embedding": {},
216228
// "vectorStore": {},
217229
// "recordManager": {}
230+
// "docStore": {}
218231
}'
219232
220233
\`\`\`

0 commit comments

Comments
 (0)