From 946ef036e8f5a79cd18cd0d3b057f67c1ae14578 Mon Sep 17 00:00:00 2001 From: Milder Hernandez Cagua Date: Wed, 23 Oct 2024 00:58:29 -0700 Subject: [PATCH] Add Text Search API and VectorStoreTextSearch implementation --- .../JDBCVectorStoreRecordCollectionTest.java | 26 +- ...ashSetVectorStoreRecordCollectionTest.java | 18 +- ...isJsonVectorStoreRecordCollectionTest.java | 18 +- .../memory/InMemoryVolatileVectorStore.java | 34 +-- .../memory/VectorStoreWithAzureAISearch.java | 39 ++- .../memory/VectorStoreWithJDBC.java | 29 +-- .../memory/VectorStoreWithRedis.java | 30 +-- ...reAISearchVectorStoreRecordCollection.java | 18 +- .../jdbc/JDBCVectorStoreQueryProvider.java | 9 +- .../jdbc/JDBCVectorStoreRecordCollection.java | 3 +- .../jdbc/SQLVectorStoreQueryProvider.java | 3 +- .../PostgreSQLVectorStoreQueryProvider.java | 9 +- ...disHashSetVectorStoreRecordCollection.java | 7 +- .../RedisJsonVectorStoreRecordCollection.java | 12 +- ...disVectorStoreCollectionSearchMapping.java | 4 +- .../VolatileVectorStoreRecordCollection.java | 13 +- .../DefaultTextSearchResultMapper.java | 18 ++ .../DefaultTextSearchStringMapper.java | 19 ++ .../data/textsearch/KernelSearchResults.java | 70 +++++ .../data/textsearch/TextSearch.java | 36 +++ .../data/textsearch/TextSearchFilter.java | 79 ++++++ .../data/textsearch/TextSearchOptions.java | 150 +++++++++++ .../data/textsearch/TextSearchResult.java | 103 ++++++++ .../data/textsearch/TextSearchResultLink.java | 15 ++ .../textsearch/TextSearchResultMapper.java | 15 ++ .../data/textsearch/TextSearchResultName.java | 15 ++ .../textsearch/TextSearchResultValue.java | 15 ++ .../textsearch/TextSearchStringMapper.java | 15 ++ .../data/vectorsearch/VectorOperations.java | 4 +- .../vectorsearch/VectorSearchResults.java | 71 ++++++ .../vectorsearch/VectorizableTextSearch.java | 4 +- .../data/vectorsearch/VectorizedSearch.java | 4 +- .../vectorstorage/VectorStoreTextSearch.java | 239 ++++++++++++++++++ .../VectorStoreTextSearchOptions.java | 8 + 
.../annotations/VectorStoreRecordData.java | 2 +- .../annotations/VectorStoreRecordKey.java | 2 +- .../annotations/VectorStoreRecordVector.java | 2 +- .../options/VectorSearchOptions.java | 81 +++--- ...latileVectorStoreRecordCollectionTest.java | 15 +- 39 files changed, 1070 insertions(+), 184 deletions(-) create mode 100644 semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/DefaultTextSearchResultMapper.java create mode 100644 semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/DefaultTextSearchStringMapper.java create mode 100644 semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/KernelSearchResults.java create mode 100644 semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearch.java create mode 100644 semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchFilter.java create mode 100644 semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchOptions.java create mode 100644 semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResult.java create mode 100644 semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultLink.java create mode 100644 semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultMapper.java create mode 100644 semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultName.java create mode 100644 semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultValue.java create mode 100644 semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchStringMapper.java create mode 100644 
semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorSearchResults.java create mode 100644 semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/VectorStoreTextSearch.java create mode 100644 semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/VectorStoreTextSearchOptions.java diff --git a/api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/jdbc/JDBCVectorStoreRecordCollectionTest.java b/api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/jdbc/JDBCVectorStoreRecordCollectionTest.java index 8342f489..17b36756 100644 --- a/api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/jdbc/JDBCVectorStoreRecordCollectionTest.java +++ b/api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/jdbc/JDBCVectorStoreRecordCollectionTest.java @@ -416,12 +416,12 @@ public void exactSearch(QueryProvider provider, String embeddingName) { VectorSearchOptions options = VectorSearchOptions.builder() .withVectorFieldName(embeddingName) - .withLimit(3) + .withTop(3) .build(); // Embeddings similar to the third hotel List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, - options).block(); + options).block().getResults(); assertNotNull(results); assertEquals(3, results.size()); // The third hotel should be the most similar @@ -429,12 +429,12 @@ public void exactSearch(QueryProvider provider, String embeddingName) { options = VectorSearchOptions.builder() .withVectorFieldName(embeddingName) - .withOffset(1) - .withLimit(-100) + .withSkip(1) + .withTop(-100) .build(); // Skip the first result - results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, options).block(); + results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, options).block().getResults(); assertNotNull(results); assertEquals(1, 
results.size()); // The first hotel should be the most similar @@ -453,12 +453,12 @@ public void approximateSearch(QueryProvider provider) { VectorSearchOptions options = VectorSearchOptions.builder() .withVectorFieldName("indexedEuclidean") - .withLimit(5) + .withTop(5) .build(); // Embeddings similar to the third hotel List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, - options).block(); + options).block().getResults(); assertNotNull(results); assertEquals(5, results.size()); // The third hotel should be the most similar @@ -477,7 +477,7 @@ public void searchWithFilterEqualToFilter(QueryProvider provider, String embeddi VectorSearchOptions options = VectorSearchOptions.builder() .withVectorFieldName(embeddingName) - .withLimit(3) + .withTop(3) .withVectorSearchFilter( VectorSearchFilter.builder() .equalTo("rating", 4.0).build()) @@ -485,7 +485,7 @@ public void searchWithFilterEqualToFilter(QueryProvider provider, String embeddi // Embeddings similar to the third hotel, but as the filter is set to 4.0, the third hotel should not be returned List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, - options).block(); + options).block().getResults(); assertNotNull(results); assertEquals(3, results.size()); // The first hotel should be the most similar @@ -504,7 +504,7 @@ public void searchWithAnyTagEqualToFilter(QueryProvider provider, String embeddi VectorSearchOptions options = VectorSearchOptions.builder() .withVectorFieldName(embeddingName) - .withLimit(3) + .withTop(3) .withVectorSearchFilter( VectorSearchFilter.builder() .anyTagEqualTo("tags", "city").build()) @@ -512,7 +512,7 @@ public void searchWithAnyTagEqualToFilter(QueryProvider provider, String embeddi // Embeddings similar to the third hotel, but as the filter is set to 4.0, the third hotel should not be returned List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, - options).block(); + options).block().getResults(); assertNotNull(results); assertEquals(3, 
results.size()); // The first hotel should be the most similar @@ -530,7 +530,7 @@ public void postgresSearchIncludeAndNotIncludeVectors() { recordCollection.upsertBatchAsync(hotels, null).block(); List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, - null).block(); + null).block().getResults(); assertNotNull(results); assertEquals(3, results.size()); // The third hotel should be the most similar @@ -541,7 +541,7 @@ public void postgresSearchIncludeAndNotIncludeVectors() { .withIncludeVectors(true) .build(); - results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, options).block(); + results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, options).block().getResults(); assertNotNull(results); assertEquals(3, results.size()); // The third hotel should be the most similar diff --git a/api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/RedisHashSetVectorStoreRecordCollectionTest.java b/api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/RedisHashSetVectorStoreRecordCollectionTest.java index a2b5e2c1..5256c629 100644 --- a/api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/RedisHashSetVectorStoreRecordCollectionTest.java +++ b/api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/RedisHashSetVectorStoreRecordCollectionTest.java @@ -387,9 +387,9 @@ public void search(RecordCollectionOptions options, String embeddingName) { .build(); // Embeddings similar to the third hotel - List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block(); + List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block().getResults(); assertNotNull(results); - assertEquals(VectorSearchOptions.DEFAULT_RESULT_LIMIT, results.size(), indexingFailureMessage); + assertEquals(VectorSearchOptions.DEFAULT_TOP, results.size(), 
indexingFailureMessage); // The third hotel should be the most similar assertEquals(hotels.get(2).getId(), results.get(0).getRecord().getId(), indexingFailureMessage); // Score should be different than zero @@ -412,9 +412,9 @@ public void searchWithVectors(RecordCollectionOptions options, String embeddingN .build(); // Embeddings similar to the third hotel - List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block(); + List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block().getResults(); assertNotNull(results); - assertEquals(VectorSearchOptions.DEFAULT_RESULT_LIMIT, results.size(), indexingFailureMessage); + assertEquals(VectorSearchOptions.DEFAULT_TOP, results.size(), indexingFailureMessage); // The third hotel should be the most similar assertEquals(hotels.get(2).getId(), results.get(0).getRecord().getId(), indexingFailureMessage); assertNotNull(results.get(0).getRecord().getEuclidean()); @@ -431,12 +431,12 @@ public void searchWithOffSet(RecordCollectionOptions options, String embeddingNa VectorSearchOptions searchOptions = VectorSearchOptions.builder() .withVectorFieldName(embeddingName) - .withOffset(1) - .withLimit(4) + .withSkip(1) + .withTop(4) .build(); // Embeddings similar to the third hotel - List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block(); + List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block().getResults(); assertNotNull(results); assertEquals(4, results.size(), indexingFailureMessage); // The first hotel should be the most similar @@ -454,7 +454,7 @@ public void searchWithFilterEqualToFilter(RecordCollectionOptions recordCollecti VectorSearchOptions options = VectorSearchOptions.builder() .withVectorFieldName(embeddingName) - .withLimit(3) + .withTop(3) .withVectorSearchFilter( VectorSearchFilter.builder() .equalTo("rating", 4.0).build()) @@ -462,7 +462,7 @@ public void 
searchWithFilterEqualToFilter(RecordCollectionOptions recordCollecti // Embeddings similar to the third hotel, but as the filter is set to 4.0, the third hotel should not be returned List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, - options).block(); + options).block().getResults(); assertNotNull(results); assertEquals(3, results.size()); // The first hotel should be the most similar diff --git a/api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/RedisJsonVectorStoreRecordCollectionTest.java b/api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/RedisJsonVectorStoreRecordCollectionTest.java index 393690fd..7ab0bcc8 100644 --- a/api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/RedisJsonVectorStoreRecordCollectionTest.java +++ b/api-test/integration-tests/src/test/java/com/microsoft/semantickernel/tests/connectors/memory/redis/RedisJsonVectorStoreRecordCollectionTest.java @@ -387,9 +387,9 @@ public void search(RecordCollectionOptions options, String embeddingName) { .build(); // Embeddings similar to the third hotel - List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block(); + List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block().getResults(); assertNotNull(results); - assertEquals(VectorSearchOptions.DEFAULT_RESULT_LIMIT, results.size(), indexingFailureMessage); + assertEquals(VectorSearchOptions.DEFAULT_TOP, results.size(), indexingFailureMessage); // The third hotel should be the most similar assertEquals(hotels.get(2).getId(), results.get(0).getRecord().getId(), indexingFailureMessage); // Score should be different than zero @@ -412,9 +412,9 @@ public void searchWithVectors(RecordCollectionOptions options, String embeddingN .build(); // Embeddings similar to the third hotel - List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, 
searchOptions).block(); + List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block().getResults(); assertNotNull(results); - assertEquals(VectorSearchOptions.DEFAULT_RESULT_LIMIT, results.size(), indexingFailureMessage); + assertEquals(VectorSearchOptions.DEFAULT_TOP, results.size(), indexingFailureMessage); // The third hotel should be the most similar assertEquals(hotels.get(2).getId(), results.get(0).getRecord().getId(), indexingFailureMessage); assertNotNull(results.get(0).getRecord().getEuclidean()); @@ -431,12 +431,12 @@ public void searchWithOffSet(RecordCollectionOptions options, String embeddingNa VectorSearchOptions searchOptions = VectorSearchOptions.builder() .withVectorFieldName(embeddingName) - .withOffset(1) - .withLimit(4) + .withSkip(1) + .withTop(4) .build(); // Embeddings similar to the third hotel - List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block(); + List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, searchOptions).block().getResults(); assertNotNull(results); assertEquals(4, results.size(), indexingFailureMessage); // The first hotel should be the most similar @@ -454,7 +454,7 @@ public void searchWithFilterEqualToFilter(RecordCollectionOptions recordCollecti VectorSearchOptions options = VectorSearchOptions.builder() .withVectorFieldName(embeddingName) - .withLimit(3) + .withTop(3) .withVectorSearchFilter( VectorSearchFilter.builder() .equalTo("rating", 4.0).build()) @@ -462,7 +462,7 @@ public void searchWithFilterEqualToFilter(RecordCollectionOptions recordCollecti // Embeddings similar to the third hotel, but as the filter is set to 4.0, the third hotel should not be returned List> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, - options).block(); + options).block().getResults(); assertNotNull(results); assertEquals(3, results.size()); // The first hotel should be the most similar diff --git 
a/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/InMemoryVolatileVectorStore.java b/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/InMemoryVolatileVectorStore.java index 8dc1372d..00166005 100644 --- a/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/InMemoryVolatileVectorStore.java +++ b/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/InMemoryVolatileVectorStore.java @@ -7,10 +7,13 @@ import com.azure.core.credential.KeyCredential; import com.fasterxml.jackson.annotation.JsonProperty; import com.microsoft.semantickernel.aiservices.openai.textembedding.OpenAITextEmbeddingGenerationService; +import com.microsoft.semantickernel.data.textsearch.TextSearchResultValue; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult; import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection; import com.microsoft.semantickernel.data.VolatileVectorStore; import com.microsoft.semantickernel.data.VolatileVectorStoreRecordCollectionOptions; +import com.microsoft.semantickernel.data.vectorstorage.VectorStoreTextSearch; +import com.microsoft.semantickernel.data.vectorstorage.VectorStoreTextSearchOptions; import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData; import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey; import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector; @@ -38,12 +41,12 @@ public class InMemoryVolatileVectorStore { private static final int EMBEDDING_DIMENSIONS = 1536; static class GitHubFile { - @JsonProperty("fileId") // Set a different name for the storage field if needed - 
@VectorStoreRecordKey() + @VectorStoreRecordKey private final String id; - @VectorStoreRecordData() + @VectorStoreRecordData private final String description; @VectorStoreRecordData + @TextSearchResultValue private final String link; @VectorStoreRecordVector(dimensions = EMBEDDING_DIMENSIONS, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.COSINE_DISTANCE) private final List embedding; @@ -125,27 +128,24 @@ public static void inMemoryStoreAndSearch( .then(storeData(collection, embeddingGeneration, sampleData())) .block(); + // Build a vectorized search + var vectorStoreTextSearch = VectorStoreTextSearch.builder() + .withVectorizedSearch(collection) + .withTextEmbeddingGenerationService(embeddingGeneration) + .build(); + // Search for results // Volatile store executes an exhaustive search, for approximate search use Azure AI Search, Redis or JDBC with PostgreSQL - var results = search("How to get started", collection, embeddingGeneration).block(); + String query = "How to get started?"; + var results = vectorStoreTextSearch.searchAsync(query, null) + .block(); - if (results == null || results.isEmpty()) { + if (results == null || results.getTotalCount() == 0) { System.out.println("No search results found."); return; } - var searchResult = results.get(0); - System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n", - searchResult.getScore(), searchResult.getRecord().link, - searchResult.getRecord().description); - } - private static Mono>> search( - String searchText, - VectorStoreRecordCollection recordCollection, - OpenAITextEmbeddingGenerationService embeddingGeneration) { - // Generate embeddings for the search text and search for the closest records - return embeddingGeneration.generateEmbeddingsAsync(Collections.singletonList(searchText)) - .flatMap(r -> recordCollection.searchAsync(r.get(0).getVector(), null)); + System.out.printf("Best result for '%s': %s%n", query, results.getResults().get(0)); } private static Mono> 
storeData( diff --git a/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithAzureAISearch.java b/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithAzureAISearch.java index dca8599b..64575a02 100644 --- a/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithAzureAISearch.java +++ b/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithAzureAISearch.java @@ -15,8 +15,11 @@ import com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStore; import com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStoreOptions; import com.microsoft.semantickernel.connectors.data.azureaisearch.AzureAISearchVectorStoreRecordCollectionOptions; +import com.microsoft.semantickernel.data.textsearch.TextSearchResultValue; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult; +import com.microsoft.semantickernel.data.vectorsearch.VectorizedSearch; import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection; +import com.microsoft.semantickernel.data.vectorstorage.VectorStoreTextSearch; import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData; import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey; import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector; @@ -51,13 +54,12 @@ public class VectorStoreWithAzureAISearch { private static final int EMBEDDING_DIMENSIONS = 1536; static class GitHubFile { - - @JsonProperty("fileId") // Set a different name for the storage field if needed @VectorStoreRecordKey() private final 
String id; @VectorStoreRecordData() private final String description; @VectorStoreRecordData + @TextSearchResultValue private final String link; @VectorStoreRecordVector(dimensions = EMBEDDING_DIMENSIONS, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.COSINE_SIMILARITY) private final List embedding; @@ -111,7 +113,6 @@ public static void main(String[] args) { var searchClient = new SearchIndexClientBuilder() .endpoint(AZURE_AI_SEARCH_ENDPOINT) .credential(new AzureKeyCredential(AZURE_AISEARCH_KEY)) - .clientOptions(clientOptions()) .buildAsyncClient(); storeAndSearch(searchClient, embeddingGeneration); @@ -141,27 +142,24 @@ public static void storeAndSearch( .then(storeData(collection, embeddingGeneration, sampleData())) .block(); + // Build a vectorized search + var vectorStoreTextSearch = VectorStoreTextSearch.builder() + .withVectorizedSearch(collection) + .withTextEmbeddingGenerationService(embeddingGeneration) + .build(); + // Search for results // Might need to wait for the data to be indexed - var results = search("How to get started", collection, embeddingGeneration).block(); + String query = "How to get started?"; + var results = vectorStoreTextSearch.searchAsync(query, null) + .block(); - if (results == null || results.isEmpty()) { + if (results == null || results.getTotalCount() == 0) { System.out.println("No search results found."); return; } - var searchResult = results.get(0); - System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n", - searchResult.getScore(), searchResult.getRecord().link, - searchResult.getRecord().description); - } - private static Mono>> search( - String searchText, - VectorStoreRecordCollection recordCollection, - OpenAITextEmbeddingGenerationService embeddingGeneration) { - // Generate embeddings for the search text and search for the closest records - return embeddingGeneration.generateEmbeddingAsync(searchText) - .flatMap(r -> recordCollection.searchAsync(r.getVector(), null)); + 
System.out.printf("Best result for '%s': %s%n", query, results.getResults().get(0)); } private static Mono> storeData( @@ -204,11 +202,4 @@ private static Map sampleData() { "README: README associated with a sample chat summary react-based webapp" }, }).collect(Collectors.toMap(element -> element[0], element -> element[1])); } - - private static ClientOptions clientOptions() { - return new ClientOptions() - .setTracingOptions(new TracingOptions()) - .setMetricsOptions(new MetricsOptions()) - .setApplicationId("Semantic-Kernel"); - } } diff --git a/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithJDBC.java b/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithJDBC.java index 2099886b..1bf7ba30 100644 --- a/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithJDBC.java +++ b/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithJDBC.java @@ -10,8 +10,10 @@ import com.microsoft.semantickernel.connectors.data.jdbc.JDBCVectorStoreOptions; import com.microsoft.semantickernel.connectors.data.jdbc.JDBCVectorStoreRecordCollectionOptions; import com.microsoft.semantickernel.connectors.data.mysql.MySQLVectorStoreQueryProvider; +import com.microsoft.semantickernel.data.textsearch.TextSearchResultValue; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult; import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection; +import com.microsoft.semantickernel.data.vectorstorage.VectorStoreTextSearch; import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData; import 
com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey; import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector; @@ -41,12 +43,12 @@ public class VectorStoreWithJDBC { private static final int EMBEDDING_DIMENSIONS = 1536; static class GitHubFile { - @VectorStoreRecordKey() private final String id; @VectorStoreRecordData() private final String description; @VectorStoreRecordData + @TextSearchResultValue private final String link; @VectorStoreRecordVector(dimensions = EMBEDDING_DIMENSIONS, distanceFunction = DistanceFunction.COSINE_DISTANCE) private final List embedding; @@ -154,26 +156,23 @@ public static void storeAndSearch(OpenAITextEmbeddingGenerationService embedding .then(storeData(collection, embeddingGeneration, sampleData())) .block(); + // Build a vectorized search + var vectorStoreTextSearch = VectorStoreTextSearch.builder() + .withVectorizedSearch(collection) + .withTextEmbeddingGenerationService(embeddingGeneration) + .build(); + // Search for results - var results = search("How to get started", collection, embeddingGeneration).block(); + String query = "How to get started?"; + var results = vectorStoreTextSearch.searchAsync(query, null) + .block(); - if (results == null || results.isEmpty()) { + if (results == null || results.getTotalCount() == 0) { System.out.println("No search results found."); return; } - var searchResult = results.get(0); - System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n", - searchResult.getScore(), searchResult.getRecord().link, - searchResult.getRecord().description); - } - private static Mono>> search( - String searchText, - VectorStoreRecordCollection recordCollection, - OpenAITextEmbeddingGenerationService embeddingGeneration) { - // Generate embeddings for the search text and search for the closest records - return embeddingGeneration.generateEmbeddingsAsync(Collections.singletonList(searchText)) - .flatMap(r -> 
recordCollection.searchAsync(r.get(0).getVector(), null)); + System.out.printf("Best result for '%s': %s%n", query, results.getResults().get(0)); } private static Mono> storeData( diff --git a/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithRedis.java b/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithRedis.java index 0310c9dd..63c03271 100644 --- a/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithRedis.java +++ b/samples/semantickernel-concepts/semantickernel-syntax-examples/src/main/java/com/microsoft/semantickernel/samples/syntaxexamples/memory/VectorStoreWithRedis.java @@ -11,8 +11,10 @@ import com.microsoft.semantickernel.connectors.data.redis.RedisStorageType; import com.microsoft.semantickernel.connectors.data.redis.RedisVectorStore; import com.microsoft.semantickernel.connectors.data.redis.RedisVectorStoreOptions; +import com.microsoft.semantickernel.data.textsearch.TextSearchResultValue; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult; import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection; +import com.microsoft.semantickernel.data.vectorstorage.VectorStoreTextSearch; import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData; import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey; import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector; @@ -42,12 +44,12 @@ public class VectorStoreWithRedis { private static final int EMBEDDING_DIMENSIONS = 1536; public static class GitHubFile { - @JsonProperty("fileId") // Set a different name for the storage field if needed @VectorStoreRecordKey() private final String id; 
@VectorStoreRecordData() private final String description; @VectorStoreRecordData + @TextSearchResultValue private final String link; @VectorStoreRecordVector(dimensions = EMBEDDING_DIMENSIONS, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.COSINE_DISTANCE) private final List embedding; @@ -146,27 +148,23 @@ public static void storeAndSearch( .then(storeData(collection, embeddingGeneration, sampleData())) .block(); + // Build a vectorized search + var vectorStoreTextSearch = VectorStoreTextSearch.builder() + .withVectorizedSearch(collection) + .withTextEmbeddingGenerationService(embeddingGeneration) + .build(); + // Search for results - // Might need to wait for the data to be indexed - var results = search("How to get started", collection, embeddingGeneration).block(); + String query = "How to get started?"; + var results = vectorStoreTextSearch.searchAsync(query, null) + .block(); - if (results == null || results.isEmpty()) { + if (results == null || results.getTotalCount() == 0) { System.out.println("No search results found."); return; } - var searchResult = results.get(0); - System.out.printf("Search result with score: %f.%n Link: %s, Description: %s%n", - searchResult.getScore(), searchResult.getRecord().link, - searchResult.getRecord().description); - } - private static Mono>> search( - String searchText, - VectorStoreRecordCollection recordCollection, - OpenAITextEmbeddingGenerationService embeddingGeneration) { - // Generate embeddings for the search text and search for the closest records - return embeddingGeneration.generateEmbeddingsAsync(Collections.singletonList(searchText)) - .flatMap(r -> recordCollection.searchAsync(r.get(0).getVector(), null)); + System.out.printf("Best result for '%s': %s%n", query, results.getResults().get(0)); } private static Mono> storeData( diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/azureaisearch/AzureAISearchVectorStoreRecordCollection.java 
b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/azureaisearch/AzureAISearchVectorStoreRecordCollection.java index 02971935..cbcbddfc 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/azureaisearch/AzureAISearchVectorStoreRecordCollection.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/azureaisearch/AzureAISearchVectorStoreRecordCollection.java @@ -14,6 +14,7 @@ import com.azure.search.documents.models.VectorQuery; import com.azure.search.documents.models.VectorizableTextQuery; import com.azure.search.documents.models.VectorizedQuery; +import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults; import com.microsoft.semantickernel.data.vectorsearch.VectorizableTextSearch; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult; import com.microsoft.semantickernel.data.vectorsearch.VectorizedSearch; @@ -287,7 +288,7 @@ public Mono deleteBatchAsync(List keys, DeleteRecordOptions option }).collect(Collectors.toList())).then(); } - private Mono>> searchAndMapAsync( + private Mono> searchAndMapAsync( List vectorQueries, VectorSearchOptions options, GetRecordOptions getRecordOptions) { @@ -296,8 +297,8 @@ private Mono>> searchAndMapAsync( SearchOptions searchOptions = new SearchOptions() .setFilter(filter) - .setTop(options.getLimit()) - .setSkip(options.getOffset()) + .setTop(options.getTop()) + .setSkip(options.getSkip()) .setScoringParameters() .setVectorSearchOptions(new com.azure.search.documents.models.VectorSearchOptions() .setQueries(vectorQueries)); @@ -323,7 +324,8 @@ record = response.getDocument(this.options.getRecordClass()); } return Mono.just(new VectorSearchResult<>(record, response.getScore())); - }).collectList(); + }).collectList().flatMap(results -> Mono.just( + new VectorSearchResults<>(results))); } /** @@ -334,7 +336,7 @@ record = 
response.getDocument(this.options.getRecordClass()); * @return A list of search results. */ @Override - public Mono>> searchAsync(String searchText, + public Mono> searchAsync(String searchText, VectorSearchOptions options) { if (firstVectorFieldName == null) { throw new SKException("No vector fields defined. Cannot perform vector search"); @@ -349,7 +351,7 @@ public Mono>> searchAsync(String searchText, .setFields(recordDefinition.getField(options.getVectorFieldName() != null ? options.getVectorFieldName() : firstVectorFieldName).getEffectiveStorageName()) - .setKNearestNeighborsCount(options.getLimit())); + .setKNearestNeighborsCount(options.getTop())); return searchAndMapAsync(vectorQueries, options, new GetRecordOptions(options.isIncludeVectors())); @@ -363,7 +365,7 @@ public Mono>> searchAsync(String searchText, * @return A list of search results. */ @Override - public Mono>> searchAsync(List vector, + public Mono> searchAsync(List vector, VectorSearchOptions options) { if (firstVectorFieldName == null) { throw new SKException("No vector fields defined. Cannot perform vector search"); @@ -378,7 +380,7 @@ public Mono>> searchAsync(List vector, .setFields(recordDefinition.getField(options.getVectorFieldName() != null ? 
options.getVectorFieldName() : firstVectorFieldName).getEffectiveStorageName()) - .setKNearestNeighborsCount(options.getLimit())); + .setKNearestNeighborsCount(options.getTop())); return searchAndMapAsync(vectorQueries, options, new GetRecordOptions(options.isIncludeVectors())); diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/jdbc/JDBCVectorStoreQueryProvider.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/jdbc/JDBCVectorStoreQueryProvider.java index f33374aa..1994475c 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/jdbc/JDBCVectorStoreQueryProvider.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/jdbc/JDBCVectorStoreQueryProvider.java @@ -6,6 +6,7 @@ import com.microsoft.semantickernel.data.vectorsearch.VectorOperations; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchFilter; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult; +import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults; import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordMapper; import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction; import com.microsoft.semantickernel.data.vectorstorage.definition.IndexKind; @@ -35,6 +36,7 @@ import javax.sql.DataSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import reactor.core.publisher.Mono; /** * A JDBC vector store query provider. @@ -526,7 +528,7 @@ protected List getRecordsWithFilter(String collectionName, * @return the search results */ @Override - public List> search(String collectionName, + public VectorSearchResults search(String collectionName, List vector, VectorSearchOptions options, VectorStoreRecordDefinition recordDefinition, VectorStoreRecordMapper mapper) { @@ -556,8 +558,9 @@ public List> search(String collectionName, ? 
DistanceFunction.EUCLIDEAN_DISTANCE : vectorField.getDistanceFunction(); - return VectorOperations.exactSimilaritySearch(records, vector, vectorField, - distanceFunction, options); + return new VectorSearchResults<>( + VectorOperations.exactSimilaritySearch(records, vector, vectorField, + distanceFunction, options)); } /** diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/jdbc/JDBCVectorStoreRecordCollection.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/jdbc/JDBCVectorStoreRecordCollection.java index 31b25bef..28818a97 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/jdbc/JDBCVectorStoreRecordCollection.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/jdbc/JDBCVectorStoreRecordCollection.java @@ -6,6 +6,7 @@ import com.microsoft.semantickernel.connectors.data.postgres.PostgreSQLVectorStoreQueryProvider; import com.microsoft.semantickernel.connectors.data.postgres.PostgreSQLVectorStoreRecordMapper; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult; +import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults; import com.microsoft.semantickernel.data.vectorsearch.VectorizedSearch; import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordMapper; import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection; @@ -312,7 +313,7 @@ public Mono prepareAsync() { * @return A list of search results. 
*/ @Override - public Mono>> searchAsync(List vector, + public Mono> searchAsync(List vector, VectorSearchOptions vectorSearchOptions) { return Mono.fromCallable( () -> queryProvider.search(this.collectionName, vector, vectorSearchOptions, diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/jdbc/SQLVectorStoreQueryProvider.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/jdbc/SQLVectorStoreQueryProvider.java index 747e3c84..04de1949 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/jdbc/SQLVectorStoreQueryProvider.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/jdbc/SQLVectorStoreQueryProvider.java @@ -4,6 +4,7 @@ import com.microsoft.semantickernel.builders.SemanticKernelBuilder; import com.microsoft.semantickernel.data.filter.FilterClause; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult; +import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults; import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordMapper; import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordDefinition; import com.microsoft.semantickernel.data.vectorstorage.options.DeleteRecordOptions; @@ -146,7 +147,7 @@ void deleteRecords(String collectionName, List keys, * @param mapper the mapper, responsible for mapping the result set to the record type. 
* @return the search results */ - List> search(String collectionName, + VectorSearchResults search(String collectionName, List vector, VectorSearchOptions options, VectorStoreRecordDefinition recordDefinition, diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/postgres/PostgreSQLVectorStoreQueryProvider.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/postgres/PostgreSQLVectorStoreQueryProvider.java index 6be72981..200edafa 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/postgres/PostgreSQLVectorStoreQueryProvider.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/postgres/PostgreSQLVectorStoreQueryProvider.java @@ -10,6 +10,7 @@ import com.microsoft.semantickernel.data.filter.EqualToFilterClause; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchFilter; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult; +import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults; import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordMapper; import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordDataField; import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordDefinition; @@ -334,7 +335,7 @@ public void upsertRecords(String collectionName, List records, * @return the search results */ @Override - public List> search(String collectionName, + public VectorSearchResults search(String collectionName, List vector, VectorSearchOptions options, VectorStoreRecordDefinition recordDefinition, VectorStoreRecordMapper mapper) { @@ -391,8 +392,8 @@ public List> search(String collectionName, for (Object parameter : parameters) { statement.setObject(parameterIndex++, parameter); } - statement.setInt(parameterIndex++, options.getLimit()); - statement.setInt(parameterIndex, 
options.getOffset()); + statement.setInt(parameterIndex++, options.getTop()); + statement.setInt(parameterIndex, options.getSkip()); List> records = new ArrayList<>(); ResultSet resultSet = statement.executeQuery(); @@ -404,7 +405,7 @@ public List> search(String collectionName, resultSet.getDouble("score"))); } - return records; + return new VectorSearchResults<>(records); } catch (SQLException | JsonProcessingException e) { throw new SKException("Failed to search records", e); } diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/redis/RedisHashSetVectorStoreRecordCollection.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/redis/RedisHashSetVectorStoreRecordCollection.java index b65fe9f9..47c15a8e 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/redis/RedisHashSetVectorStoreRecordCollection.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/redis/RedisHashSetVectorStoreRecordCollection.java @@ -3,6 +3,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult; +import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults; import com.microsoft.semantickernel.data.vectorsearch.VectorizedSearch; import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection; import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordMapper; @@ -391,7 +392,7 @@ public Mono deleteBatchAsync(List strings, DeleteRecordOptions opt * @return A list of search results. 
*/ @Override - public Mono>> searchAsync(List vector, + public Mono> searchAsync(List vector, VectorSearchOptions options) { if (recordDefinition.getVectorFields().isEmpty()) { return Mono @@ -406,7 +407,7 @@ public Mono>> searchAsync(List vector, SearchResult searchResult = client.ftSearch(collectionName, ftSearchParams.getLeft(), ftSearchParams.getRight()); - return searchResult.getDocuments().stream() + List> results = searchResult.getDocuments().stream() .map(doc -> { String key = removeKeyPrefixIfNeeded(doc.getId(), collectionName); double score = 0; @@ -436,6 +437,8 @@ else if (entry.getKey().equals( return new VectorSearchResult<>(record, score); }) .collect(Collectors.toList()); + + return new VectorSearchResults<>(results); }).subscribeOn(Schedulers.boundedElastic())); } } diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/redis/RedisJsonVectorStoreRecordCollection.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/redis/RedisJsonVectorStoreRecordCollection.java index 4af97846..2a747684 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/redis/RedisJsonVectorStoreRecordCollection.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/redis/RedisJsonVectorStoreRecordCollection.java @@ -6,6 +6,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult; +import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults; import com.microsoft.semantickernel.data.vectorsearch.VectorizedSearch; import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection; import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordMapper; @@ -405,10 +406,10 @@ public Mono deleteBatchAsync(List strings, DeleteRecordOptions opt * * 
@param vector The vector to search with. * @param options The options to use for the search. - * @return A list of search results. + * @return The search results. */ @Override - public Mono>> searchAsync(List vector, + public Mono> searchAsync(List vector, VectorSearchOptions options) { if (recordDefinition.getVectorFields().isEmpty()) { return Mono @@ -422,7 +423,7 @@ public Mono>> searchAsync(List vector, SearchResult searchResult = client.ftSearch(collectionName, ftSearchParams.getLeft(), ftSearchParams.getRight()); - return searchResult.getDocuments().stream() + List> results = searchResult.getDocuments().stream() .map(doc -> { Map properties = new HashMap<>(); for (Map.Entry entry : doc.getProperties()) { @@ -445,8 +446,9 @@ public Mono>> searchAsync(List vector, } catch (JsonProcessingException e) { throw new RuntimeException(e); } - }) - .collect(Collectors.toList()); + }).collect(Collectors.toList()); + + return new VectorSearchResults<>(results); }).subscribeOn(Schedulers.boundedElastic())); } } diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/redis/RedisVectorStoreCollectionSearchMapping.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/redis/RedisVectorStoreCollectionSearchMapping.java index de51b8e7..2064411a 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/redis/RedisVectorStoreCollectionSearchMapping.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/connectors/data/redis/RedisVectorStoreCollectionSearchMapping.java @@ -64,9 +64,9 @@ public Pair buildQuery(List vector, vectorField.getEffectiveStorageName(), VECTOR_SCORE_FIELD); FTSearchParams searchParams = new FTSearchParams() - .addParam("K", options.getLimit() + options.getOffset()) + .addParam("K", options.getTop() + options.getSkip()) .addParam("BLOB", convertListToByteArray(vector)) - .limit(options.getOffset(), 
options.getLimit()) + .limit(options.getSkip(), options.getTop()) .sortBy(VECTOR_SCORE_FIELD, SortingOrder.ASC) .dialect(2); diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/VolatileVectorStoreRecordCollection.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/VolatileVectorStoreRecordCollection.java index 7d6031d4..5be45985 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/VolatileVectorStoreRecordCollection.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/VolatileVectorStoreRecordCollection.java @@ -1,14 +1,12 @@ // Copyright (c) Microsoft. All rights reserved. package com.microsoft.semantickernel.data; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ObjectNode; -import com.microsoft.semantickernel.data.filter.FilterClause; import com.microsoft.semantickernel.data.vectorsearch.VectorOperations; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult; +import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults; import com.microsoft.semantickernel.data.vectorstorage.VectorStoreRecordCollection; import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction; import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordDefinition; @@ -21,7 +19,6 @@ import java.util.ArrayList; import java.util.Collections; -import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -269,7 +266,7 @@ private List arrayNodeToFloatList(ArrayNode arrayNode) { * @return A list of search results. 
*/ @Override - public Mono>> searchAsync(List vector, + public Mono> searchAsync(List vector, final VectorSearchOptions options) { if (recordDefinition.getVectorFields().isEmpty()) { throw new SKException("No vector fields defined. Cannot perform vector search"); @@ -295,8 +292,10 @@ public Mono>> searchAsync(List vector, List records = VolatileVectorStoreCollectionSearchMapping.filterRecords( new ArrayList<>(getCollection().values()), effectiveOptions.getVectorSearchFilter(), recordDefinition, objectMapper); - return VectorOperations.exactSimilaritySearch(records, vector, vectorField, - distanceFunction, effectiveOptions); + + return new VectorSearchResults<>( + VectorOperations.exactSimilaritySearch(records, vector, vectorField, + distanceFunction, effectiveOptions)); }).subscribeOn(Schedulers.boundedElastic()); } } diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/DefaultTextSearchResultMapper.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/DefaultTextSearchResultMapper.java new file mode 100644 index 00000000..652d2474 --- /dev/null +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/DefaultTextSearchResultMapper.java @@ -0,0 +1,18 @@ +// Copyright (c) Microsoft. All rights reserved. +package com.microsoft.semantickernel.data.textsearch; + +/** + * Default implementation of {@link TextSearchResultMapper}. + */ +public class DefaultTextSearchResultMapper implements TextSearchResultMapper { + /** + * Maps a search result to a {@link TextSearchResult}. + * + * @param result The search result. + * @return The {@link TextSearchResult}. 
+ */ + @Override + public TextSearchResult fromResultToTextSearchResult(Object result) { + return TextSearchResult.fromRecord(result); + } +} diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/DefaultTextSearchStringMapper.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/DefaultTextSearchStringMapper.java new file mode 100644 index 00000000..5d9c0545 --- /dev/null +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/DefaultTextSearchStringMapper.java @@ -0,0 +1,19 @@ +// Copyright (c) Microsoft. All rights reserved. +package com.microsoft.semantickernel.data.textsearch; + +/** + * Default implementation of {@link TextSearchStringMapper}. + */ +public class DefaultTextSearchStringMapper implements TextSearchStringMapper { + /** + * Maps a search result to a string. + * + * @param result The search result. + * @return The string. + */ + @Override + public String fromResultToString(Object result) { + TextSearchResult textSearchResult = TextSearchResult.fromRecord(result); + return textSearchResult.getValue(); + } +} diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/KernelSearchResults.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/KernelSearchResults.java new file mode 100644 index 00000000..23343977 --- /dev/null +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/KernelSearchResults.java @@ -0,0 +1,70 @@ +// Copyright (c) Microsoft. All rights reserved. +package com.microsoft.semantickernel.data.textsearch; + +import javax.annotation.Nullable; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +/** + * The search results. + * + * @param The type of the search results. 
+ */ +public class KernelSearchResults { + private final List results; + private final long totalCount; + private final Map metadata; + + /** + * Creates a new instance of the KernelSearchResults class. + * + * @param results The search results. + */ + public KernelSearchResults(List results) { + this(results, results.size(), Collections.emptyMap()); + } + + /** + * Creates a new instance of the KernelSearchResults class. + * + * @param results The search results. + * @param totalCount The total count of search results. + * @param metadata The metadata. + */ + public KernelSearchResults(List results, long totalCount, + Map metadata) { + this.results = Collections.unmodifiableList(results); + this.totalCount = totalCount; + this.metadata = Collections.unmodifiableMap(metadata); + } + + /** + * Gets the total count of search results. + * This value represents the total number of results that are available for the current query and not the number of results being returned. + * + * @return The total count of search results. + */ + public long getTotalCount() { + return totalCount; + } + + /** + * Gets the search results. + * + * @return The search results. + */ + public List getResults() { + return results; + } + + /** + * Gets the metadata associated with the search results. + * + * @return The metadata. + */ + public Map getMetadata() { + return metadata; + } + +} diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearch.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearch.java new file mode 100644 index 00000000..04ca24ad --- /dev/null +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearch.java @@ -0,0 +1,36 @@ +// Copyright (c) Microsoft. All rights reserved. 
+package com.microsoft.semantickernel.data.textsearch; + +import reactor.core.publisher.Mono; + +public interface TextSearch { + + /** + * Perform a search for content related to the specified query and return String values representing the search results. + * + * @param query The text to search for. + * @param options The search options. + * @return The search results. + */ + Mono> searchAsync(String query, TextSearchOptions options); + + /** + * Perform a search for content related to the specified query and return TextSearchResult values representing the search results. + * + * @param query The text to search for. + * @param options The search options. + * @return The search results. + */ + Mono> getTextSearchResultsAsync(String query, + TextSearchOptions options); + + /** + * Perform a search for content related to the specified query and return Object values representing the search results. + * + * @param query The text to search for. + * @param options The search options. + * @return The search results. + */ + Mono> getSearchResultsAsync(String query, + TextSearchOptions options); +} diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchFilter.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchFilter.java new file mode 100644 index 00000000..8e448448 --- /dev/null +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchFilter.java @@ -0,0 +1,79 @@ +// Copyright (c) Microsoft. All rights reserved. +package com.microsoft.semantickernel.data.textsearch; + +import com.microsoft.semantickernel.data.filter.EqualToFilterClause; +import com.microsoft.semantickernel.data.filter.FilterClause; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +/** + * Represents a text search filter. 
+ */ +public class TextSearchFilter { + + private final List filterClauses; + + /** + * Creates a new instance of the TextSearchFilter class. + */ + public TextSearchFilter() { + this(Collections.emptyList()); + } + + /** + * Creates a new instance of the TextSearchFilter class. + * + * @param filterClauses The filter clauses. + */ + public TextSearchFilter(List filterClauses) { + this.filterClauses = Collections.unmodifiableList(filterClauses); + } + + /** + * Gets the filter clauses. + * + * @return The filter clauses. + */ + public List getFilterClauses() { + return filterClauses; + } + + /** + * Creates a new instance of the {@link Builder} class. + * + * @return The builder. + */ + public static Builder builder() { + return new Builder(); + } + + /** + * The builder for the {@link TextSearchFilter} class. + */ + public static class Builder { + private final List filterClauses = new ArrayList<>(); + + /** + * Adds an EqualToFilterClause to the filter. + * + * @param fieldName The field name to filter on. + * @param value The value. + * @return The builder. + */ + public Builder equalTo(String fieldName, Object value) { + filterClauses.add(new EqualToFilterClause(fieldName, value)); + return this; + } + + /** + * Builds the filter. + * + * @return The filter. + */ + public TextSearchFilter build() { + return new TextSearchFilter(filterClauses); + } + } +} diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchOptions.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchOptions.java new file mode 100644 index 00000000..81688581 --- /dev/null +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchOptions.java @@ -0,0 +1,150 @@ +// Copyright (c) Microsoft. All rights reserved. +package com.microsoft.semantickernel.data.textsearch; + +/** + * Represents the options for a text search. 
+ */ +public class TextSearchOptions { + + /** + * The default number of search results to return. + */ + public static final int DEFAULT_TOP = 3; + private final boolean includeTotalCount; + private final int top; + private final int skip; + private final TextSearchFilter filter; + + /** + * Creates a new instance of the TextSearchOptions class with default values. + * + * @return A new instance of the TextSearchOptions class with default values. + */ + public static TextSearchOptions createDefault() { + return new TextSearchOptions(false, DEFAULT_TOP, 0, null); + } + + /** + * Creates a new instance of the TextSearchOptions class. + * + * @param includeTotalCount A value indicating whether to include the total count of search results. + * @param top The limit of the number of results to return. + * @param skip The offset of the results to return. + * @param filter The search filter. + */ + TextSearchOptions(boolean includeTotalCount, int top, int skip, TextSearchFilter filter) { + this.includeTotalCount = includeTotalCount; + this.top = top; + this.skip = skip; + this.filter = filter; + } + + /** + * Gets a value indicating whether to include the total count of search results. + * + * @return A value indicating whether to include the total count of search results. + */ + public boolean isIncludeTotalCount() { + return includeTotalCount; + } + + /** + * Gets the limit of the number of results to return. + * + * @return The limit of the number of results to return. + */ + public int getTop() { + return top; + } + + /** + * Gets the offset of the results to return. + * + * @return The offset of the results to return. + */ + public int getSkip() { + return skip; + } + + /** + * Gets the search filter. + * + * @return The search filter. + */ + public TextSearchFilter getFilter() { + return filter; + } + + /** + * Creates a new instance of the {@link Builder} class. + * + * @return The builder. 
+ */ + public static Builder builder() { + return new Builder(); + } + + /** + * The builder for the {@link TextSearchOptions} class. + */ + public static class Builder { + private boolean includeTotalCount = false; + private int top = DEFAULT_TOP; + private int skip = 0; + private TextSearchFilter filter; + + /** + * Sets a value indicating whether to include the total count of search results. + * + * @param includeTotalCount A value indicating whether to include the total count of search results. + * @return The builder. + */ + public Builder withIncludeTotalCount(boolean includeTotalCount) { + this.includeTotalCount = includeTotalCount; + return this; + } + + /** + * Sets the limit of the number of results to return. + * + * @param top The limit of the number of results to return. + * @return The builder. + */ + public Builder withTop(int top) { + this.top = top; + return this; + } + + /** + * Sets the offset of the results to return. + * + * @param skip The offset of the results to return. + * @return The builder. + */ + public Builder withSkip(int skip) { + this.skip = skip; + return this; + } + + /** + * Sets the search filter. + * + * @param filter The search filter. + * @return The builder. + */ + public Builder withFilter(TextSearchFilter filter) { + this.filter = filter; + return this; + } + + /** + * Builds a new instance of the {@link TextSearchOptions} class. + * + * @return A new instance of the TextSearchOptions class. 
+         */
+        public TextSearchOptions build() {
+            return new TextSearchOptions(includeTotalCount, top, skip, filter);
+        }
+    }
+
+}
diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResult.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResult.java
new file mode 100644
index 00000000..2a6221f3
--- /dev/null
+++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResult.java
@@ -0,0 +1,137 @@
+// Copyright (c) Microsoft. All rights reserved.
+package com.microsoft.semantickernel.data.textsearch;
+
+import com.microsoft.semantickernel.exceptions.SKException;
+
+import java.lang.reflect.Field;
+
+/**
+ * Represents a text search result.
+ */
+public class TextSearchResult {
+    private final String name;
+    private final String value;
+    private final String link;
+
+    /**
+     * Creates a new instance of the TextSearchResult class.
+     *
+     * @param name The name of the search result.
+     * @param value The value of the search result.
+     * @param link The link of the search result.
+     */
+    TextSearchResult(String name, String value, String link) {
+        this.name = name;
+        this.value = value;
+        this.link = link;
+    }
+
+    /**
+     * Gets the name of the search result.
+     *
+     * @return The name of the search result.
+     */
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * Gets the value of the search result.
+     *
+     * @return The value of the search result.
+     */
+    public String getValue() {
+        return value;
+    }
+
+    /**
+     * Gets the link of the search result.
+     *
+     * @return The link of the search result.
+     */
+    public String getLink() {
+        return link;
+    }
+
+    /**
+     * Creates a new instance of the {@link TextSearchResult} class from a record.
+     * The record should have fields annotated with {@link TextSearchResultName}, {@link TextSearchResultValue}, and {@link TextSearchResultLink}.
+     * Only fields declared directly on the record's class are inspected. The value field is required and must
+     * hold a non-null string; the name and link fields are optional.
+     *
+     * @param record The record.
+     * @return The TextSearchResult.
+     * @throws SKException If the record is null, an annotation is used on more than one field, the value field
+     * is missing or null, an annotated field does not hold a String, or a field cannot be read.
+     */
+    public static TextSearchResult fromRecord(Object record) {
+        if (record == null) {
+            throw new SKException("Record cannot be null");
+        }
+
+        Field nameField = null;
+        Field valueField = null;
+        Field linkField = null;
+
+        // Remember the annotated fields rather than their values: a duplicate annotation must be
+        // rejected even when the first annotated field currently holds null.
+        for (Field field : record.getClass().getDeclaredFields()) {
+            if (field.isAnnotationPresent(TextSearchResultName.class)) {
+                if (nameField != null) {
+                    throw new SKException("Multiple fields with @TextSearchResultName found");
+                }
+                nameField = field;
+            }
+            if (field.isAnnotationPresent(TextSearchResultValue.class)) {
+                if (valueField != null) {
+                    throw new SKException("Multiple fields with @TextSearchResultValue found");
+                }
+                valueField = field;
+            }
+            if (field.isAnnotationPresent(TextSearchResultLink.class)) {
+                if (linkField != null) {
+                    throw new SKException("Multiple fields with @TextSearchResultLink found");
+                }
+                linkField = field;
+            }
+        }
+
+        if (valueField == null) {
+            throw new SKException("No field with @TextSearchResultValue found");
+        }
+
+        String value = readString(valueField, record);
+        if (value == null) {
+            throw new SKException("Field with @TextSearchResultValue must not be null");
+        }
+
+        return new TextSearchResult(readString(nameField, record), value,
+            readString(linkField, record));
+    }
+
+    /**
+     * Reads the String held by an annotated field of the record.
+     *
+     * @param field The field to read, or null when the annotation was not present.
+     * @param record The record to read from.
+     * @return The field value, or null when the field is absent or holds null.
+     */
+    private static String readString(Field field, Object record) {
+        if (field == null) {
+            return null;
+        }
+
+        Object raw;
+        try {
+            field.setAccessible(true);
+            raw = field.get(record);
+        } catch (IllegalAccessException e) {
+            throw new SKException("Unable to read field '" + field.getName() + "'", e);
+        }
+
+        if (raw != null && !(raw instanceof String)) {
+            throw new SKException("Field '" + field.getName() + "' must hold a String value");
+        }
+        return (String) raw;
+    }
+}
diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultLink.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultLink.java
new file mode 100644
index 00000000..fab7624e
--- /dev/null
+++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultLink.java
@@ -0,0 +1,15 @@
+// Copyright (c) Microsoft. All rights reserved.
+package com.microsoft.semantickernel.data.textsearch;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * Annotation to mark a property on a record class as the link to the source data.
+ */ +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.FIELD) +public @interface TextSearchResultLink { +} diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultMapper.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultMapper.java new file mode 100644 index 00000000..2fcbe37e --- /dev/null +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultMapper.java @@ -0,0 +1,15 @@ +// Copyright (c) Microsoft. All rights reserved. +package com.microsoft.semantickernel.data.textsearch; + +/** + * Maps a search result to a {@link TextSearchResult}. + */ +public interface TextSearchResultMapper { + /** + * Maps a search result to a {@link TextSearchResult}. + * + * @param result The search result. + * @return The {@link TextSearchResult}. + */ + TextSearchResult fromResultToTextSearchResult(Object result); +} diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultName.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultName.java new file mode 100644 index 00000000..77a3751e --- /dev/null +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultName.java @@ -0,0 +1,15 @@ +// Copyright (c) Microsoft. All rights reserved. +package com.microsoft.semantickernel.data.textsearch; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation to mark a property on a record class as the name of the source data. 
+ */ +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.FIELD) +public @interface TextSearchResultName { +} diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultValue.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultValue.java new file mode 100644 index 00000000..6b3dd568 --- /dev/null +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchResultValue.java @@ -0,0 +1,15 @@ +// Copyright (c) Microsoft. All rights reserved. +package com.microsoft.semantickernel.data.textsearch; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * Annotation to mark a property on a record class as the value of the source data. + */ +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.FIELD) +public @interface TextSearchResultValue { +} diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchStringMapper.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchStringMapper.java new file mode 100644 index 00000000..c3afa430 --- /dev/null +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/textsearch/TextSearchStringMapper.java @@ -0,0 +1,15 @@ +// Copyright (c) Microsoft. All rights reserved. +package com.microsoft.semantickernel.data.textsearch; + +/** + * Maps a search result to a string. + */ +public interface TextSearchStringMapper { + /** + * Maps a search result to a string. + * + * @param result The search result. + * @return The string. 
+ */ + String fromResultToString(Object result); +} diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorOperations.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorOperations.java index 4f90faa8..cbc1e314 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorOperations.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorOperations.java @@ -234,8 +234,8 @@ public static List> exactSimilaritySearch( return results.stream() .sorted(comparator) - .skip(options.getOffset()) - .limit(options.getLimit()) + .skip(options.getSkip()) + .limit(options.getTop()) .collect(Collectors.toList()); } } diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorSearchResults.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorSearchResults.java new file mode 100644 index 00000000..a74dbf23 --- /dev/null +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorSearchResults.java @@ -0,0 +1,71 @@ +// Copyright (c) Microsoft. All rights reserved. +package com.microsoft.semantickernel.data.vectorsearch; + +import javax.annotation.Nullable; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +/** + * The search results. + * + * @param The type of the search results. + */ +public class VectorSearchResults { + + private final long totalCount; + private final List> results; + private final Map metadata; + + /** + * Creates a new instance of the VectorSearchResults class. + * + * @param results The search results. + */ + public VectorSearchResults(List> results) { + this(results, results.size(), Collections.emptyMap()); + } + + /** + * Creates a new instance of the VectorSearchResults class. 
+ * + * @param results The search results. + * @param totalCount The total count of search results. + * @param metadata The metadata. + */ + public VectorSearchResults(List> results, long totalCount, + Map metadata) { + this.results = Collections.unmodifiableList(results); + this.totalCount = totalCount; + this.metadata = Collections.unmodifiableMap(metadata); + } + + /** + * Gets the total count of search results. + * This value represents the total number of results that are available for the current query and not the number of results being returned. + * + * @return The total count of search results. + */ + public long getTotalCount() { + return totalCount; + } + + /** + * Gets the search results. + * + * @return The search results. + */ + public List> getResults() { + return results; + } + + /** + * Gets the metadata associated with the search results. + * + * @return The metadata. + */ + public Map getMetadata() { + return metadata; + } + +} diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorizableTextSearch.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorizableTextSearch.java index 00046444..93aa883d 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorizableTextSearch.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorizableTextSearch.java @@ -17,8 +17,8 @@ public interface VectorizableTextSearch { * * @param searchText The text to search with. * @param options The options to use for the search. - * @return A list of search results. + * @return VectorSearchResults. 
*/ - Mono>> searchAsync(String searchText, + Mono> searchAsync(String searchText, VectorSearchOptions options); } diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorizedSearch.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorizedSearch.java index 59010730..a8b61b1a 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorizedSearch.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorsearch/VectorizedSearch.java @@ -18,8 +18,8 @@ public interface VectorizedSearch { * * @param vector The vector to search with. * @param options The options to use for the search. - * @return A list of search results. + * @return Vector search results. */ - Mono>> searchAsync(List vector, + Mono> searchAsync(List vector, VectorSearchOptions options); } diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/VectorStoreTextSearch.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/VectorStoreTextSearch.java new file mode 100644 index 00000000..46684f5a --- /dev/null +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/VectorStoreTextSearch.java @@ -0,0 +1,239 @@ +// Copyright (c) Microsoft. All rights reserved. 
+package com.microsoft.semantickernel.data.vectorstorage; + +import com.microsoft.semantickernel.data.textsearch.DefaultTextSearchResultMapper; +import com.microsoft.semantickernel.data.textsearch.DefaultTextSearchStringMapper; +import com.microsoft.semantickernel.data.textsearch.KernelSearchResults; +import com.microsoft.semantickernel.data.textsearch.TextSearch; +import com.microsoft.semantickernel.data.textsearch.TextSearchOptions; +import com.microsoft.semantickernel.data.textsearch.TextSearchResult; +import com.microsoft.semantickernel.data.textsearch.TextSearchResultMapper; +import com.microsoft.semantickernel.data.textsearch.TextSearchStringMapper; +import com.microsoft.semantickernel.data.vectorsearch.VectorSearchFilter; +import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult; +import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResults; +import com.microsoft.semantickernel.data.vectorsearch.VectorizedSearch; +import com.microsoft.semantickernel.data.vectorstorage.options.VectorSearchOptions; +import com.microsoft.semantickernel.exceptions.SKException; +import com.microsoft.semantickernel.services.textembedding.TextEmbeddingGenerationService; +import reactor.core.publisher.Mono; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.util.stream.Collectors; + +/** + * A text search implementation that uses a vector record collection to perform the search. + * + * @param The record type. 
+ */
+public class VectorStoreTextSearch<Record> implements TextSearch {
+
+    private final VectorizedSearch<Record> vectorizedSearch;
+    private final TextEmbeddingGenerationService textEmbeddingGenerationService;
+    private final TextSearchStringMapper stringMapper;
+    private final TextSearchResultMapper resultMapper;
+    private final VectorStoreTextSearchOptions options;
+
+    /**
+     * Create a new instance of VectorStoreTextSearch with {@link VectorizedSearch} to perform vectorized search and
+     * {@link TextEmbeddingGenerationService} to generate text embeddings.
+     *
+     * @param vectorizedSearch The vectorized search. Usually a vector record collection.
+     * @param textEmbeddingGenerationService The text embedding generation service.
+     * @param stringMapper The string mapper. When null, a {@link DefaultTextSearchStringMapper} is used.
+     * @param resultMapper The result mapper. When null, a {@link DefaultTextSearchResultMapper} is used.
+     * @param options The options. When null, a new {@link VectorStoreTextSearchOptions} is used.
+     */
+    public VectorStoreTextSearch(
+        @Nonnull VectorizedSearch<Record> vectorizedSearch,
+        @Nonnull TextEmbeddingGenerationService textEmbeddingGenerationService,
+        @Nullable TextSearchStringMapper stringMapper,
+        @Nullable TextSearchResultMapper resultMapper,
+        @Nullable VectorStoreTextSearchOptions options) {
+        this.vectorizedSearch = vectorizedSearch;
+        this.textEmbeddingGenerationService = textEmbeddingGenerationService;
+        this.stringMapper = stringMapper == null ? new DefaultTextSearchStringMapper()
+            : stringMapper;
+        this.resultMapper = resultMapper == null ? new DefaultTextSearchResultMapper()
+            : resultMapper;
+        this.options = options == null ? new VectorStoreTextSearchOptions() : options;
+    }
+
+    private Mono<VectorSearchResults<Record>> executeSearchAsync(String query,
+        TextSearchOptions options) {
+        if (options == null) {
+            options = TextSearchOptions.createDefault();
+        }
+
+        VectorSearchOptions vectorSearchOptions = VectorSearchOptions.builder()
+            .withVectorSearchFilter(options.getFilter() != null
+                ? new VectorSearchFilter(options.getFilter().getFilterClauses())
+                : null)
+            .withTop(options.getTop())
+            .withSkip(options.getSkip())
+            .withIncludeTotalCount(options.isIncludeTotalCount())
+            .build();
+
+        return textEmbeddingGenerationService.generateEmbeddingAsync(query)
+            .flatMap(embedding -> vectorizedSearch.searchAsync(embedding.getVector(),
+                vectorSearchOptions));
+    }
+
+    /**
+     * Perform a search for content related to the specified query and return String values representing the search results.
+     *
+     * @param query The text to search for.
+     * @param options The search options. When null, {@link TextSearchOptions#createDefault()} is used.
+     * @return The search results, each record converted by the string mapper.
+     */
+    @Override
+    public Mono<KernelSearchResults<String>> searchAsync(String query, TextSearchOptions options) {
+        return executeSearchAsync(query, options)
+            .map(results -> new KernelSearchResults<>(
+                results.getResults().stream()
+                    .map(r -> stringMapper.fromResultToString(r.getRecord()))
+                    .collect(Collectors.toList()),
+                results.getTotalCount(),
+                results.getMetadata()));
+    }
+
+    /**
+     * Perform a search for content related to the specified query and return TextSearchResult values representing the search results.
+     *
+     * @param query The text to search for.
+     * @param options The search options. When null, {@link TextSearchOptions#createDefault()} is used.
+     * @return The search results, each record converted by the result mapper.
+     */
+    @Override
+    public Mono<KernelSearchResults<TextSearchResult>> getTextSearchResultsAsync(String query,
+        TextSearchOptions options) {
+        return executeSearchAsync(query, options)
+            .map(results -> new KernelSearchResults<>(
+                results.getResults().stream()
+                    .map(r -> resultMapper.fromResultToTextSearchResult(r.getRecord()))
+                    .collect(Collectors.toList()),
+                results.getTotalCount(),
+                results.getMetadata()));
+    }
+
+    /**
+     * Perform a search for content related to the specified query and return Object values representing the search results.
+     *
+     * @param query The text to search for.
+     * @param options The search options. When null, {@link TextSearchOptions#createDefault()} is used.
+     * @return The search results, each being the unmapped record returned by the vectorized search.
+     */
+    @Override
+    public Mono<KernelSearchResults<Object>> getSearchResultsAsync(String query,
+        TextSearchOptions options) {
+        return executeSearchAsync(query, options)
+            .map(results -> new KernelSearchResults<>(
+                results.getResults().stream()
+                    .map(r -> (Object) r.getRecord())
+                    .collect(Collectors.toList()),
+                results.getTotalCount(),
+                results.getMetadata()));
+    }
+
+    /**
+     * Create a new instance of {@link Builder}.
+     *
+     * @param <Record> The record type.
+     * @return The builder.
+     */
+    public static <Record> Builder<Record> builder() {
+        return new Builder<>();
+    }
+
+    /**
+     * A builder for the {@link VectorStoreTextSearch} class.
+     *
+     * @param <Record> The record type.
+     */
+    public static class Builder<Record> {
+        @Nullable
+        private VectorizedSearch<Record> vectorizedSearch;
+        @Nullable
+        private TextEmbeddingGenerationService textEmbeddingGenerationService;
+        @Nullable
+        private TextSearchStringMapper stringMapper;
+        @Nullable
+        private TextSearchResultMapper resultMapper;
+        @Nullable
+        private VectorStoreTextSearchOptions options;
+
+        /**
+         * Sets the vectorized search.
+         *
+         * @param vectorizedSearch The vectorized search.
+         * @return The builder.
+         */
+        public Builder<Record> withVectorizedSearch(VectorizedSearch<Record> vectorizedSearch) {
+            this.vectorizedSearch = vectorizedSearch;
+            return this;
+        }
+
+        /**
+         * Sets the text embedding generation service.
+         *
+         * @param textEmbeddingGenerationService The text embedding generation service.
+         * @return The builder.
+         */
+        public Builder<Record> withTextEmbeddingGenerationService(
+            TextEmbeddingGenerationService textEmbeddingGenerationService) {
+            this.textEmbeddingGenerationService = textEmbeddingGenerationService;
+            return this;
+        }
+
+        /**
+         * Sets the string mapper.
+         *
+         * @param stringMapper The string mapper.
+         * @return The builder.
+         */
+        public Builder<Record> withStringMapper(TextSearchStringMapper stringMapper) {
+            this.stringMapper = stringMapper;
+            return this;
+        }
+
+        /**
+         * Sets the result mapper.
+         *
+         * @param resultMapper The result mapper.
+         * @return The builder.
+         */
+        public Builder<Record> withResultMapper(TextSearchResultMapper resultMapper) {
+            this.resultMapper = resultMapper;
+            return this;
+        }
+
+        /**
+         * Sets the options.
+         *
+         * @param options The options.
+         * @return The builder.
+         */
+        public Builder<Record> withOptions(VectorStoreTextSearchOptions options) {
+            this.options = options;
+            return this;
+        }
+
+        /**
+         * Builds the {@link VectorStoreTextSearch} instance.
+         *
+         * @return The {@link VectorStoreTextSearch} instance.
+         */
+        public VectorStoreTextSearch<Record> build() {
+            if (vectorizedSearch == null) {
+                throw new SKException("Vectorized search is required");
+            }
+            if (textEmbeddingGenerationService == null) {
+                throw new SKException("Text embedding generation service is required");
+            }
+
+            return new VectorStoreTextSearch<>(vectorizedSearch, textEmbeddingGenerationService,
+                stringMapper, resultMapper, options);
+        }
+    }
+}
diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/VectorStoreTextSearchOptions.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/VectorStoreTextSearchOptions.java
new file mode 100644
index 00000000..2f0a73e9
--- /dev/null
+++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/VectorStoreTextSearchOptions.java
@@ -0,0 +1,8 @@
+// Copyright (c) Microsoft. All rights reserved.
+package com.microsoft.semantickernel.data.vectorstorage;
+
+/**
+ * Options for vector store text search.
+ */ +public class VectorStoreTextSearchOptions { +} diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/annotations/VectorStoreRecordData.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/annotations/VectorStoreRecordData.java index 532cffdb..4043af26 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/annotations/VectorStoreRecordData.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/annotations/VectorStoreRecordData.java @@ -7,7 +7,7 @@ import java.lang.annotation.Target; /** - * Represents a data attribute in a record. + * Represents a data field in a record. */ @Retention(RetentionPolicy.RUNTIME) @Target(ElementType.FIELD) diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/annotations/VectorStoreRecordKey.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/annotations/VectorStoreRecordKey.java index 8f8c98a7..108b6a0c 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/annotations/VectorStoreRecordKey.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/annotations/VectorStoreRecordKey.java @@ -7,7 +7,7 @@ import java.lang.annotation.Target; /** - * Represents the key attribute in a record. + * Represents the key field in a record. 
*/ @Retention(RetentionPolicy.RUNTIME) @Target(ElementType.FIELD) diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/annotations/VectorStoreRecordVector.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/annotations/VectorStoreRecordVector.java index 90128b41..dfce1352 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/annotations/VectorStoreRecordVector.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/annotations/VectorStoreRecordVector.java @@ -11,7 +11,7 @@ import java.lang.annotation.Target; /** - * Represents a vector attribute in a record. + * Represents a vector field in a record. */ @Retention(RetentionPolicy.RUNTIME) @Target(ElementType.FIELD) diff --git a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/options/VectorSearchOptions.java b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/options/VectorSearchOptions.java index 35fe2f01..241f069f 100644 --- a/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/options/VectorSearchOptions.java +++ b/semantickernel-experimental/src/main/java/com/microsoft/semantickernel/data/vectorstorage/options/VectorSearchOptions.java @@ -14,7 +14,7 @@ public class VectorSearchOptions { /** * The default limit of the number of results to return. */ - public static final int DEFAULT_RESULT_LIMIT = 3; + public static final int DEFAULT_TOP = 3; /** * Creates a new instance of the VectorSearchOptions class with default values. 
@@ -31,25 +31,28 @@ public static VectorSearchOptions createDefault(String vectorFieldName) { private final VectorSearchFilter vectorSearchFilter; @Nullable private final String vectorFieldName; - private final int limit; - private final int offset; + private final int top; + private final int skip; private final boolean includeVectors; + private final boolean includeTotalCount; /** * Creates a new instance of the VectorSearchOptions class. * @param vectorSearchFilter The vector search filter. * @param vectorFieldName The name of the vector field. - * @param limit The limit of the number of results to return. - * @param offset The offset of the results to return. + * @param top The limit of the number of results to return. + * @param skip The offset of the results to return. * @param includeVectors A value indicating whether to include vectors in the results. */ public VectorSearchOptions(VectorSearchFilter vectorSearchFilter, - String vectorFieldName, int limit, int offset, boolean includeVectors) { + String vectorFieldName, int top, int skip, boolean includeVectors, + boolean includeTotalCount) { this.vectorSearchFilter = vectorSearchFilter; this.vectorFieldName = vectorFieldName; - this.limit = Math.max(1, limit); - this.offset = Math.max(0, offset); + this.top = Math.max(1, top); + this.skip = Math.max(0, skip); this.includeVectors = includeVectors; + this.includeTotalCount = includeTotalCount; } /** @@ -77,8 +80,8 @@ public String getVectorFieldName() { * * @return The limit of the number of results to return. */ - public int getLimit() { - return limit; + public int getTop() { + return top; } /** @@ -86,8 +89,8 @@ public int getLimit() { * * @return The offset of the results to return. */ - public int getOffset() { - return offset; + public int getSkip() { + return skip; } /** @@ -99,6 +102,15 @@ public boolean isIncludeVectors() { return includeVectors; } + /** + * Gets a value indicating whether to include the total count of the results. 
+ * + * @return A value indicating whether to include the total count of the results. + */ + public boolean isIncludeTotalCount() { + return includeTotalCount; + } + /** * Creates a new instance of the Builder class. * @@ -114,17 +126,10 @@ public static Builder builder() { public static class Builder implements SemanticKernelBuilder { private VectorSearchFilter vectorSearchFilter; private String vectorFieldName; - private int limit; - private int offset; - private boolean includeVectors; - - /** - * Creates a new instance of the Builder class with default values. - */ - public Builder() { - this.limit = DEFAULT_RESULT_LIMIT; - this.includeVectors = false; - } + private int top = DEFAULT_TOP; + private int skip = 0; + private boolean includeVectors = false; + private boolean includeTotalCount = false; /** * Sets the vector search filter. @@ -149,21 +154,21 @@ public Builder withVectorFieldName(String vectorFieldName) { /** * Sets the limit of the number of results to return. - * @param limit the limit of the number of results to return + * @param top the limit of the number of results to return * @return {@code this} builder */ - public Builder withLimit(int limit) { - this.limit = limit; + public Builder withTop(int top) { + this.top = top; return this; } /** * Sets the offset of the results to return. - * @param offset the offset of the results to return + * @param skip the offset of the results to return * @return {@code this} builder */ - public Builder withOffset(int offset) { - this.offset = offset; + public Builder withSkip(int skip) { + this.skip = skip; return this; } @@ -177,10 +182,24 @@ public Builder withIncludeVectors(boolean includeVectors) { return this; } + /** + * Sets a value indicating whether to include the total count of the results. 
+ * @param includeTotalCount a value indicating whether to include the total count of the results + * @return {@code this} builder + */ + public Builder withIncludeTotalCount(boolean includeTotalCount) { + this.includeTotalCount = includeTotalCount; + return this; + } + + /** + * Builds a new instance of the VectorSearchOptions class. + * @return a new instance of the VectorSearchOptions class + */ @Override public VectorSearchOptions build() { - return new VectorSearchOptions(vectorSearchFilter, vectorFieldName, limit, offset, - includeVectors); + return new VectorSearchOptions(vectorSearchFilter, vectorFieldName, top, skip, + includeVectors, includeTotalCount); } } } diff --git a/semantickernel-experimental/src/test/java/com/microsoft/semantickernel/data/VolatileVectorStoreRecordCollectionTest.java b/semantickernel-experimental/src/test/java/com/microsoft/semantickernel/data/VolatileVectorStoreRecordCollectionTest.java index 23b1ac78..bd2c633d 100644 --- a/semantickernel-experimental/src/test/java/com/microsoft/semantickernel/data/VolatileVectorStoreRecordCollectionTest.java +++ b/semantickernel-experimental/src/test/java/com/microsoft/semantickernel/data/VolatileVectorStoreRecordCollectionTest.java @@ -10,7 +10,6 @@ import java.util.List; import java.util.stream.Collectors; -import com.microsoft.semantickernel.data.filter.EqualToFilterClause; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchFilter; import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult; import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction; @@ -158,12 +157,12 @@ public void exactSearch(DistanceFunction distanceFunction) { VectorSearchOptions options = VectorSearchOptions.builder() .withVectorFieldName(distanceFunction.getValue()) - .withLimit(3) + .withTop(3) .build(); // Embeddings similar to the third hotel List> results = recordCollection - .searchAsync(SEARCH_EMBEDDINGS, options).block(); + .searchAsync(SEARCH_EMBEDDINGS, 
options).block().getResults(); assertNotNull(results); assertEquals(3, results.size()); // The third hotel should be the most similar @@ -171,12 +170,12 @@ public void exactSearch(DistanceFunction distanceFunction) { options = VectorSearchOptions.builder() .withVectorFieldName(distanceFunction.getValue()) - .withOffset(1) - .withLimit(-100) + .withSkip(1) + .withTop(-100) .build(); // Skip the first result - results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, options).block(); + results = recordCollection.searchAsync(SEARCH_EMBEDDINGS, options).block().getResults(); assertNotNull(results); assertEquals(1, results.size()); // The first hotel should be the most similar @@ -191,7 +190,7 @@ public void searchWithFilter(DistanceFunction distanceFunction) { VectorSearchOptions options = VectorSearchOptions.builder() .withVectorFieldName(distanceFunction.getValue()) - .withLimit(3) + .withTop(3) .withVectorSearchFilter( VectorSearchFilter.builder() .equalTo("rating", 4.0).build()) @@ -199,7 +198,7 @@ public void searchWithFilter(DistanceFunction distanceFunction) { // Embeddings similar to the third hotel, but as the filter is set to 4.0, the third hotel should not be returned List> results = recordCollection - .searchAsync(SEARCH_EMBEDDINGS, options).block(); + .searchAsync(SEARCH_EMBEDDINGS, options).block().getResults(); assertNotNull(results); assertEquals(3, results.size()); // The first hotel should be the most similar