Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tag list support to JDBC vector stores #243

Merged
merged 5 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

import javax.annotation.Nullable;


/**
* Makes a Gemini service available to the Semantic Kernel.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;


/**
* A chat completion service that uses the Gemini model to generate chat completions.
*/
Expand All @@ -66,7 +65,7 @@ public class GeminiChatCompletion extends GeminiService implements ChatCompletio
public GeminiChatCompletion(VertexAI client, String modelId) {
super(client, modelId);
}

/**
* Create a new instance of {@link GeminiChatCompletion.Builder}.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ public static Builder builder() {
/**
* Builder for a Hugging Face client.
*/
public static class Builder {
public static class Builder {

@Nullable
private KeyCredential key = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,49 +2,53 @@

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordDataAttribute;
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordKeyAttribute;
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordVectorAttribute;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector;
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
import com.microsoft.semantickernel.data.vectorstorage.definition.IndexKind;

import java.util.List;

public class Hotel {
@JsonProperty("hotelId")
@VectorStoreRecordKeyAttribute
@VectorStoreRecordKey
private final String id;

@VectorStoreRecordDataAttribute(isFilterable = true)
@VectorStoreRecordData(isFilterable = true)
private final String name;

@VectorStoreRecordDataAttribute
@VectorStoreRecordData
private final int code;

@JsonProperty("summary")
@VectorStoreRecordDataAttribute()
@VectorStoreRecordData()
private final String description;

@JsonProperty("summaryEmbedding1")
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
@VectorStoreRecordVector(dimensions = 8, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
private final List<Float> euclidean;

@JsonProperty("summaryEmbedding2")
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = DistanceFunction.COSINE_DISTANCE)
@VectorStoreRecordVector(dimensions = 8, distanceFunction = DistanceFunction.COSINE_DISTANCE)
private final List<Float> cosineDistance;

@JsonProperty("summaryEmbedding3")
@VectorStoreRecordVectorAttribute(dimensions = 8, distanceFunction = DistanceFunction.DOT_PRODUCT)
@VectorStoreRecordVector(dimensions = 8, distanceFunction = DistanceFunction.DOT_PRODUCT)
private final List<Float> dotProduct;

@JsonProperty("indexedSummaryEmbedding")
@VectorStoreRecordVectorAttribute(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
@VectorStoreRecordVector(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
private final List<Float> indexedEuclidean;
@VectorStoreRecordDataAttribute

@VectorStoreRecordData
private final List<String> tags;

@VectorStoreRecordData
private double rating;

public Hotel() {
this(null, null, 0, null, null, null, null, null, 0.0);
this(null, null, 0, null, null, null, null, null, 0.0, null);
}

@JsonCreator
Expand All @@ -57,7 +61,8 @@ public Hotel(
@JsonProperty("summaryEmbedding2") List<Float> cosineDistance,
@JsonProperty("summaryEmbedding3") List<Float> dotProduct,
@JsonProperty("indexedSummaryEmbedding") List<Float> indexedEuclidean,
@JsonProperty("rating") double rating) {
@JsonProperty("rating") double rating,
@JsonProperty("tags") List<String> tags) {
this.id = id;
this.name = name;
this.code = code;
Expand All @@ -67,6 +72,7 @@ public Hotel(
this.dotProduct = euclidean;
this.indexedEuclidean = euclidean;
this.rating = rating;
this.tags = tags;
}

public String getId() {
Expand Down Expand Up @@ -97,6 +103,10 @@ public double getRating() {
return rating;
}

/**
 * Returns the tags attached to this hotel.
 *
 * @return the same list reference that was handed to the constructor (no copy is made);
 *     may be {@code null}, e.g. the no-argument constructor passes {@code null} for tags
 */
public List<String> getTags() {
return tags;
}

/**
 * Replaces the rating of this hotel.
 * Rating is the only field of this class that is not declared {@code final}.
 *
 * @param rating the new rating value; stored as given, no range check is applied
 */
public void setRating(double rating) {
this.rating = rating;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import com.microsoft.semantickernel.connectors.data.jdbc.JDBCVectorStoreRecordCollection;
import com.microsoft.semantickernel.connectors.data.jdbc.JDBCVectorStoreRecordCollectionOptions;
import com.microsoft.semantickernel.connectors.data.jdbc.SQLVectorStoreQueryProvider;
import com.microsoft.semantickernel.connectors.data.jdbc.filter.SQLEqualToFilterClause;
import com.microsoft.semantickernel.connectors.data.mysql.MySQLVectorStoreQueryProvider;
import com.microsoft.semantickernel.connectors.data.postgres.PostgreSQLVectorStoreQueryProvider;
import com.microsoft.semantickernel.connectors.data.sqlite.SQLiteVectorStoreQueryProvider;
Expand Down Expand Up @@ -159,19 +158,19 @@ private List<Hotel> getHotels() {
return Arrays.asList(
new Hotel("id_1", "Hotel 1", 1, "Hotel 1 description",
Arrays.asList(0.5f, 3.2f, 7.1f, -4.0f, 2.8f, 10.0f, -1.3f, 5.5f), null, null, null,
4.0),
4.0, Arrays.asList("luxury", "city")),
new Hotel("id_2", "Hotel 2", 2, "Hotel 2 description",
Arrays.asList(-2.0f, 8.1f, 0.9f, 5.4f, -3.3f, 2.2f, 9.9f, -4.5f), null, null, null,
4.0),
4.0, Arrays.asList("luxury", "city")),
new Hotel("id_3", "Hotel 3", 3, "Hotel 3 description",
Arrays.asList(4.5f, -6.2f, 3.1f, 7.7f, -0.8f, 1.1f, -2.2f, 8.3f), null, null, null,
5.0),
5.0, Arrays.asList("luxury", "beach")),
new Hotel("id_4", "Hotel 4", 4, "Hotel 4 description",
Arrays.asList(7.0f, 1.2f, -5.3f, 2.5f, 6.6f, -7.8f, 3.9f, -0.1f), null, null, null,
4.0),
4.0, Arrays.asList("luxury", "city")),
new Hotel("id_5", "Hotel 5", 5, "Hotel 5 description",
Arrays.asList(-3.5f, 4.4f, -1.2f, 9.9f, 5.7f, -6.1f, 7.8f, -2.0f), null, null, null,
4.0)
4.0, Arrays.asList("luxury", "city"))
);
}

Expand Down Expand Up @@ -396,14 +395,13 @@ public void getBatchWithNoVectors(QueryProvider provider) {
}

private static Stream<Arguments> provideSearchParameters() {
return Stream.of(
Arguments.of(QueryProvider.MySQL, "euclidean"),
Arguments.of(QueryProvider.MySQL, "cosineDistance"),
Arguments.of(QueryProvider.MySQL, "dotProduct"),
Arguments.of(QueryProvider.PostgreSQL, "euclidean"),
Arguments.of(QueryProvider.PostgreSQL, "cosineDistance"),
Arguments.of(QueryProvider.PostgreSQL, "dotProduct")
);
return Arrays.stream(QueryProvider.values()).map(provider ->
Stream.of(
Arguments.of(provider, "euclidean"),
Arguments.of(provider, "cosineDistance"),
Arguments.of(provider, "dotProduct")
)
).flatMap(s -> s);
}

@ParameterizedTest
Expand Down Expand Up @@ -464,13 +462,13 @@ public void approximateSearch(QueryProvider provider) {
assertNotNull(results);
assertEquals(5, results.size());
// The third hotel should be the most similar
assertEquals(hotels.get(2).getId(), results.get(0).getRecord().getId());
assertEquals("id_3", results.get(0).getRecord().getId());
}

@ParameterizedTest
@MethodSource("provideSearchParameters")
public void searchWithFilter(QueryProvider provider, String embeddingName) {
String collectionName = "searchWithFilter";
public void searchWithFilterEqualToFilter(QueryProvider provider, String embeddingName) {
String collectionName = "searchWithFilterEqualToFilter";
JDBCVectorStoreRecordCollection<Hotel> recordCollection = buildRecordCollection(provider,
collectionName);

Expand All @@ -482,7 +480,7 @@ public void searchWithFilter(QueryProvider provider, String embeddingName) {
.withLimit(3)
.withVectorSearchFilter(
VectorSearchFilter.builder()
.withEqualToFilterClause(new SQLEqualToFilterClause("rating", 4.0)).build())
.equalTo("rating", 4.0).build())
.build();

// Embeddings similar to the third hotel, but as the filter is set to 4.0, the third hotel should not be returned
Expand All @@ -491,7 +489,34 @@ public void searchWithFilter(QueryProvider provider, String embeddingName) {
assertNotNull(results);
assertEquals(3, results.size());
// The first hotel should be the most similar
assertEquals(hotels.get(0).getId(), results.get(0).getRecord().getId());
assertEquals("id_1", results.get(0).getRecord().getId());
}

/**
 * Verifies vector search combined with an "any tag equal to" filter on a list field.
 * Upserts the fixture hotels, searches with a limit of 3 and a filter requiring the
 * "tags" list to contain "city", then checks that three results come back and that
 * "id_1" is ranked first.
 *
 * @param provider the query provider the record collection is built for
 * @param embeddingName name of the vector field to search on
 */
@ParameterizedTest
@MethodSource("provideSearchParameters")
public void searchWithAnyTagEqualToFilter(QueryProvider provider, String embeddingName) {
String collectionName = "searchWithAnyTagEqualToFilter";
JDBCVectorStoreRecordCollection<Hotel> recordCollection = buildRecordCollection(provider,
collectionName);

List<Hotel> hotels = getHotels();
recordCollection.upsertBatchAsync(hotels, null).block();

VectorSearchOptions options = VectorSearchOptions.builder()
.withVectorFieldName(embeddingName)
.withLimit(3)
.withVectorSearchFilter(
VectorSearchFilter.builder()
.anyTagEqualTo("tags", "city").build())
.build();

// Embeddings similar to the third hotel, but the filter only keeps hotels tagged "city";
// the third hotel is tagged "luxury" and "beach", so it should not be returned
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS,
options).block();
assertNotNull(results);
assertEquals(3, results.size());
// Among the hotels tagged "city", the first hotel should be the most similar
assertEquals("id_1", results.get(0).getRecord().getId());
}

// MySQL will always return the vectors as they're needed to compute the distances
Expand Down Expand Up @@ -520,7 +545,7 @@ public void postgresSearchIncludeAndNotIncludeVectors() {
assertNotNull(results);
assertEquals(3, results.size());
// The third hotel should be the most similar
assertEquals(hotels.get(2).getId(), results.get(0).getRecord().getId());
assertEquals("id_3", results.get(0).getRecord().getId());
assertNotNull(results.get(0).getRecord().getEuclidean());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,41 +2,42 @@

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordDataAttribute;
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordKeyAttribute;
import com.microsoft.semantickernel.data.vectorstorage.attributes.VectorStoreRecordVectorAttribute;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordData;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordKey;
import com.microsoft.semantickernel.data.vectorstorage.annotations.VectorStoreRecordVector;
import com.microsoft.semantickernel.data.vectorstorage.definition.DistanceFunction;
import com.microsoft.semantickernel.data.vectorstorage.definition.IndexKind;

import java.util.List;

public class Hotel {

@VectorStoreRecordKeyAttribute
@VectorStoreRecordKey
private final String id;

@VectorStoreRecordDataAttribute(isFilterable = true)
@VectorStoreRecordData(isFilterable = true)
private final String name;

@VectorStoreRecordDataAttribute
@VectorStoreRecordData
private final int code;

@JsonProperty("summary")
@VectorStoreRecordDataAttribute()
@VectorStoreRecordData()
private final String description;

@JsonProperty("summaryEmbedding1")
@VectorStoreRecordVectorAttribute(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
@VectorStoreRecordVector(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.EUCLIDEAN_DISTANCE)
private final List<Float> euclidean;

@JsonProperty("summaryEmbedding2")
@VectorStoreRecordVectorAttribute(dimensions = 8)
@VectorStoreRecordVector(dimensions = 8)
private final List<Float> cosineDistance;

@JsonProperty("summaryEmbedding3")
@VectorStoreRecordVectorAttribute(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.DOT_PRODUCT)
@VectorStoreRecordVector(dimensions = 8, indexKind = IndexKind.HNSW, distanceFunction = DistanceFunction.DOT_PRODUCT)
private final List<Float> dotProduct;
@VectorStoreRecordDataAttribute

@VectorStoreRecordData(isFilterable = true)
private double rating;

public Hotel() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.microsoft.semantickernel.connectors.data.redis.RedisHashSetVectorStoreRecordCollection;
import com.microsoft.semantickernel.connectors.data.redis.RedisHashSetVectorStoreRecordCollectionOptions;
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchFilter;
import com.microsoft.semantickernel.data.vectorsearch.VectorSearchResult;
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordDataField;
import com.microsoft.semantickernel.data.vectorstorage.definition.VectorStoreRecordDefinition;
Expand Down Expand Up @@ -94,6 +95,7 @@ static void setup() {
fields.add(VectorStoreRecordDataField.builder()
.withName("rating")
.withFieldType(Double.class)
.isFilterable(true)
.build());
VectorStoreRecordDefinition recordDefinition = VectorStoreRecordDefinition.fromFields(fields);

Expand Down Expand Up @@ -440,4 +442,30 @@ public void searchWithOffSet(RecordCollectionOptions options, String embeddingNa
// The first hotel should be the most similar
assertEquals(hotels.get(0).getId(), results.get(0).getRecord().getId(), indexingFailureMessage);
}

/**
 * Verifies vector search combined with an equality filter on a data field.
 * Upserts the fixture hotels, searches with a limit of 3 and a filter requiring
 * "rating" to equal 4.0, then checks that three results come back and that
 * "id_1" is ranked first.
 *
 * @param recordCollectionOptions key used to look up the collection options in optionsMap
 * @param embeddingName name of the vector field to search on
 */
@ParameterizedTest
@MethodSource("provideSearchParameters")
public void searchWithFilterEqualToFilter(RecordCollectionOptions recordCollectionOptions, String embeddingName) {
// NOTE(review): the "search" prefix does not match this test's name; other tests are not
// visible here - confirm it does not end up sharing a collection with another search test
String collectionName = getCollectionName("search", recordCollectionOptions);
RedisHashSetVectorStoreRecordCollection<Hotel> recordCollection = createCollection(optionsMap.get(recordCollectionOptions), collectionName);

List<Hotel> hotels = getHotels();
recordCollection.upsertBatchAsync(hotels, null).block();

VectorSearchOptions options = VectorSearchOptions.builder()
.withVectorFieldName(embeddingName)
.withLimit(3)
.withVectorSearchFilter(
VectorSearchFilter.builder()
.equalTo("rating", 4.0).build())
.build();

// Embeddings similar to the third hotel, but as the filter is set to 4.0, the third hotel should not be returned
List<VectorSearchResult<Hotel>> results = recordCollection.searchAsync(SEARCH_EMBEDDINGS,
options).block();
assertNotNull(results);
assertEquals(3, results.size());
// The first hotel should be the most similar
assertEquals("id_1", results.get(0).getRecord().getId());
}
}
Loading
Loading