Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New public sparse vector configuration #393

Merged
merged 7 commits into from
Dec 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions qdrant_client/conversions/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -1250,8 +1250,8 @@ def convert_recommend_strategy(cls, model: grpc.RecommendStrategy) -> rest.Recom
raise ValueError(f"invalid RecommendStrategy model: {model}") # pragma: no cover

@classmethod
def convert_sparse_index_config(cls, model: grpc.SparseIndexConfig) -> rest.SparseIndexConfig:
return rest.SparseIndexConfig(
def convert_sparse_index_config(cls, model: grpc.SparseIndexConfig) -> rest.SparseIndexParams:
return rest.SparseIndexParams(
full_scan_threshold=model.full_scan_threshold
if model.HasField("full_scan_threshold")
else None,
Expand Down
144 changes: 72 additions & 72 deletions qdrant_client/grpc/collections_pb2.py

Large diffs are not rendered by default.

153 changes: 82 additions & 71 deletions qdrant_client/http/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,16 +143,17 @@ class CollectionInfo(BaseModel, extra="forbid"):
optimizer_status: "OptimizersStatus" = Field(
..., description="Current statistics and configuration of the collection"
)
vectors_count: int = Field(
...,
description="Number of vectors in collection All vectors in collection are available for querying Calculated as `points_count x vectors_per_point` Where `vectors_per_point` is a number of named vectors in schema",
vectors_count: Optional[int] = Field(
default=None,
description="Approximate number of vectors in collection. All vectors in collection are available for querying. Calculated as `points_count x vectors_per_point`. Where `vectors_per_point` is a number of named vectors in schema.",
)
indexed_vectors_count: int = Field(
...,
description="Number of indexed vectors in the collection. Indexed vectors in large segments are faster to query, as it is stored in vector index (HNSW)",
indexed_vectors_count: Optional[int] = Field(
default=None,
description="Approximate number of indexed vectors in the collection. Indexed vectors in large segments are faster to query, as it is stored in a specialized vector index.",
)
points_count: int = Field(
..., description="Number of points (vectors + payloads) in collection Each point could be accessed by unique id"
points_count: Optional[int] = Field(
default=None,
description="Approximate number of points (vectors + payloads) in collection. Each point could be accessed by unique id.",
)
segments_count: int = Field(
..., description="Number of segments in collection. Each segment has independent vector as payload indexes"
Expand Down Expand Up @@ -736,161 +737,121 @@ class InitFrom(BaseModel, extra="forbid"):

class InlineResponse200(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional[bool] = Field(default=None, description="")


class InlineResponse2001(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional["TelemetryData"] = Field(default=None, description="")


class InlineResponse20010(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional["SnapshotDescription"] = Field(default=None, description="")


class InlineResponse20011(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional["Record"] = Field(default=None, description="")


class InlineResponse20012(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional[List["Record"]] = Field(default=None, description="")


class InlineResponse20013(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional[List["UpdateResult"]] = Field(default=None, description="")


class InlineResponse20014(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional["ScrollResult"] = Field(default=None, description="")


class InlineResponse20015(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional[List["ScoredPoint"]] = Field(default=None, description="")


class InlineResponse20016(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional[List[List["ScoredPoint"]]] = Field(default=None, description="")


class InlineResponse20017(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional["GroupsResult"] = Field(default=None, description="")


class InlineResponse20018(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional["CountResult"] = Field(default=None, description="")


class InlineResponse2002(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional["LocksOption"] = Field(default=None, description="")


class InlineResponse2003(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional["ClusterStatus"] = Field(default=None, description="")


class InlineResponse2004(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional["CollectionsResponse"] = Field(default=None, description="")


class InlineResponse2005(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional["CollectionInfo"] = Field(default=None, description="")


class InlineResponse2006(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional["UpdateResult"] = Field(default=None, description="")


class InlineResponse2007(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional["CollectionClusterInfo"] = Field(default=None, description="")


class InlineResponse2008(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional["CollectionsAliasesResponse"] = Field(default=None, description="")


class InlineResponse2009(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"ok",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")
result: Optional[List["SnapshotDescription"]] = Field(default=None, description="")


class InlineResponse202(BaseModel, extra="forbid"):
time: Optional[float] = Field(default=None, description="Time spent to process this request")
status: Literal[
"accepted",
] = Field(None, description="")
status: Optional[str] = Field(default=None, description="")


class IsEmptyCondition(BaseModel, extra="forbid"):
Expand Down Expand Up @@ -1798,6 +1759,18 @@ class SparseIndexConfig(BaseModel, extra="forbid"):
Configuration for sparse inverted index.
"""

full_scan_threshold: Optional[int] = Field(
default=None,
description="We prefer a full scan search upto (excluding) this number of vectors. Note: this is number of vectors, not KiloBytes.",
)
index_type: "SparseIndexType" = Field(..., description="Configuration for sparse inverted index.")


class SparseIndexParams(BaseModel, extra="forbid"):
"""
Configuration for sparse inverted index.
"""

full_scan_threshold: Optional[int] = Field(
default=None,
description="We prefer a full scan search upto (excluding) this number of vectors. Note: this is number of vectors, not KiloBytes.",
Expand All @@ -1808,6 +1781,39 @@ class SparseIndexConfig(BaseModel, extra="forbid"):
)


class SparseIndexTypeOneOf(str, Enum):
    """
    Mutable RAM sparse index
    """

    def __str__(self) -> str:
        # Return the member's value ("MutableRam") as a plain string.
        return str(self.value)

    MUTABLERAM = "MutableRam"


class SparseIndexTypeOneOf1(str, Enum):
    """
    Immutable RAM sparse index
    """

    def __str__(self) -> str:
        # Return the member's value ("ImmutableRam") as a plain string.
        return str(self.value)

    IMMUTABLERAM = "ImmutableRam"


class SparseIndexTypeOneOf2(str, Enum):
    """
    Mmap sparse index
    """

    def __str__(self) -> str:
        # Return the member's value ("Mmap") as a plain string.
        return str(self.value)

    MMAP = "Mmap"


class SparseVector(BaseModel, extra="forbid"):
"""
Sparse vector structure
Expand All @@ -1819,18 +1825,18 @@ class SparseVector(BaseModel, extra="forbid"):

class SparseVectorDataConfig(BaseModel, extra="forbid"):
"""
Config of single vector data storage
Config of single sparse vector data storage
"""

index: Optional["SparseIndexConfig"] = Field(default=None, description="Type of index used for search")
index: "SparseIndexConfig" = Field(..., description="Config of single sparse vector data storage")


class SparseVectorParams(BaseModel, extra="forbid"):
"""
Params of single sparse vector data storage
"""

index: Optional["SparseIndexConfig"] = Field(
index: Optional["SparseIndexParams"] = Field(
default=None, description="Custom params for index. If none - values from collection configuration are used."
)

Expand Down Expand Up @@ -2221,6 +2227,11 @@ def __str__(self) -> str:
ShardTransferMethodOneOf,
ShardTransferMethodOneOf1,
]
# Type alias accepting any of the three single-member sparse index enums
# ("MutableRam", "ImmutableRam", "Mmap").
SparseIndexType = Union[
SparseIndexTypeOneOf,
SparseIndexTypeOneOf1,
SparseIndexTypeOneOf2,
]
TrackerStatus = Union[
TrackerStatusOneOf,
TrackerStatusOneOf1,
Expand Down
16 changes: 8 additions & 8 deletions qdrant_client/proto/collections.proto
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ message OptimizersConfigDiff {
Do not create segments larger than this size (in kilobytes).
Large segments might require disproportionately long indexation times,
therefore it makes sense to limit the size of segments.
If indexing speed is more important - make this parameter lower.
If search speed is more important - make this parameter higher.
Note: 1kB = 1 vector of size 256
Expand All @@ -201,11 +201,11 @@ message OptimizersConfigDiff {
optional uint64 memmap_threshold = 5;
/*
Maximum size (in kilobytes) of vectors allowed for plain index; exceeding this threshold will enable vector indexing.
Default value is 20,000, based on <https://github.com/google-research/google-research/blob/master/scann/docs/algorithms.md>.
To disable vector indexing, set to `0`.
Note: 1kB = 1 vector of size 256.
*/
optional uint64 indexing_threshold = 6;
Expand Down Expand Up @@ -358,14 +358,14 @@ message PayloadSchemaInfo {
message CollectionInfo {
CollectionStatus status = 1; // operating condition of the collection
OptimizerStatus optimizer_status = 2; // status of collection optimizers
uint64 vectors_count = 3; // number of vectors in the collection
optional uint64 vectors_count = 3; // Approximate number of vectors in the collection
uint64 segments_count = 4; // Number of independent segments
reserved 5; // Deprecated
reserved 6; // Deprecated
CollectionConfig config = 7; // Configuration
map<string, PayloadSchemaInfo> payload_schema = 8; // Collection data types
uint64 points_count = 9; // number of points in the collection
optional uint64 indexed_vectors_count = 10; // number of indexed vectors in the collection.
optional uint64 points_count = 9; // Approximate number of points in the collection
optional uint64 indexed_vectors_count = 10; // Approximate number of indexed vectors in the collection.
}

message ChangeAliases {
Expand Down Expand Up @@ -454,7 +454,7 @@ message ShardTransferInfo {
}

message CollectionClusterInfoResponse {
uint64 peer_id = 1; // ID of this peer
uint64 peer_id = 1; // ID of this peer
uint64 shard_count = 2; // Total number of shards
repeated LocalShardInfo local_shards = 3; // Local shards
repeated RemoteShardInfo remote_shards = 4; // Remote shards
Expand Down
3 changes: 3 additions & 0 deletions tests/conversions/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@
status=collection_status_green,
optimizer_status=optimizer_status,
vectors_count=100000,
points_count=100000,
segments_count=6,
config=collection_config,
payload_schema={
Expand All @@ -359,6 +360,7 @@
status=collection_status,
optimizer_status=optimizer_status_error,
vectors_count=100000,
points_count=100000,
segments_count=6,
config=collection_config,
payload_schema={
Expand All @@ -379,6 +381,7 @@
status=collection_status_error,
optimizer_status=optimizer_status_error,
vectors_count=100000,
points_count=100000,
segments_count=6,
config=collection_config,
payload_schema={
Expand Down
2 changes: 1 addition & 1 deletion tests/test_qdrant_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,7 +993,7 @@ def test_sparse_vectors(prefer_grpc):
vectors_config={},
sparse_vectors_config={
"text": models.SparseVectorParams(
index=models.SparseIndexConfig(
index=models.SparseIndexParams(
on_disk=False,
full_scan_threshold=100,
)
Expand Down
Loading