-
Notifications
You must be signed in to change notification settings - Fork 138
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix sort sparse vectors #442
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,11 @@ | ||
from qdrant_client.conversions import common_types as types | ||
from qdrant_client.http.models import SparseVector | ||
from typing import List, Optional | ||
import random | ||
from typing import List, Optional | ||
|
||
import numpy as np | ||
|
||
from qdrant_client.conversions import common_types as types | ||
from qdrant_client.http.models import SparseVector | ||
|
||
|
||
def empty_sparse_vector() -> SparseVector: | ||
return SparseVector( | ||
|
@@ -13,7 +15,9 @@ def empty_sparse_vector() -> SparseVector: | |
|
||
|
||
def validate_sparse_vector(vector: SparseVector) -> None: | ||
assert len(vector.indices) == len(vector.values), "Indices and values must have the same length" | ||
assert len(vector.indices) == len( | ||
vector.values | ||
), "Indices and values must have the same length" | ||
assert len(vector.indices) == len(set(vector.indices)), "Indices must be unique" | ||
|
||
|
||
|
@@ -24,18 +28,20 @@ def is_sorted(vector: SparseVector) -> bool: | |
return True | ||
|
||
|
||
def sort(vector: SparseVector) -> SparseVector: | ||
def sort_sparse_vector(vector: SparseVector) -> SparseVector: | ||
if is_sorted(vector): | ||
return vector | ||
|
||
sorted_indices = np.argsort(vector.indices) | ||
return SparseVector( | ||
indices=vector.indices[sorted_indices], | ||
values=vector.values[sorted_indices], | ||
indices=[vector.indices[i] for i in sorted_indices], | ||
values=[vector.values[i] for i in sorted_indices], | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this change necessary?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To take elements from an array by several indices at once, the array itself must support that operation (e.g. np.array). A plain Python list does not, so indexing it with the result of np.argsort fails; the elements are therefore picked one index at a time. |
||
) | ||
|
||
|
||
def calculate_distance_sparse(query: SparseVector, vectors: List[SparseVector]) -> types.NumpyArray: | ||
def calculate_distance_sparse( | ||
query: SparseVector, vectors: List[SparseVector] | ||
) -> types.NumpyArray: | ||
scores = [] | ||
|
||
for vector in vectors: | ||
|
@@ -74,23 +80,3 @@ def sparse_dot_product(vector1: SparseVector, vector2: SparseVector) -> Optional | |
return np.float32(result) | ||
else: | ||
return None | ||
|
||
|
||
# Generate random sparse vector with given size and density | ||
# The density is the probability of non-zero value over the whole vector | ||
def generate_random_sparse_vector(size: int, density: float) -> SparseVector: | ||
num_non_zero = int(size * density) | ||
indices: List[int] = random.sample(range(size), num_non_zero) | ||
values: List[float] = [round(random.random(), 6) for _ in range(num_non_zero)] | ||
indices.sort() | ||
sparse_vector = SparseVector(indices=indices, values=values) | ||
validate_sparse_vector(sparse_vector) | ||
return sparse_vector | ||
|
||
|
||
def generate_random_sparse_vector_list(num_vectors: int, vector_size: int, vector_density: float) -> List[SparseVector]: | ||
sparse_vector_list = [] | ||
for _ in range(num_vectors): | ||
sparse_vector = generate_random_sparse_vector(vector_size, vector_density) | ||
sparse_vector_list.append(sparse_vector) | ||
return sparse_vector_list |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's embarrassing, I guess I thought this was a sort in place 😵‍💫
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🙈