
Commit

docs: Add description for changes
hh-space-invader committed Mar 5, 2025
1 parent 1c016a2 commit 5ea3bbc
Showing 6 changed files with 18 additions and 1 deletion.
3 changes: 3 additions & 0 deletions fastembed/common/onnx_model.py
@@ -68,6 +68,9 @@ def _load_onnx_model(
if device_id is None:
onnx_providers = ["CUDAExecutionProvider"]
else:
# kSameAsRequested: Allocates only the requested memory, avoiding over-allocation.
# more precise than 'kNextPowerOfTwo', which grows memory aggressively.
# source: https://onnxruntime.ai/docs/get-started/with-c.html#features:~:text=Memory%20arena%20shrinkage:
onnx_providers = [
(
"CUDAExecutionProvider",
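For reference, a minimal sketch of what a CUDAExecutionProvider entry with the kSameAsRequested arena strategy can look like when building an ONNX Runtime session. The options dict, the CPU fallback, the model path, and the device id below are illustrative assumptions; the exact options fastembed passes are cut off in this hunk.

import onnxruntime as ort

# (name, options) tuples let an execution provider be configured per session;
# "arena_extend_strategy": "kSameAsRequested" allocates only what a request needs
# instead of extending the arena by the next power of two.
onnx_providers = [
    (
        "CUDAExecutionProvider",
        {
            "device_id": 0,  # hypothetical device id
            "arena_extend_strategy": "kSameAsRequested",
        },
    ),
    "CPUExecutionProvider",  # hypothetical CPU fallback
]
session = ort.InferenceSession("model.onnx", providers=onnx_providers)  # hypothetical model path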
3 changes: 3 additions & 0 deletions fastembed/image/onnx_image_model.py
@@ -82,6 +82,9 @@ def onnx_embed(self, images: list[ImageInput], **kwargs: Any) -> OnnxOutputConte
if is_cuda_enabled(cuda, providers):
device_id = kwargs.get("device_id", None)
device_id = str(device_id if isinstance(device_id, int) else 0)
# enables memory arena shrinkage, freeing unused memory after each Run() cycle.
# helps prevent excessive memory retention, especially for dynamic workloads.
# source: https://onnxruntime.ai/docs/get-started/with-c.html#features:~:text=Memory%20arena%20shrinkage:
run_options.add_run_config_entry(
"memory.enable_memory_arena_shrinkage", f"gpu:{device_id}"
)
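The same run-config entry is added in the remaining changed files below. For reference, a minimal sketch of how such a RunOptions entry is consumed at inference time; the session, model path, input feed, and device id are illustrative assumptions, while RunOptions.add_run_config_entry and the run_options argument of InferenceSession.run are standard ONNX Runtime API.

import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("model.onnx", providers=["CUDAExecutionProvider"])  # hypothetical model
run_options = ort.RunOptions()
# ask ONNX Runtime to shrink the GPU arena for device 0 when this Run() finishes,
# releasing unused arena chunks instead of keeping them allocated between calls
run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
outputs = session.run(
    None,  # return all model outputs
    {"input_ids": np.zeros((1, 8), dtype=np.int64)},  # hypothetical input feed
    run_options=run_options,
)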
1 change: 0 additions & 1 deletion fastembed/late_interaction/colbert.py
@@ -201,7 +201,6 @@ def load_onnx_model(self) -> None:
current_max_length = self.tokenizer.truncation["max_length"]
# ensure not to overflow after adding document-marker
self.tokenizer.enable_truncation(max_length=current_max_length - 1)
print("ME VERSION")

def embed(
self,
@@ -111,6 +111,9 @@ def onnx_embed_text(
if is_cuda_enabled(cuda, providers):
device_id = kwargs.get("device_id", None)
device_id = str(device_id if isinstance(device_id, int) else 0)
# enables memory arena shrinkage, freeing unused memory after each Run() cycle.
# helps prevent excessive memory retention, especially for dynamic workloads.
# source: https://onnxruntime.ai/docs/get-started/with-c.html#features:~:text=Memory%20arena%20shrinkage:
run_options.add_run_config_entry(
"memory.enable_memory_arena_shrinkage", f"gpu:{device_id}"
)
@@ -188,6 +191,9 @@ def onnx_embed_image(self, images: list[ImageInput], **kwargs: Any) -> OnnxOutpu
if is_cuda_enabled(cuda, providers):
device_id = kwargs.get("device_id", None)
device_id = str(device_id if isinstance(device_id, int) else 0)
# enables memory arena shrinkage, freeing unused memory after each Run() cycle.
# helps prevent excessive memory retention, especially for dynamic workloads.
# source: https://onnxruntime.ai/docs/get-started/with-c.html#features:~:text=Memory%20arena%20shrinkage:
run_options.add_run_config_entry(
"memory.enable_memory_arena_shrinkage", f"gpu:{device_id}"
)
3 changes: 3 additions & 0 deletions fastembed/rerank/cross_encoder/onnx_text_model.py
@@ -79,6 +79,9 @@ def onnx_embed_pairs(self, pairs: list[tuple[str, str]], **kwargs: Any) -> OnnxO
if is_cuda_enabled(cuda, providers):
device_id = kwargs.get("device_id", None)
device_id = str(device_id if isinstance(device_id, int) else 0)
# Enables memory arena shrinkage, freeing unused memory after each Run() cycle.
# Helps prevent excessive memory retention, especially for dynamic workloads.
# Source: https://onnxruntime.ai/docs/get-started/with-c.html#features:~:text=Memory%20arena%20shrinkage:
run_options.add_run_config_entry(
"memory.enable_memory_arena_shrinkage", f"gpu:{device_id}"
)
3 changes: 3 additions & 0 deletions fastembed/text/onnx_text_model.py
@@ -89,6 +89,9 @@ def onnx_embed(
if is_cuda_enabled(cuda, providers):
device_id = kwargs.get("device_id", None)
device_id = str(device_id if isinstance(device_id, int) else 0)
# enables memory arena shrinkage, freeing unused memory after each Run() cycle.
# helps prevent excessive memory retention, especially for dynamic workloads.
# source: https://onnxruntime.ai/docs/get-started/with-c.html#features:~:text=Memory%20arena%20shrinkage:
run_options.add_run_config_entry(
"memory.enable_memory_arena_shrinkage", f"gpu:{device_id}"
)
