Skip to content

Commit 6242b9c

Browse files
committed
feat: automatically download additional shards if quant is split
1 parent a6a5836 commit 6242b9c

File tree

2 files changed

+11
-1
lines changed

2 files changed

+11
-1
lines changed

kani/engines/llamacpp/base.py

+10
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import re
23
import warnings
34
from typing import AsyncIterable
45

@@ -60,6 +61,15 @@ def __init__(
6061
self.filename = filename
6162
self.pipeline = prompt_pipeline
6263

# For convenience: if the requested filename is one shard of a split GGUF
# ("<stem>-00001-of-0000X.gguf"), mark every sibling shard as an additional
# file to download — unless the caller already set "additional_files".
if match := re.match(r"(.*?)-(\d+)-of-(\d+)\.gguf", filename):
    additional_files = []
    # Shard numbers are 1-based and run through the total in match[3]
    # ("-00001-of-00005" .. "-00005-of-00005"), so enumerate 1..total
    # inclusive. NOTE: range(int(match[3])) would yield 0..total-1,
    # silently dropping the last shard and emitting a bogus "0" entry.
    for n in range(1, int(match[3]) + 1):
        if n == int(match[2]):
            continue  # this is the primary file itself; already downloaded
        # Zero-pad to the same width as the primary shard's number so the
        # name matches exactly; a "*{n}" glob would be ambiguous once there
        # are >= 10 shards (e.g. "*2" matches both 00002 and 00012).
        additional_files.append(f"{match[1]}-{n:0{len(match[2])}d}-of-{match[3]}.gguf")
    model_load_kwargs.setdefault("additional_files", additional_files)
72+
6373
model_load_kwargs.setdefault("n_ctx", max_context_size)
6474
self.model = Llama.from_pretrained(repo_id=repo_id, filename=filename, **model_load_kwargs)
6575
self.hyperparams = hyperparams

sandbox/r1-quant.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
pipeline = ChatTemplatePromptPipeline.from_pretrained("deepseek-ai/DeepSeek-R1")
1919
engine = LlamaCppEngine(
2020
repo_id="unsloth/DeepSeek-R1-GGUF",
21-
filename="DeepSeek-R1-GGUF/*UD-Q2_K_XL*.gguf",
21+
filename="DeepSeek-R1-Q2_K_XS/DeepSeek-R1-Q2_K_XS-00001-of-00005.gguf",
2222
prompt_pipeline=pipeline,
2323
model_load_kwargs={"n_gpu_layers": -1},
2424
)

0 commit comments

Comments
 (0)