Skip to content

Commit 6242b9c

Browse files
committed
feat: automatically download additional shards if quant is split
1 parent a6a5836 commit 6242b9c

File tree

2 files changed

+11
-1
lines changed

2 files changed

+11
-1
lines changed

kani/engines/llamacpp/base.py

+10
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import re
23
import warnings
34
from typing import AsyncIterable
45

@@ -60,6 +61,15 @@ def __init__(
6061
self.filename = filename
6162
self.pipeline = prompt_pipeline
6263

# For convenience: if the requested filename is one shard of a split GGUF
# ("<stem>-00001-of-0000X.gguf"), mark every sibling shard as an additional
# file to download — unless the caller already set "additional_files".
if match := re.match(r"(.*?)-(\d+)-of-(\d+)\.gguf", filename):
    additional_files = []
    # Shard numbers are 1-based and run through the total in match[3]
    # ("-00001-of-00005" .. "-00005-of-00005"), so enumerate 1..total
    # inclusive. NOTE: range(int(match[3])) would yield 0..total-1,
    # silently dropping the last shard and emitting a bogus "0" entry.
    for n in range(1, int(match[3]) + 1):
        if n == int(match[2]):
            continue  # this is the primary file itself; already downloaded
        # Zero-pad to the same width as the primary shard's number so the
        # name matches exactly; a "*{n}" glob would be ambiguous once there
        # are >= 10 shards (e.g. "*2" matches both 00002 and 00012).
        additional_files.append(f"{match[1]}-{n:0{len(match[2])}d}-of-{match[3]}.gguf")
    model_load_kwargs.setdefault("additional_files", additional_files)
72+
6373
model_load_kwargs.setdefault("n_ctx", max_context_size)
6474
self.model = Llama.from_pretrained(repo_id=repo_id, filename=filename, **model_load_kwargs)
6575
self.hyperparams = hyperparams

sandbox/r1-quant.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
pipeline = ChatTemplatePromptPipeline.from_pretrained("deepseek-ai/DeepSeek-R1")
1919
engine = LlamaCppEngine(
2020
repo_id="unsloth/DeepSeek-R1-GGUF",
21-
filename="DeepSeek-R1-GGUF/*UD-Q2_K_XL*.gguf",
21+
filename="DeepSeek-R1-Q2_K_XS/DeepSeek-R1-Q2_K_XS-00001-of-00005.gguf",
2222
prompt_pipeline=pipeline,
2323
model_load_kwargs={"n_gpu_layers": -1},
2424
)

0 commit comments

Comments
 (0)