Fix typos in code. #4355

Open · wants to merge 1 commit into master
4 changes: 2 additions & 2 deletions convert-hf-to-gguf.py
@@ -40,13 +40,13 @@ def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_big_endian:
self.ftype = ftype
self.fname_out = fname_out
self.is_big_endian = is_big_endian
- self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
+ self.endianness = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
self.is_safetensors = self._is_model_safetensors()
self.num_parts = Model.count_model_parts(self.dir_model, ".safetensors" if self.is_safetensors else ".bin")
self.part_names = self._get_part_names()
self.hparams = Model.load_hparams(self.dir_model)
self.model_arch = self._get_model_architecture()
- self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess)
+ self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianness=self.endianness)

def set_vocab(self):
self._set_vocab_gpt2()
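The hunk above fixes the `endianess` → `endianness` spelling both in the converter's attribute and in the keyword it passes to `GGUFWriter`. A minimal sketch of how a caller would construct a writer after this rename; the output path and architecture string below are illustrative, not taken from the PR:

```python
import gguf

# Hypothetical caller. After this PR the keyword is spelled `endianness`;
# code still passing `endianess=` would now raise a TypeError.
writer = gguf.GGUFWriter(
    "model-big-endian.gguf",          # illustrative output path
    "llama",                          # architecture name, as a plain string
    endianness=gguf.GGUFEndian.BIG,   # renamed keyword
)
```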
18 changes: 9 additions & 9 deletions convert.py
@@ -812,8 +812,8 @@ def check_vocab_size(params: Params, vocab: Vocab) -> None:


class OutputFile:
- def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
- self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
+ def __init__(self, fname_out: Path, endianness:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
+ self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianness=endianness)

def add_meta_arch(self, params: Params) -> None:
name = "LLaMA"
@@ -892,10 +892,10 @@ def close(self) -> None:
self.gguf.close()

@staticmethod
- def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
+ def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianness:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
check_vocab_size(params, vocab)

- of = OutputFile(fname_out, endianess=endianess)
+ of = OutputFile(fname_out, endianness=endianness)

# meta data
of.add_meta_arch(params)
@@ -920,10 +920,10 @@ def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray:
return dt.quantize(arr)

@staticmethod
- def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
+ def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianness: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
check_vocab_size(params, vocab)

- of = OutputFile(fname_out, endianess=endianess)
+ of = OutputFile(fname_out, endianness=endianness)

# meta data
of.add_meta_arch(params)
@@ -1165,9 +1165,9 @@ def main(args_in: list[str] | None = None) -> None:
if args.dump:
do_dump_model(model_plus)
return
- endianess = gguf.GGUFEndian.LITTLE
+ endianness = gguf.GGUFEndian.LITTLE
if args.bigendian:
- endianess = gguf.GGUFEndian.BIG
+ endianness = gguf.GGUFEndian.BIG

params = Params.load(model_plus)
if params.n_ctx == -1:
@@ -1220,7 +1220,7 @@ def main(args_in: list[str] | None = None) -> None:
params.ftype = ftype
print(f"Writing {outfile}, format {ftype}")

- OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency, endianess=endianess)
+ OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency, endianness=endianness)
print(f"Wrote {outfile}")


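In convert.py the renamed variable keeps the same behaviour: output defaults to little-endian and switches to big-endian only when the `--bigendian` flag is given, and the chosen value is forwarded to `OutputFile.write_all` through the renamed `endianness` keyword. A minimal sketch of that selection in isolation (the parser below is a cut-down stand-in for the script's real argument parser):

```python
import argparse
import gguf

parser = argparse.ArgumentParser()
parser.add_argument("--bigendian", action="store_true",
                    help="write the GGUF file in big-endian byte order")
args = parser.parse_args(["--bigendian"])  # illustrative invocation

# Same selection as in main(): little-endian unless --bigendian was passed.
endianness = gguf.GGUFEndian.BIG if args.bigendian else gguf.GGUFEndian.LITTLE
print(endianness.name)  # BIG
```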
26 changes: 13 additions & 13 deletions examples/server/server.cpp
@@ -409,7 +409,7 @@ struct llama_client_slot
size_t sent_token_probs_index = 0;

int64_t t_start_process_prompt;
- int64_t t_start_genereration;
+ int64_t t_start_generation;

double t_prompt_processing; // ms
double t_token_generation; // ms
@@ -477,12 +477,12 @@ struct llama_client_slot
void release() {
if (state == IDLE || state == PROCESSING)
{
- t_token_generation = (ggml_time_us() - t_start_genereration) / 1e3;
+ t_token_generation = (ggml_time_us() - t_start_generation) / 1e3;
command = RELEASE;
}
}

- json get_formated_timings() {
+ json get_formatted_timings() {
return json
{
{"prompt_n", num_prompt_tokens_processed},
@@ -1160,10 +1160,10 @@ struct llama_server_context

json get_model_props()
{
- return get_formated_generation(slots[0]);
+ return get_formatted_generation(slots[0]);
}

- json get_formated_generation(llama_client_slot &slot)
+ json get_formatted_generation(llama_client_slot &slot)
{
const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
@@ -1254,15 +1254,15 @@ struct llama_server_context
{"model", params.model_alias},
{"tokens_predicted", slot.n_decoded},
{"tokens_evaluated", slot.num_prompt_tokens},
{"generation_settings", get_formated_generation(slot)},
{"generation_settings", get_formatted_generation(slot)},
{"prompt", slot.prompt},
{"truncated", slot.truncated},
{"stopped_eos", slot.stopped_eos},
{"stopped_word", slot.stopped_word},
{"stopped_limit", slot.stopped_limit},
{"stopping_word", slot.stopping_word},
{"tokens_cached", slot.n_past},
{"timings", slot.get_formated_timings()}
{"timings", slot.get_formatted_timings()}
};

if (slot.sparams.n_probs > 0)
@@ -1681,7 +1681,7 @@ struct llama_server_context
slot.command = NONE;
std::vector<llama_token> prompt_tokens;
slot.t_start_process_prompt = ggml_time_us();
- slot.t_start_genereration = 0;
+ slot.t_start_generation = 0;

if (slot.infill)
{
@@ -1871,8 +1871,8 @@ struct llama_server_context

if (slot.n_decoded == 1)
{
- slot.t_start_genereration = ggml_time_us();
- slot.t_prompt_processing = (slot.t_start_genereration - slot.t_start_process_prompt) / 1e3;
+ slot.t_start_generation = ggml_time_us();
+ slot.t_prompt_processing = (slot.t_start_generation - slot.t_start_process_prompt) / 1e3;
}

llama_token_data_array cur_p = { slot.ctx_sampling->cur.data(), slot.ctx_sampling->cur.size(), false };
@@ -2299,13 +2299,13 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
invalid_param = true;
break;
}
- std::string systm_content;
+ std::string system_content;
std::copy(
std::istreambuf_iterator<char>(file),
std::istreambuf_iterator<char>(),
- std::back_inserter(systm_content)
+ std::back_inserter(system_content)
);
- llama.process_system_prompt_data(json::parse(systm_content));
+ llama.process_system_prompt_data(json::parse(system_content));
}
else if(arg == "--mmproj")
{
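Besides the spelling fixes, the server's renamed fields record two timings in milliseconds: prompt processing is the gap between the prompt start and the first sampled token, and token generation runs from that first token until the slot is released, with the `ggml_time_us()` microsecond timestamps divided by 1e3. A worked example of the same arithmetic with made-up timestamps:

```python
# Illustrative microsecond timestamps, not real measurements.
t_start_process_prompt = 1_000_000   # prompt processing starts
t_start_generation     = 1_250_000   # first token sampled
t_release              = 4_250_000   # slot released

t_prompt_processing = (t_start_generation - t_start_process_prompt) / 1e3  # 250.0 ms
t_token_generation  = (t_release - t_start_generation) / 1e3               # 3000.0 ms
print(t_prompt_processing, t_token_generation)
```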
12 changes: 6 additions & 6 deletions gguf-py/gguf/gguf_writer.py
@@ -50,11 +50,11 @@ class GGUFWriter:

def __init__(
self, path: os.PathLike[str] | str, arch: str, use_temp_file: bool = True,
- endianess: GGUFEndian = GGUFEndian.LITTLE,
+ endianness: GGUFEndian = GGUFEndian.LITTLE,
):
self.fout = open(path, "wb")
self.arch = arch
- self.endianess = endianess
+ self.endianness = endianness
self.offset_tensor = 0
self.data_alignment = GGUF_DEFAULT_ALIGNMENT
self.kv_data = bytearray()
@@ -65,7 +65,7 @@ def __init__(
self.temp_file = None
self.tensors = []
print("gguf: This GGUF file is for {0} Endian only".format(
"Big" if self.endianess == GGUFEndian.BIG else "Little",
"Big" if self.endianness == GGUFEndian.BIG else "Little",
))
self.state = WriterState.EMPTY

@@ -218,7 +218,7 @@ def add_tensor(
self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
raw_dtype: GGMLQuantizationType | None = None,
) -> None:
- if self.endianess == GGUFEndian.BIG:
+ if self.endianness == GGUFEndian.BIG:
tensor.byteswap(inplace=True)
if self.use_temp_file and self.temp_file is None:
fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256 * 1024 * 1024)
@@ -244,7 +244,7 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
if self.state is not WriterState.TI_DATA:
raise ValueError(f'Expected output file to contain tensor info, got {self.state}')

- if self.endianess == GGUFEndian.BIG:
+ if self.endianness == GGUFEndian.BIG:
tensor.byteswap(inplace=True)
self.write_padding(self.fout, self.fout.tell())
tensor.tofile(self.fout)
@@ -405,7 +405,7 @@ def add_chat_template(self, value: str) -> None:
def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
pack_prefix = ''
if not skip_pack_prefix:
- pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
+ pack_prefix = '<' if self.endianness == GGUFEndian.LITTLE else '>'
return struct.pack(f'{pack_prefix}{fmt}', value)

def _write_packed(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
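The writer applies the selected byte order in the two places touched above: scalar metadata is packed with a `<` or `>` struct prefix, and tensor data is byteswapped in place before being written when the target is big-endian. A small standalone sketch of both mechanisms, independent of the gguf package (values and output path are illustrative):

```python
import struct
import numpy as np

big_endian = True
pack_prefix = '>' if big_endian else '<'

# Scalar metadata: same integer, byte order decided by the pack prefix.
packed = struct.pack(f'{pack_prefix}I', 0x11223344)
print(packed.hex())  # 11223344 when big-endian, 44332211 when little-endian

# Tensor data: numpy arrays are byteswapped in place before writing.
tensor = np.arange(4, dtype=np.float32)
if big_endian:
    tensor.byteswap(inplace=True)
tensor.tofile("/tmp/tensor.bin")  # illustrative output path
```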