Fix typos in code. #4355

Open · wants to merge 1 commit into master
4 changes: 2 additions & 2 deletions convert-hf-to-gguf.py
@@ -40,13 +40,13 @@ def __init__(self, dir_model: Path, ftype: int, fname_out: Path, is_big_endian:
self.ftype = ftype
self.fname_out = fname_out
self.is_big_endian = is_big_endian
- self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
+ self.endianness = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
self.is_safetensors = self._is_model_safetensors()
self.num_parts = Model.count_model_parts(self.dir_model, ".safetensors" if self.is_safetensors else ".bin")
self.part_names = self._get_part_names()
self.hparams = Model.load_hparams(self.dir_model)
self.model_arch = self._get_model_architecture()
- self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess)
+ self.gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianness=self.endianness)

def set_vocab(self):
self._set_vocab_gpt2()
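The hunk above fixes the `endianess` → `endianness` spelling both in the converter's attribute and in the keyword it passes to `GGUFWriter`. A minimal sketch of how a caller would construct a writer after this rename; the output path and architecture string below are illustrative, not taken from the PR:

```python
import gguf

# Hypothetical caller. After this PR the keyword is spelled `endianness`;
# code still passing `endianess=` would now raise a TypeError.
writer = gguf.GGUFWriter(
    "model-big-endian.gguf",          # illustrative output path
    "llama",                          # architecture name, as a plain string
    endianness=gguf.GGUFEndian.BIG,   # renamed keyword
)
```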
18 changes: 9 additions & 9 deletions convert.py
@@ -812,8 +812,8 @@ def check_vocab_size(params: Params, vocab: Vocab) -> None:


class OutputFile:
- def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
- self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)
+ def __init__(self, fname_out: Path, endianness:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
+ self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianness=endianness)

def add_meta_arch(self, params: Params) -> None:
name = "LLaMA"
@@ -892,10 +892,10 @@ def close(self) -> None:
self.gguf.close()

@staticmethod
- def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
+ def write_vocab_only(fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab, endianness:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
check_vocab_size(params, vocab)

- of = OutputFile(fname_out, endianess=endianess)
+ of = OutputFile(fname_out, endianness=endianness)

# meta data
of.add_meta_arch(params)
@@ -920,10 +920,10 @@ def maybe_do_quantize(item: tuple[DataType, NDArray]) -> NDArray:
return dt.quantize(arr)

@staticmethod
- def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
+ def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianness: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
check_vocab_size(params, vocab)

- of = OutputFile(fname_out, endianess=endianess)
+ of = OutputFile(fname_out, endianness=endianness)

# meta data
of.add_meta_arch(params)
@@ -1165,9 +1165,9 @@ def main(args_in: list[str] | None = None) -> None:
if args.dump:
do_dump_model(model_plus)
return
- endianess = gguf.GGUFEndian.LITTLE
+ endianness = gguf.GGUFEndian.LITTLE
if args.bigendian:
- endianess = gguf.GGUFEndian.BIG
+ endianness = gguf.GGUFEndian.BIG

params = Params.load(model_plus)
if params.n_ctx == -1:
@@ -1220,7 +1220,7 @@ def main(args_in: list[str] | None = None) -> None:
params.ftype = ftype
print(f"Writing {outfile}, format {ftype}")

- OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency, endianess=endianess)
+ OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab, concurrency = args.concurrency, endianness=endianness)
print(f"Wrote {outfile}")


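In convert.py the renamed variable keeps the same behaviour: output defaults to little-endian and switches to big-endian only when the `--bigendian` flag is given, and the chosen value is forwarded to `OutputFile.write_all` through the renamed `endianness` keyword. A minimal sketch of that selection in isolation (the parser below is a cut-down stand-in for the script's real argument parser):

```python
import argparse
import gguf

parser = argparse.ArgumentParser()
parser.add_argument("--bigendian", action="store_true",
                    help="write the GGUF file in big-endian byte order")
args = parser.parse_args(["--bigendian"])  # illustrative invocation

# Same selection as in main(): little-endian unless --bigendian was passed.
endianness = gguf.GGUFEndian.BIG if args.bigendian else gguf.GGUFEndian.LITTLE
print(endianness.name)  # BIG
```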
26 changes: 13 additions & 13 deletions examples/server/server.cpp
@@ -409,7 +409,7 @@ struct llama_client_slot
size_t sent_token_probs_index = 0;

int64_t t_start_process_prompt;
- int64_t t_start_genereration;
+ int64_t t_start_generation;

double t_prompt_processing; // ms
double t_token_generation; // ms
@@ -477,12 +477,12 @@ struct llama_client_slot
void release() {
if (state == IDLE || state == PROCESSING)
{
- t_token_generation = (ggml_time_us() - t_start_genereration) / 1e3;
+ t_token_generation = (ggml_time_us() - t_start_generation) / 1e3;
command = RELEASE;
}
}

- json get_formated_timings() {
+ json get_formatted_timings() {
return json
{
{"prompt_n", num_prompt_tokens_processed},
@@ -1160,10 +1160,10 @@ struct llama_server_context

json get_model_props()
{
- return get_formated_generation(slots[0]);
+ return get_formatted_generation(slots[0]);
}

- json get_formated_generation(llama_client_slot &slot)
+ json get_formatted_generation(llama_client_slot &slot)
{
const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
@@ -1254,15 +1254,15 @@ struct llama_server_context
{"model", params.model_alias},
{"tokens_predicted", slot.n_decoded},
{"tokens_evaluated", slot.num_prompt_tokens},
{"generation_settings", get_formated_generation(slot)},
{"generation_settings", get_formatted_generation(slot)},
{"prompt", slot.prompt},
{"truncated", slot.truncated},
{"stopped_eos", slot.stopped_eos},
{"stopped_word", slot.stopped_word},
{"stopped_limit", slot.stopped_limit},
{"stopping_word", slot.stopping_word},
{"tokens_cached", slot.n_past},
{"timings", slot.get_formated_timings()}
{"timings", slot.get_formatted_timings()}
};

if (slot.sparams.n_probs > 0)
@@ -1681,7 +1681,7 @@ struct llama_server_context
slot.command = NONE;
std::vector<llama_token> prompt_tokens;
slot.t_start_process_prompt = ggml_time_us();
- slot.t_start_genereration = 0;
+ slot.t_start_generation = 0;

if (slot.infill)
{
@@ -1871,8 +1871,8 @@ struct llama_server_context

if (slot.n_decoded == 1)
{
- slot.t_start_genereration = ggml_time_us();
- slot.t_prompt_processing = (slot.t_start_genereration - slot.t_start_process_prompt) / 1e3;
+ slot.t_start_generation = ggml_time_us();
+ slot.t_prompt_processing = (slot.t_start_generation - slot.t_start_process_prompt) / 1e3;
}

llama_token_data_array cur_p = { slot.ctx_sampling->cur.data(), slot.ctx_sampling->cur.size(), false };
@@ -2299,13 +2299,13 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
invalid_param = true;
break;
}
- std::string systm_content;
+ std::string system_content;
std::copy(
std::istreambuf_iterator<char>(file),
std::istreambuf_iterator<char>(),
- std::back_inserter(systm_content)
+ std::back_inserter(system_content)
);
- llama.process_system_prompt_data(json::parse(systm_content));
+ llama.process_system_prompt_data(json::parse(system_content));
}
else if(arg == "--mmproj")
{
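Besides the spelling fixes, the server's renamed fields record two timings in milliseconds: prompt processing is the gap between the prompt start and the first sampled token, and token generation runs from that first token until the slot is released, with the `ggml_time_us()` microsecond timestamps divided by 1e3. A worked example of the same arithmetic with made-up timestamps:

```python
# Illustrative microsecond timestamps, not real measurements.
t_start_process_prompt = 1_000_000   # prompt processing starts
t_start_generation     = 1_250_000   # first token sampled
t_release              = 4_250_000   # slot released

t_prompt_processing = (t_start_generation - t_start_process_prompt) / 1e3  # 250.0 ms
t_token_generation  = (t_release - t_start_generation) / 1e3               # 3000.0 ms
print(t_prompt_processing, t_token_generation)
```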
12 changes: 6 additions & 6 deletions gguf-py/gguf/gguf_writer.py
@@ -50,11 +50,11 @@ class GGUFWriter:

def __init__(
self, path: os.PathLike[str] | str, arch: str, use_temp_file: bool = True,
- endianess: GGUFEndian = GGUFEndian.LITTLE,
+ endianness: GGUFEndian = GGUFEndian.LITTLE,
):
self.fout = open(path, "wb")
self.arch = arch
- self.endianess = endianess
+ self.endianness = endianness
self.offset_tensor = 0
self.data_alignment = GGUF_DEFAULT_ALIGNMENT
self.kv_data = bytearray()
@@ -65,7 +65,7 @@ def __init__(
self.temp_file = None
self.tensors = []
print("gguf: This GGUF file is for {0} Endian only".format(
"Big" if self.endianess == GGUFEndian.BIG else "Little",
"Big" if self.endianness == GGUFEndian.BIG else "Little",
))
self.state = WriterState.EMPTY

@@ -218,7 +218,7 @@ def add_tensor(
self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
raw_dtype: GGMLQuantizationType | None = None,
) -> None:
- if self.endianess == GGUFEndian.BIG:
+ if self.endianness == GGUFEndian.BIG:
tensor.byteswap(inplace=True)
if self.use_temp_file and self.temp_file is None:
fp = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256 * 1024 * 1024)
@@ -244,7 +244,7 @@ def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
if self.state is not WriterState.TI_DATA:
raise ValueError(f'Expected output file to contain tensor info, got {self.state}')

- if self.endianess == GGUFEndian.BIG:
+ if self.endianness == GGUFEndian.BIG:
tensor.byteswap(inplace=True)
self.write_padding(self.fout, self.fout.tell())
tensor.tofile(self.fout)
@@ -405,7 +405,7 @@ def add_chat_template(self, value: str) -> None:
def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
pack_prefix = ''
if not skip_pack_prefix:
- pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
+ pack_prefix = '<' if self.endianness == GGUFEndian.LITTLE else '>'
return struct.pack(f'{pack_prefix}{fmt}', value)

def _write_packed(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> None:
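The writer applies the selected byte order in the two places touched above: scalar metadata is packed with a `<` or `>` struct prefix, and tensor data is byteswapped in place before being written when the target is big-endian. A small standalone sketch of both mechanisms, independent of the gguf package (values and output path are illustrative):

```python
import struct
import numpy as np

big_endian = True
pack_prefix = '>' if big_endian else '<'

# Scalar metadata: same integer, byte order decided by the pack prefix.
packed = struct.pack(f'{pack_prefix}I', 0x11223344)
print(packed.hex())  # 11223344 when big-endian, 44332211 when little-endian

# Tensor data: numpy arrays are byteswapped in place before writing.
tensor = np.arange(4, dtype=np.float32)
if big_endian:
    tensor.byteswap(inplace=True)
tensor.tofile("/tmp/tensor.bin")  # illustrative output path
```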