Commit 4d27597
llama : move load tensors to llama_model
ggml-ci
ggerganov committed Jan 7, 2025
1 parent 5ea6cdd commit 4d27597
Showing 7 changed files with 2,745 additions and 2,730 deletions.
2 changes: 1 addition & 1 deletion src/llama-adapter.cpp
@@ -257,7 +257,7 @@ static void llama_lora_adapter_init_impl(struct llama_model & model, const char
         }
 
         // device buft and device ctx
-        const auto * model_tensor = model.get_tensor( name.c_str());
+        const auto * model_tensor = model.get_tensor(name.c_str());
         if (!model_tensor) {
             throw std::runtime_error("LoRA tensor '" + name + "' does not exist in base model");
         }
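This hunk only drops a stray space in the get_tensor call, but it shows the lookup contract the adapter code depends on: llama_model::get_tensor returns a null pointer for unknown names, and the LoRA loader converts that into an exception. A minimal sketch of the same fail-fast pattern; the require_tensor helper is hypothetical (not part of this commit), and it assumes the llama.cpp internal headers are available:

#include <stdexcept>
#include <string>

// Hypothetical helper wrapping the fail-fast lookup from the hunk above:
// a nullptr result from get_tensor becomes a hard error.
static const struct ggml_tensor * require_tensor(const struct llama_model & model, const std::string & name) {
    const auto * t = model.get_tensor(name.c_str());
    if (!t) {
        throw std::runtime_error("LoRA tensor '" + name + "' does not exist in base model");
    }
    return t;
}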
61 changes: 61 additions & 0 deletions src/llama-model-loader.cpp
@@ -7,6 +7,10 @@
 #include <cstring>
 #include <future>
 
+static const size_t kiB = 1024;
+static const size_t MiB = 1024*kiB;
+static const size_t GiB = 1024*MiB;
+
 const char * llama_file_version_name(llama_fver version) {
     switch (version) {
         case GGUF_FILE_VERSION_V1: return "GGUF V1 (support until nov 2023)";
@@ -17,6 +21,49 @@ const char * llama_file_version_name(llama_fver version) {
     return "unknown";
 }
 
+static std::string llama_model_ftype_name(llama_ftype ftype) {
+    if (ftype & LLAMA_FTYPE_GUESSED) {
+        return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
+    }
+
+    switch (ftype) {
+        case LLAMA_FTYPE_ALL_F32:        return "all F32";
+        case LLAMA_FTYPE_MOSTLY_F16:     return "F16";
+        case LLAMA_FTYPE_MOSTLY_BF16:    return "BF16";
+        case LLAMA_FTYPE_MOSTLY_Q4_0:    return "Q4_0";
+        case LLAMA_FTYPE_MOSTLY_Q4_1:    return "Q4_1";
+        case LLAMA_FTYPE_MOSTLY_Q5_0:    return "Q5_0";
+        case LLAMA_FTYPE_MOSTLY_Q5_1:    return "Q5_1";
+        case LLAMA_FTYPE_MOSTLY_Q8_0:    return "Q8_0";
+        case LLAMA_FTYPE_MOSTLY_Q2_K:    return "Q2_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q2_K_S:  return "Q2_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_S:  return "Q3_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_M:  return "Q3_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q3_K_L:  return "Q3_K - Large";
+        case LLAMA_FTYPE_MOSTLY_Q4_K_S:  return "Q4_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q4_K_M:  return "Q4_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q5_K_S:  return "Q5_K - Small";
+        case LLAMA_FTYPE_MOSTLY_Q5_K_M:  return "Q5_K - Medium";
+        case LLAMA_FTYPE_MOSTLY_Q6_K:    return "Q6_K";
+        case LLAMA_FTYPE_MOSTLY_TQ1_0:   return "TQ1_0 - 1.69 bpw ternary";
+        case LLAMA_FTYPE_MOSTLY_TQ2_0:   return "TQ2_0 - 2.06 bpw ternary";
+        case LLAMA_FTYPE_MOSTLY_IQ2_XXS: return "IQ2_XXS - 2.0625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_XS:  return "IQ2_XS - 2.3125 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_S:   return "IQ2_S - 2.5 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ2_M:   return "IQ2_M - 2.7 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_XS:  return "IQ3_XS - 3.3 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_XXS: return "IQ3_XXS - 3.0625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ1_S:   return "IQ1_S - 1.5625 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ1_M:   return "IQ1_M - 1.75 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ4_NL:  return "IQ4_NL - 4.5 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ4_XS:  return "IQ4_XS - 4.25 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_S:   return "IQ3_S - 3.4375 bpw";
+        case LLAMA_FTYPE_MOSTLY_IQ3_M:   return "IQ3_S mix - 3.66 bpw";
+
+        default: return "unknown, may not work";
+    }
+}
+
 namespace GGUFMeta {
     template <typename T, gguf_type gt_, T (*gfun)(const gguf_context *, const int)>
     struct GKV_Base_Type {
@@ -1008,3 +1055,17 @@ bool llama_model_loader::load_all_data(
 
     return true;
 }
+
+std::string llama_model_loader::ftype_name() const {
+    return llama_model_ftype_name(ftype);
+}
+
+void llama_model_loader::print_info() const {
+    LLAMA_LOG_INFO("%s: file format = %s\n", __func__, llama_file_version_name(fver));
+    LLAMA_LOG_INFO("%s: file type   = %s\n", __func__, llama_model_ftype_name(ftype).c_str());
+    if (n_bytes < GiB) {
+        LLAMA_LOG_INFO("%s: file size   = %.2f MiB (%.2f BPW) \n", __func__, n_bytes/1024.0/1024.0, n_bytes*8.0/n_elements);
+    } else {
+        LLAMA_LOG_INFO("%s: file size   = %.2f GiB (%.2f BPW) \n", __func__, n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);
+    }
+}
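The new print_info() switches from MiB to GiB output at the one-GiB boundary and derives bits per weight (BPW) as n_bytes*8.0/n_elements. A self-contained sketch of just that formatting logic, compilable on its own; the helper name print_file_size and the sample values are made up for illustration:

#include <cstdio>
#include <cstddef>
#include <cstdint>

static const size_t kiB = 1024;
static const size_t MiB = 1024*kiB;
static const size_t GiB = 1024*MiB;

// Mirrors the size/BPW formatting in print_info() above; the function
// name and the sample numbers below are illustrative only.
static void print_file_size(size_t n_bytes, uint64_t n_elements) {
    const double bpw = n_bytes*8.0/n_elements; // bits per weight
    if (n_bytes < GiB) {
        printf("file size = %.2f MiB (%.2f BPW)\n", n_bytes/1024.0/1024.0, bpw);
    } else {
        printf("file size = %.2f GiB (%.2f BPW)\n", n_bytes/1024.0/1024.0/1024.0, bpw);
    }
}

int main() {
    print_file_size( 500*MiB, 1000000000ULL); // MiB branch: 500.00 MiB (4.19 BPW)
    print_file_size(3826*MiB, 6738415616ULL); // GiB branch: 3.74 GiB (4.76 BPW)
    return 0;
}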
4 changes: 4 additions & 0 deletions src/llama-model-loader.h
@@ -155,4 +155,8 @@ struct llama_model_loader {
             llama_mlocks * lmlocks,
             llama_progress_callback progress_callback,
             void * progress_callback_user_data);
+
+    std::string ftype_name() const;
+
+    void print_info() const;
 };
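The header now exposes the loader's file-level reporting through these two public methods. A hedged usage sketch; report_model_file and the ml instance are assumptions, only ftype_name() and print_info() come from this commit:

#include <string>
#include "llama-model-loader.h"

// Hypothetical call site: ml is an already-constructed loader.
void report_model_file(const llama_model_loader & ml) {
    ml.print_info();                          // logs file format, file type, size and BPW
    const std::string desc = ml.ftype_name(); // e.g. "Q4_K - Medium" per the table above
    (void) desc;
}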