Skip to content

Commit b162c25

Browse files
committed
Fixed the MoE experts override to use the detected model architecture for the KV override key
1 parent c1d3889 commit b162c25

3 files changed

Lines changed: 10 additions & 1 deletion

File tree

gpttype_adapter.cpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2152,7 +2152,14 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
2152 2152
{
2153 2153
printf("\nOverriding number of experts to %d\n",inputs.moe_experts);
2154 2154
llama_model_kv_override kvo;
2155-
const char * moekey = "llama.expert_used_count";
2155+
std::string moekeystr = "llama";
2156+
if(file_format_meta.model_architecture_str!="")
2157+
{
2158+
moekeystr = file_format_meta.model_architecture_str;
2159+
}
2160+
moekeystr += ".expert_used_count";
2161+
2162+
const char * moekey = moekeystr.c_str();
2156 2163
std::strncpy(kvo.key, moekey, sizeof(kvo.key) - 1);
2157 2164
kvo.key[sizeof(kvo.key) - 1] = '\0'; // Ensure null termination
2158 2165
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;

model_adapter.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -296,6 +296,7 @@ void print_tok_vec(std::vector<float> &embd)
296 296

297 297
fileformatmeta->fileversion = filever;
298 298
fileformatmeta->model_architecture = GGUFArch::ARCH_DEFAULT;
299+
fileformatmeta->model_architecture_str = modelarch;
299 300
if(modelarch=="phi2")
300 301
{
301 302
fileformatmeta->model_architecture = GGUFArch::ARCH_PHI;

model_adapter.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@ struct FileFormatExtraMeta
68 68
int fileversion = 0;
69 69
GGUFArch model_architecture = GGUFArch::ARCH_DEFAULT;
70 70
int n_expert_count = 0;
71+
std::string model_architecture_str = "";
71 72
};
72 73

73 74
struct TopPicksData

0 commit comments

Comments
 (0)