Skip to content

Commit 8b919b5

Browse files
committed
allow customized rope to use model set values
1 parent f4ee91a commit 8b919b5

4 files changed

Lines changed: 28 additions & 18 deletions

File tree

gpttype_adapter.cpp

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -697,10 +697,12 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
697697
//determine rope scaling params
698698
float rope_freq_scale = 1.0f;
699699
float rope_freq_base = 10000.0f;
700+
bool overwriteRope = false;
700701
if(inputs.rope_freq_scale>0.0f)
701702
{
702703
rope_freq_scale = inputs.rope_freq_scale;
703704
rope_freq_base = inputs.rope_freq_base;
705+
overwriteRope = true;
704706
printf("Using Custom RoPE scaling (scale:%.3f, base:%.1f).\n",rope_freq_scale,rope_freq_base);
705707
}
706708
else
@@ -722,13 +724,9 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
722724
rope_freq_base = (effectivenctx <= 2048 ? 10000.0f : (effectivenctx <= 3072 ? 26000.0f : (effectivenctx <= 4096 ? 32000.0f : (effectivenctx <= 6144 ? 54000.0f :
723725
(effectivenctx <= 8192 ? 82684.0f : (effectivenctx <= 12288 ? 140000.0f : (effectivenctx <= 16384 ? 200000.0f : (effectivenctx <= 24576 ? 320000.0f : 440000.0f))))))));
724726

725-
if(file_format_meta.freq_base_train > rope_freq_base)
726-
{
727-
rope_freq_base = file_format_meta.freq_base_train;
728-
}
729727
}
730728

731-
printf("Using automatic RoPE scaling (scale:%.3f, base:%.1f)\n",rope_freq_scale,rope_freq_base);
729+
printf("Using automatic RoPE scaling. If the model has customized RoPE settings, they will be used directly instead!\n");
732730
}
733731
gptj_ctx_v3.hparams.rope_freq_scale = neox_ctx_v3.hparams.rope_freq_scale = rope_freq_scale;
734732
gptj_ctx_v3.hparams.rope_freq_base = neox_ctx_v3.hparams.rope_freq_base = rope_freq_base;
@@ -903,8 +901,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
903901
}
904902
#endif
905903
model_params.main_gpu = cu_parseinfo_maindevice;
906-
llama_ctx_params.rope_freq_base = rope_freq_base;
907-
llama_ctx_params.rope_freq_scale = rope_freq_scale;
904+
908905
llama_ctx_params.n_batch = blasbatchsize;
909906
llama_ctx_params.n_threads = n_threads;
910907
llama_ctx_params.n_threads_batch = n_blasthreads;
@@ -932,6 +929,28 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
932929
}
933930

934931
llama_model * llamamodel = llama_load_model_from_file(modelname.c_str(), model_params);
932+
if(overwriteRope)
933+
{
934+
llama_ctx_params.rope_freq_base = rope_freq_base;
935+
llama_ctx_params.rope_freq_scale = rope_freq_scale;
936+
}
937+
else
938+
{
939+
//if the model modifies rope in any way, use the model values. Otherwise, use our automatic ones
940+
if(llamamodel->hparams.rope_freq_base_train!=10000.0f ||
941+
llamamodel->hparams.rope_freq_scale_train!=1.0f ||
942+
llamamodel->hparams.rope_scaling_type_train==2)
943+
{
944+
printf("Automatic RoPE Scaling: Using model internal values.\n");
945+
}
946+
else
947+
{
948+
llama_ctx_params.rope_freq_base = rope_freq_base;
949+
llama_ctx_params.rope_freq_scale = rope_freq_scale;
950+
printf("Automatic RoPE Scaling: Using (scale:%.3f, base:%.1f).\n", rope_freq_scale, rope_freq_base);
951+
}
952+
}
953+
935954
llama_ctx_v4 = llama_new_context_with_model(llamamodel, llama_ctx_params);
936955

937956
if (llama_ctx_v4 == NULL)

koboldcpp.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ def bring_terminal_to_foreground():
388388
modelbusy = threading.Lock()
389389
requestsinqueue = 0
390390
defaultport = 5001
391-
KcppVersion = "1.49"
391+
KcppVersion = "1.50"
392392
showdebug = True
393393
showsamplerwarning = True
394394
showmaxctxwarning = True
@@ -1452,7 +1452,7 @@ def togglehorde(a,b,c):
14521452
labels[idx].grid_forget()
14531453
if usehorde_var.get()==1 and (horde_name_var.get()=="koboldcpp" or horde_name_var.get()=="") and model_var.get()!="":
14541454
basefile = os.path.basename(model_var.get())
1455-
horde_name_var.set(os.path.splitext(basefile)[0])
1455+
horde_name_var.set(sanitize_string(os.path.splitext(basefile)[0]))
14561456

14571457
makecheckbox(network_tab, "Configure for Horde", usehorde_var, 6, command=togglehorde)
14581458
togglehorde(1,1,1)

model_adapter.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -290,14 +290,6 @@ void print_tok_vec(std::vector<float> &embd)
290290
}
291291
int filever = gguf_get_version(ctx);
292292
fileformatmeta->fileversion = filever;
293-
294-
//try to adapt if the rope_freq_base_train exceeds the auto one
295-
fkey = modelarch+".rope.freq_base";
296-
keyidx = gguf_find_key(ctx, fkey.c_str());
297-
if (keyidx != -1) {
298-
float fbt = gguf_get_val_f32(ctx, keyidx);
299-
fileformatmeta->freq_base_train = (fbt > 1.0f ? fbt : 0.0f);
300-
}
301293
}
302294
gguf_free(ctx);
303295
}

model_adapter.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ struct FileFormatExtraMeta
5555
{
5656
int n_ctx_train = 2048;
5757
int fileversion = 0;
58-
float freq_base_train = 0;
5958
};
6059

6160
enum ModelLoadResult

0 commit comments

Comments (0)