77// No dynamic memory allocation! Setup structs with FIXED (known) shapes and sizes for ALL output fields
88// Python will ALWAYS provide the memory, we just write to it.
99
10+ #include < cmath>
1011#include < time.h>
1112#include < mutex>
1213#include " model_adapter.h"
@@ -787,6 +788,19 @@ static int GetBatchSize(int desiredBlasBatchSize,FileFormat in_file_format)
787788 return desiredBlasBatchSize;
788789}
789790
// Applies the GradientAI automatic NTK-aware RoPE frequency-base scaling when the
// desired context length exceeds the context length the model was trained with.
//
// original_rope_base: rope_freq_base the model was trained with (e.g. 10000.0f)
// n_ctx_train:        context length the model was trained at
// n_ctx_desired:      context length the user wants to run at
// is_solar:           Solar-architecture models need their context multiplied by 8
//                     for correct scaling
//
// Returns original_rope_base unchanged when no scaling is needed (desired context
// fits within the trained context, or is at/below the 2048 baseline); otherwise
// returns the scaled frequency base.
static float CalcGradientAIRopeFreqBase(float original_rope_base, int n_ctx_train, int n_ctx_desired, bool is_solar)
{
    // No scaling needed: requested context fits inside the trained window,
    // or is within the 2048 "normie mode" baseline.
    if (n_ctx_desired <= n_ctx_train || n_ctx_desired <= 2048)
    {
        return original_rope_base;
    }
    // Solar requires ctx * 8 for correct scaling; all other archs use ctx as-is.
    const float ctx_multiplier = (is_solar ? 8.0f : 1.0f);
    // chi = (ctx * multiplier) / 2*pi — keep the constant a float literal to
    // avoid silent float->double promotion in this all-float computation.
    const float chi_ctx_train_value = (n_ctx_train * ctx_multiplier) / 6.28318f;
    const float chi_ctx_value = (n_ctx_desired * ctx_multiplier) / 6.28318f;
    // new_base = old_base ^ (log(chi_desired) / log(chi_trained))
    return powf(original_rope_base, logf(chi_ctx_value) / logf(chi_ctx_train_value));
}
803+
790804ModelLoadResult gpttype_load_model (const load_model_inputs inputs, FileFormat in_file_format, FileFormatExtraMeta in_file_format_meta)
791805{
792806 ggml_time_init ();
@@ -835,28 +849,16 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
835849 }
836850 else
837851 {
838- rope_freq_scale = 1 .0f ;
839- if (kcpp_params->n_ctx <= 2048 ) // normie mode
852+ // Set freq base for all, including non GGUF. If we are using GGUF, this will be overwritten with more accurate values later.
853+ rope_freq_base = CalcGradientAIRopeFreqBase (10000 .0f ,2048 ,kcpp_params->n_ctx ,false );
854+ if (file_format==FileFormat::GGUF_GENERIC)
840855 {
841- rope_freq_base = 10000 . 0f ;
856+ printf ( " Using automatic RoPE scaling. If the model has customized RoPE settings, they will be used directly instead! \n " ) ;
842857 }
843858 else
844859 {
845- // approximate NTK aware ctx
846- auto effectivenctx = kcpp_params->n_ctx ;
847- if ((file_format == FileFormat::GGUF_GENERIC) && file_format_meta.n_ctx_train > 2048 )
848- {
849- float factor = file_format_meta.n_ctx_train /2048 ;
850- effectivenctx = effectivenctx/factor;
851- }
852- float magic_multiplier = 8 .0f ;
853- float base_multiplier = effectivenctx*magic_multiplier;
854- float base_raw = 10000 .0f ;
855- rope_freq_base = (effectivenctx <= 2048 ? base_raw : base_multiplier);
856-
860+ printf (" Using Automatic RoPE scaling, Pre-GGUF (scale:%.3f, base:%.1f).\n " ,rope_freq_scale, rope_freq_base);
857861 }
858-
859- printf (" Using automatic RoPE scaling. If the model has customized RoPE settings, they will be used directly instead!\n " );
860862 }
861863 gptj_ctx_v3.hparams .rope_freq_scale = neox_ctx_v3.hparams .rope_freq_scale = rope_freq_scale;
862864 gptj_ctx_v3.hparams .rope_freq_base = neox_ctx_v3.hparams .rope_freq_base = rope_freq_base;
@@ -1085,7 +1087,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
10851087 }
10861088 else
10871089 {
1088- // if the model modifes rope in any way, use the model values. Otherwise, use our automatic ones
1090+ // if the model modifes rope in any way, or uses yarn, use the model values. Otherwise, use our automatic ones
10891091 // special exception for llama, which uses auto scale
10901092 if ((llamamodel->hparams .rope_freq_base_train !=10000 .0f && llamamodel->hparams .rope_freq_base_train !=500000 .0f ) ||
10911093 llamamodel->hparams .rope_freq_scale_train !=1 .0f ||
@@ -1095,8 +1097,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
10951097 }
10961098 else
10971099 {
1098- float multiplier_rope_base = llamamodel-> hparams . rope_freq_base_train / 10000 . 0f ;
1099- rope_freq_base *= multiplier_rope_base ;
1100+ // Calculate rope_freq_base using the gradientAI formula, solar requires ctx *8 for correct scaling
1101+ rope_freq_base = CalcGradientAIRopeFreqBase (llamamodel-> hparams . rope_freq_base_train , file_format_meta. n_ctx_train , kcpp_params-> n_ctx , file_format_meta. model_architecture ==GGUFArch::ARCH_SOLAR) ;
11001102 llama_ctx_params.rope_freq_base = rope_freq_base;
11011103 llama_ctx_params.rope_freq_scale = rope_freq_scale;
11021104 printf (" Automatic RoPE Scaling: Using (scale:%.3f, base:%.1f).\n " , rope_freq_scale, rope_freq_base);
@@ -2467,4 +2469,4 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
24672469 concat_output_mtx.unlock ();
24682470 output.text = concat_output_reader_copy_res.c_str ();
24692471 return output;
2470- }
2472+ }
0 commit comments