@@ -373,10 +373,10 @@ struct ggml_backend_meta_split_state llama_meta_device_get_split_state(const str
373373 // count only the same type of previous layers to avoid this
374374 auto get_il_eff = [&](const size_t il){
375375 size_t ret = 0 ;
376- const bool il_is_recurrent = hparams.is_recurrent (il);
377- const bool il_is_swa = hparams.is_swa (il);
376+ const bool il_is_recr = hparams.is_recr (il);
377+ const bool il_is_swa = hparams.is_swa (il);
378378 for (size_t il_prev = 0 ; il_prev < il; il_prev++) {
379- ret += hparams.is_recurrent (il_prev) == il_is_recurrent && hparams.is_swa (il_prev) == il_is_swa;
379+ ret += hparams.is_recr (il_prev) == il_is_recr && hparams.is_swa (il_prev) == il_is_swa;
380380 }
381381 return ret;
382382 };
@@ -553,7 +553,7 @@ struct ggml_backend_meta_split_state llama_meta_device_get_split_state(const str
553553 };
554554
555555 auto get_split_granularity = [&](int64_t blck_size, uint32_t il, const std::vector<std::pair<int64_t , uint32_t >> & segments) -> std::vector<int64_t > {
556- if (hparams.is_recurrent (il)) {
556+ if (hparams.is_recr (il)) {
557557 // linear attention
558558 const int64_t head_dim = hparams.ssm_d_state ;
559559 const int64_t granularity_qkv = std::lcm (blck_size, head_dim);
@@ -1076,18 +1076,16 @@ void llama_model_base::load_hparams(llama_model_loader & ml) {
10761076 std::fill (hparams.n_head_arr .begin (), hparams.n_head_arr .end (), 0 );
10771077 std::fill (hparams.n_head_kv_arr .begin (), hparams.n_head_kv_arr .end (), 0 );
10781078 std::fill (hparams.n_ff_arr .begin (), hparams.n_ff_arr .end (), 0 );
1079- std::fill (
1080- hparams.recurrent_layer_arr .begin (),
1081- hparams.recurrent_layer_arr .end (),
1082- llm_arch_is_recurrent (ml.get_arch ()));
10831079
10841080 std::fill (hparams.rope_sections .begin (), hparams.rope_sections .end (), 0 );
1085- std::fill (hparams.swa_layers .begin (), hparams.swa_layers .end (), 0 );
1081+ std::fill (hparams.is_swa_impl .begin (), hparams.is_swa_impl .end (), 0 );
1082+ std::fill (hparams.is_recr_impl .begin (), hparams.is_recr_impl .end (), llm_arch_is_recurrent (ml.get_arch ()) ? 1 : 0 );
10861083
10871084 std::fill (hparams.xielu_alpha_n .begin (), hparams.xielu_alpha_n .end (), 0 .0f );
10881085 std::fill (hparams.xielu_alpha_p .begin (), hparams.xielu_alpha_p .end (), 0 .0f );
1089- std::fill (hparams.xielu_beta .begin (), hparams.xielu_beta .end (), 0 .0f );
1090- std::fill (hparams.xielu_eps .begin (), hparams.xielu_eps .end (), 0 .0f );
1086+ std::fill (hparams.xielu_beta .begin (), hparams.xielu_beta .end (), 0 .0f );
1087+ std::fill (hparams.xielu_eps .begin (), hparams.xielu_eps .end (), 0 .0f );
1088+
10911089 std::fill (hparams.swiglu_clamp_exp .begin (), hparams.swiglu_clamp_exp .end (), 0 .0f );
10921090 std::fill (hparams.swiglu_clamp_shexp .begin (), hparams.swiglu_clamp_shexp .end (), 0 .0f );
10931091
@@ -2040,18 +2038,18 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params,
20402038 filter_recr = [&](int32_t ) { return true ; };
20412039 } else if (arch == LLM_ARCH_NEMOTRON_H || arch == LLM_ARCH_NEMOTRON_H_MOE) {
20422040 filter_attn = [&](int32_t il) {
2043- return !hparams.is_recurrent (il) && hparams.n_ff (il) == 0 ;
2041+ return !hparams.is_recr (il) && hparams.n_ff (il) == 0 ;
20442042 };
20452043 filter_recr = [&](int32_t il) {
2046- return hparams.is_recurrent (il) && hparams.n_ff (il) == 0 ;
2044+ return hparams.is_recr (il) && hparams.n_ff (il) == 0 ;
20472045 };
20482046 } else if (arch == LLM_ARCH_QWEN35 || arch == LLM_ARCH_QWEN35MOE) {
20492047 const uint32_t n_main = hparams.n_layer - hparams.nextn_predict_layers ;
20502048 filter_attn = [&, n_main](int32_t il) {
2051- return (uint32_t )il < n_main && !hparams.is_recurrent (il);
2049+ return (uint32_t )il < n_main && !hparams.is_recr (il);
20522050 };
20532051 filter_recr = [&, n_main](int32_t il) {
2054- return (uint32_t )il < n_main && hparams.is_recurrent (il);
2052+ return (uint32_t )il < n_main && hparams.is_recr (il);
20552053 };
20562054 }
20572055
0 commit comments