@@ -294,7 +294,7 @@ llama_model_step35::graph::graph(const llama_model & model, const llm_graph_para
294294 cb (cur, " attn_proj" , il);
295295 }
296296
297- if (il == n_transformer_layers - 1 && inp_out_ids && cparams.embeddings_pre_norm_masked ) {
297+ if (il == n_transformer_layers - 1 && inp_out_ids && cparams.embeddings_nextn_masked ) {
298298 cur = ggml_get_rows (ctx0, cur, inp_out_ids);
299299 inpSA = ggml_get_rows (ctx0, inpSA, inp_out_ids);
300300 }
@@ -353,10 +353,10 @@ llama_model_step35::graph::graph(const llama_model & model, const llm_graph_para
353353
354354 cur = inpL;
355355
356- cb (cur, " h_pre_norm " , -1 );
357- res->t_h_pre_norm = cur;
356+ cb (cur, " h_nextn " , -1 );
357+ res->t_h_nextn = cur;
358358
359- if (!cparams.embeddings_pre_norm_masked && inp_out_ids) {
359+ if (!cparams.embeddings_nextn_masked && inp_out_ids) {
360360 cur = ggml_get_rows (ctx0, cur, inp_out_ids);
361361 }
362362
@@ -541,8 +541,8 @@ llama_model_step35::graph_mtp::graph_mtp(const llama_model & model, const llm_gr
541541 cb (cur, " mtp_post_ffn" , il);
542542
543543 // Pre-norm hidden state: used by the AR draft loop to seed the next MTP step.
544- cb (cur, " h_pre_norm " , -1 );
545- res->t_h_pre_norm = cur;
544+ cb (cur, " h_nextn " , -1 );
545+ res->t_h_nextn = cur;
546546
547547 ggml_tensor * head_norm_w = layer.nextn .shared_head_norm
548548 ? layer.nextn .shared_head_norm
0 commit comments