@@ -19,7 +19,7 @@ void llama_model_eagle3::load_arch_hparams(llama_model_loader & ml) {
1919 ml.get_key (LLM_KV_TARGET_HIDDEN_SIZE , n_embd_tgt);
2020 LLAMA_LOG_INFO (" %s: EAGLE3 n_embd_tgt = %u (draft n_embd = %u)\n " , __func__, n_embd_tgt, hparams.n_embd );
2121
22- hparams.n_embd_inp_impl = (uint32_t ) target_layer_ids.size () * n_embd_tgt;
22+ hparams.n_embd_inp_enc_impl = (uint32_t ) target_layer_ids.size () * n_embd_tgt;
2323
2424 // eagle3 norm_before_residual (optional, default false)
2525 // compatible with Red Hat eagle3 speculator model
@@ -34,7 +34,7 @@ void llama_model_eagle3::load_arch_hparams(llama_model_loader & ml) {
3434void llama_model_eagle3::load_arch_tensors (llama_model_loader &) {
3535 LLAMA_LOAD_LOCALS ;
3636
37- const int64_t n_embd_inp = hparams.n_embd_inp ();
37+ const int64_t n_embd_inp = hparams.n_embd_inp_enc ();
3838 const int64_t n_embd_attn_input = 2 * n_embd;
3939
4040 // Get vocab size from the d2t tensor in the GGUF file (optional - only needed if eagle3 has different vocab_size than target)
@@ -109,8 +109,8 @@ ggml_tensor * llama_model_eagle3::graph<true>::build_inp_embd_enc() const {
109109
110110 // Input: Target model features (3 layers concatenated: low, mid, high)
111111 // Data will be provided via ubatch->embd in encode_eagle3_features()
112- auto inp_target = std::make_unique<llm_graph_input_embd>(hparams.n_embd_inp ());
113- inp_target->embd = ggml_new_tensor_2d (ctx0, GGML_TYPE_F32 ,hparams.n_embd_inp (), n_tokens);
112+ auto inp_target = std::make_unique<llm_graph_input_embd>(hparams.n_embd_inp_enc ());
113+ inp_target->embd = ggml_new_tensor_2d (ctx0, GGML_TYPE_F32 , hparams.n_embd_inp_enc (), n_tokens);
114114 ggml_set_input (inp_target->embd );
115115
116116 cur = inp_target->embd ;
0 commit comments