Skip to content

Commit 1a2dea2

Browse files
authored
spec: fix segfault error on long prompts for eagle3 (#24707)
1 parent 74a80dd commit 1a2dea2

4 files changed

Lines changed: 16 additions & 5 deletions

File tree

src/llama-context.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1382,7 +1382,7 @@ int llama_context::encode(const llama_batch & batch_inp) {
13821382
const auto & hparams = model.hparams;
13831383

13841384
// eagle3/DFlash: the encoder takes target features as its input; non-draft paths fall back to the model's input dim
1385-
const int64_t n_embd = hparams.n_embd_inp();
1385+
const int64_t n_embd = hparams.n_embd_inp_enc();
13861386
const int64_t n_vocab = model.vocab.n_tokens();
13871387

13881388
// note: during encode, we always pass the full sequence starting from pos = 0

src/llama-hparams.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ uint32_t llama_hparams::n_embd_inp() const {
104104
return n_embd_inp;
105105
}
106106

107+
uint32_t llama_hparams::n_embd_inp_enc() const {
108+
return n_embd_inp_enc_impl > 0 ? n_embd_inp_enc_impl : n_embd_inp();
109+
}
110+
107111
uint32_t llama_hparams::n_embd_out() const {
108112
return n_embd_out_impl > 0 ? n_embd_out_impl : n_embd;
109113
}

src/llama-hparams.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,10 @@ struct llama_hparams {
189189
// input embedding dimension (0 = use n_embd)
190190
uint32_t n_embd_inp_impl = 0;
191191

192+
// encoder input embedding dimension (0 = use n_embd_inp())
193+
// e.g. the eagle3 encoder fuses target_layers * target_hidden features
194+
uint32_t n_embd_inp_enc_impl = 0;
195+
192196
// output embedding dimension (0 = use n_embd)
193197
uint32_t n_embd_out_impl = 0;
194198

@@ -305,6 +309,9 @@ struct llama_hparams {
305309
// dimension of main + auxiliary input embeddings
306310
uint32_t n_embd_inp() const;
307311

312+
// dimension of the encoder input embeddings
313+
uint32_t n_embd_inp_enc() const;
314+
308315
// dimension of output embeddings
309316
uint32_t n_embd_out() const;
310317

src/models/eagle3.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ void llama_model_eagle3::load_arch_hparams(llama_model_loader & ml) {
1919
ml.get_key(LLM_KV_TARGET_HIDDEN_SIZE, n_embd_tgt);
2020
LLAMA_LOG_INFO("%s: EAGLE3 n_embd_tgt = %u (draft n_embd = %u)\n", __func__, n_embd_tgt, hparams.n_embd);
2121

22-
hparams.n_embd_inp_impl = (uint32_t) target_layer_ids.size() * n_embd_tgt;
22+
hparams.n_embd_inp_enc_impl = (uint32_t) target_layer_ids.size() * n_embd_tgt;
2323

2424
// eagle3 norm_before_residual (optional, default false)
2525
// compatible with Red Hat eagle3 speculator model
@@ -34,7 +34,7 @@ void llama_model_eagle3::load_arch_hparams(llama_model_loader & ml) {
3434
void llama_model_eagle3::load_arch_tensors(llama_model_loader &) {
3535
LLAMA_LOAD_LOCALS;
3636

37-
const int64_t n_embd_inp = hparams.n_embd_inp();
37+
const int64_t n_embd_inp = hparams.n_embd_inp_enc();
3838
const int64_t n_embd_attn_input = 2 * n_embd;
3939

4040
// Get vocab size from the d2t tensor in the GGUF file (optional - only needed if eagle3 has different vocab_size than target)
@@ -109,8 +109,8 @@ ggml_tensor * llama_model_eagle3::graph<true>::build_inp_embd_enc() const {
109109

110110
// Input: Target model features (3 layers concatenated: low, mid, high)
111111
// Data will be provided via ubatch->embd in encode_eagle3_features()
112-
auto inp_target = std::make_unique<llm_graph_input_embd>(hparams.n_embd_inp());
113-
inp_target->embd = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32,hparams.n_embd_inp(), n_tokens);
112+
auto inp_target = std::make_unique<llm_graph_input_embd>(hparams.n_embd_inp_enc());
113+
inp_target->embd = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, hparams.n_embd_inp_enc(), n_tokens);
114114
ggml_set_input(inp_target->embd);
115115

116116
cur = inp_target->embd;

0 commit comments

Comments
 (0)