Skip to content

Commit bf0cb2b

Browse files
ruixiang63ake
authored and committed
spec: support qwen3.5 & 3.6 eagle3 draft
1 parent b4024af commit bf0cb2b

3 files changed

Lines changed: 15 additions & 0 deletions

File tree

src/models/qwen35.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,8 @@ llama_model_qwen35::graph::graph(const llama_model & model, const llm_graph_para
156156

157157
// MTP/NextN layers are loaded as extra decoder blocks but not executed in the main pass.
158158
for (int il = 0; il < n_layer; ++il) {
159+
res->t_layer_inp[il] = inpL;
160+
159161
ggml_tensor * inpSA = inpL;
160162

161163
cur = build_norm(inpL, model.layers[il].attn_norm, nullptr, LLM_NORM_RMS, il);

src/models/qwen35moe.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,8 @@ llama_model_qwen35moe::graph::graph(const llama_model & model, const llm_graph_p
179179

180180
// MTP/NextN layers are loaded as extra decoder blocks but not executed in the main pass.
181181
for (int il = 0; il < n_layer; ++il) {
182+
res->t_layer_inp[il] = inpL;
183+
182184
ggml_tensor * inpSA = inpL;
183185

184186
cur = build_norm(inpL, model.layers[il].attn_norm, nullptr, LLM_NORM_RMS, il);

tools/server/server-context.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2974,6 +2974,17 @@ struct server_context_impl {
29742974

29752975
bool do_reset = it == slot.prompt.checkpoints.rend();
29762976

2977+
// eagle3 draft is one position behind the target (due to deferred boundary), so it
2978+
// can't resume from a checkpoint restored on a recurrent/hybrid target; re-process fully instead.
2979+
const bool spec_eagle3 = std::find(params_base.speculative.types.begin(), params_base.speculative.types.end(),
2980+
COMMON_SPECULATIVE_TYPE_DRAFT_EAGLE3) != params_base.speculative.types.end();
2981+
if (!do_reset && spec_eagle3 &&
2982+
(ctx_tgt_seq_rm_type == COMMON_CONTEXT_SEQ_RM_TYPE_FULL ||
2983+
ctx_tgt_seq_rm_type == COMMON_CONTEXT_SEQ_RM_TYPE_RS)) {
2984+
SLT_WRN(slot, "%s", "eagle3 draft cannot resume from a recurrent/hybrid checkpoint, forcing full re-processing\n");
2985+
do_reset = true;
2986+
}
2987+
29772988
if (!do_reset) {
29782989
// restore the context checkpoint
29792990
it->load_tgt(ctx_tgt, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);

0 commit comments

Comments
 (0)