@@ -305,16 +305,59 @@ bool load_target_gguf_partial(const std::string & path,
305305 return false ;
306306 }
307307
308+ // Structural invariants required by the graph builder.
309+ if (kl != vl) {
310+ set_last_error (" key_length != value_length not supported" );
311+ gguf_free (gctx); return false ;
312+ }
313+ if (n_layer % fai != 0 ) {
314+ char buf[128 ];
315+ std::snprintf (buf, sizeof (buf), " block_count=%u not divisible by full_attention_interval=%u" , n_layer, fai);
316+ set_last_error (buf);
317+ gguf_free (gctx); return false ;
318+ }
319+
308320 // rope dimension_sections (array of 4 uint32)
309321 int rope_sections[4 ] = {0 , 0 , 0 , 0 };
310322 {
311323 int64_t rid = gguf_find_key (gctx, " qwen35.rope.dimension_sections" );
312- if (rid >= 0 ) {
313- size_t n = gguf_get_arr_n (gctx, rid);
314- if (n >= 4 ) {
315- const int32_t * arr = (const int32_t *)gguf_get_arr_data (gctx, rid);
316- for (int k = 0 ; k < 4 ; k++) rope_sections[k] = arr[k];
324+ if (rid < 0 ) {
325+ set_last_error (" missing qwen35.rope.dimension_sections" );
326+ gguf_free (gctx); return false ;
327+ }
328+ size_t n = gguf_get_arr_n (gctx, rid);
329+ if (n < 4 ) {
330+ set_last_error (" qwen35.rope.dimension_sections has < 4 entries" );
331+ gguf_free (gctx); return false ;
332+ }
333+ const int32_t * arr = (const int32_t *)gguf_get_arr_data (gctx, rid);
334+ for (int k = 0 ; k < 4 ; k++) rope_sections[k] = arr[k];
335+ }
336+
337+ // Validate rope_sections against head_dim. n_rot = 2 * sum(sections) is
338+ // the number of dims rotated by ggml_rope_multi; it must be even, > 0,
339+ // and ≤ head_dim, otherwise rope reads/writes out of bounds.
340+ {
341+ long sum = 0 ;
342+ for (int k = 0 ; k < 4 ; k++) {
343+ if (rope_sections[k] < 0 ) {
344+ char buf[160 ];
345+ std::snprintf (buf, sizeof (buf),
346+ " rope_sections[%d]=%d is negative" , k, rope_sections[k]);
347+ set_last_error (buf);
348+ gguf_free (gctx); return false ;
317349 }
350+ sum += rope_sections[k];
351+ }
352+ const long n_rot = 2 * sum;
353+ if (n_rot <= 0 || n_rot > (long )kl) {
354+ char buf[200 ];
355+ std::snprintf (buf, sizeof (buf),
356+ " rope_sections {%d,%d,%d,%d} → n_rot=%ld invalid for head_dim=%u" ,
357+ rope_sections[0 ], rope_sections[1 ], rope_sections[2 ], rope_sections[3 ],
358+ n_rot, kl);
359+ set_last_error (buf);
360+ gguf_free (gctx); return false ;
318361 }
319362 }
320363
@@ -351,6 +394,28 @@ bool load_target_gguf_partial(const std::string & path,
351394 out.rope_dimension_count = (int )get_u32_or (gctx, " qwen35.rope.dimension_count" , 64 );
352395 out.rope_theta = get_f32_or (gctx, " qwen35.rope.freq_base" , 10000000 .0f );
353396 out.rms_eps = get_f32_or (gctx, " qwen35.attention.layer_norm_rms_epsilon" , 1e-6f );
397+
398+ // EOS token ids from GGUF tokenizer metadata (stored as UINT32 by the
399+ // GGUF spec; we use the u32 helper and cast). UINT32_MAX is the
400+ // missing-key sentinel and maps to int32_t -1, which the runtime EOS
401+ // check rejects via the `>= 0` guard.
402+ {
403+ const uint32_t kEosKeyMissing = 0xFFFFFFFFu ;
404+ const uint32_t raw_eos = get_u32_or (gctx, " tokenizer.ggml.eos_token_id" , kEosKeyMissing );
405+ const uint32_t raw_eos_chat = get_u32_or (gctx, " tokenizer.ggml.eot_token_id" , kEosKeyMissing );
406+ out.eos_id = (raw_eos == kEosKeyMissing ) ? -1 : (int32_t )raw_eos;
407+ out.eos_chat_id = (raw_eos_chat == kEosKeyMissing ) ? -1 : (int32_t )raw_eos_chat;
408+ std::printf (" [loader] eos_id=%d eos_chat_id=%d\n " , out.eos_id , out.eos_chat_id );
409+ }
410+
411+ // Compute capture layer IDs: evenly spaced through the target layers.
412+ // step = (n_layer - 2) / (N - 1), ids[k] = 1 + k * step.
413+ {
414+ const int N = DFLASH27B_DRAFT_N_TARGET_LAYERS;
415+ const int step = ((int )n_layer - 2 ) / (N - 1 );
416+ for (int k = 0 ; k < N; k++) out.capture_layer_ids [k] = 1 + k * step;
417+ }
418+
354419 out.layers .assign ((size_t )n_layer, TargetLayer{});
355420
356421 // ── 2. Wire our layer pointers to tensors inside meta_ctx ─────────
@@ -360,8 +425,8 @@ bool load_target_gguf_partial(const std::string & path,
360425 out.tok_embd = g (" token_embd.weight" );
361426 out.out_norm = g (" output_norm.weight" );
362427 out.output = g (" output.weight" );
363- if (!out.tok_embd || !out.out_norm ) {
364- set_last_error (" missing top-level tensors (token_embd/output_norm)" );
428+ if (!out.tok_embd || !out.out_norm || !out. output ) {
429+ set_last_error (" missing top-level tensors (token_embd/output_norm/output )" );
365430 gguf_free (gctx);
366431 return false ;
367432 }
0 commit comments