Skip to content

Commit 5d15549

Browse files
smpurkisclaude
andcommitted
fix(dflash): address weicj review — HIP link, loader validation, hipMalloc checks
- CMakeLists.txt: pflash_daemon and spike_thin_copy now link against ${_dflash27b_ggml_backend_lib} instead of hardcoded ggml-cuda, fixing HIP build link failures reported by weicj. - gguf_target_loader.cpp: restore structural validation removed in prior cleanup — kl!=vl check, n_layer%fai divisibility, full rope_sections validation (presence, count, bounds vs head_dim), EOS metadata assignment, capture_layer_ids recomputation, and output.weight requirement in top-level tensor check. - bsa_launcher_hip.cu: check hipMalloc return values; on failure, roll back partial allocations, reset kv_buf_cap, and return -1 instead of silently proceeding with null buffers. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent f959bce commit 5d15549

3 files changed

Lines changed: 85 additions & 13 deletions

File tree

dflash/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ endif()
359359
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/pflash_daemon.cpp")
360360
add_executable(pflash_daemon test/pflash_daemon.cpp)
361361
target_include_directories(pflash_daemon PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
362-
target_link_libraries(pflash_daemon PRIVATE dflash27b ggml ggml-cuda)
362+
target_link_libraries(pflash_daemon PRIVATE dflash27b ggml ${_dflash27b_ggml_backend_lib})
363363
endif()
364364

365365
# ─── Tests (numerics vs oracle) ────────────────────────────────────
@@ -392,7 +392,7 @@ if(DFLASH27B_TESTS)
392392
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/spike_thin_copy.cpp")
393393
add_executable(spike_thin_copy test/spike_thin_copy.cpp)
394394
target_include_directories(spike_thin_copy PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src)
395-
target_link_libraries(spike_thin_copy PRIVATE ggml ggml-cuda)
395+
target_link_libraries(spike_thin_copy PRIVATE ggml ${_dflash27b_ggml_backend_lib})
396396
endif()
397397
if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/smoke_draft_graph.cpp")
398398
add_executable(smoke_draft_graph test/smoke_draft_graph.cpp)

dflash/src/bsa_launcher_hip.cu

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,17 @@ extern "C" int launch_bsa_sparse_flash_forward_bf16(
7272
// Ensure persistent transpose buffers are large enough.
7373
const size_t kv_bytes = (size_t)B * Hk * S * D * 2; // sizeof(bfloat16)=2
7474
if (kv_bytes > kv_buf_cap) {
75-
if (kv_buf_K) hipFree(kv_buf_K);
76-
if (kv_buf_V) hipFree(kv_buf_V);
77-
hipMalloc(&kv_buf_K, kv_bytes);
78-
hipMalloc(&kv_buf_V, kv_bytes);
75+
if (kv_buf_K) { hipFree(kv_buf_K); kv_buf_K = nullptr; }
76+
if (kv_buf_V) { hipFree(kv_buf_V); kv_buf_V = nullptr; }
77+
hipError_t err_k = hipMalloc(&kv_buf_K, kv_bytes);
78+
hipError_t err_v = hipMalloc(&kv_buf_V, kv_bytes);
79+
if (err_k != hipSuccess || err_v != hipSuccess) {
80+
// Roll back: free any partial allocation and reset state.
81+
if (kv_buf_K) { hipFree(kv_buf_K); kv_buf_K = nullptr; }
82+
if (kv_buf_V) { hipFree(kv_buf_V); kv_buf_V = nullptr; }
83+
kv_buf_cap = 0;
84+
return -1;
85+
}
7986
kv_buf_cap = kv_bytes;
8087
}
8188

dflash/src/gguf_target_loader.cpp

Lines changed: 72 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -305,16 +305,59 @@ bool load_target_gguf_partial(const std::string & path,
305305
return false;
306306
}
307307

308+
// Structural invariants required by the graph builder.
309+
if (kl != vl) {
310+
set_last_error("key_length != value_length not supported");
311+
gguf_free(gctx); return false;
312+
}
313+
if (n_layer % fai != 0) {
314+
char buf[128];
315+
std::snprintf(buf, sizeof(buf), "block_count=%u not divisible by full_attention_interval=%u", n_layer, fai);
316+
set_last_error(buf);
317+
gguf_free(gctx); return false;
318+
}
319+
308320
// rope dimension_sections (array of 4 uint32)
309321
int rope_sections[4] = {0, 0, 0, 0};
310322
{
311323
int64_t rid = gguf_find_key(gctx, "qwen35.rope.dimension_sections");
312-
if (rid >= 0) {
313-
size_t n = gguf_get_arr_n(gctx, rid);
314-
if (n >= 4) {
315-
const int32_t * arr = (const int32_t *)gguf_get_arr_data(gctx, rid);
316-
for (int k = 0; k < 4; k++) rope_sections[k] = arr[k];
324+
if (rid < 0) {
325+
set_last_error("missing qwen35.rope.dimension_sections");
326+
gguf_free(gctx); return false;
327+
}
328+
size_t n = gguf_get_arr_n(gctx, rid);
329+
if (n < 4) {
330+
set_last_error("qwen35.rope.dimension_sections has < 4 entries");
331+
gguf_free(gctx); return false;
332+
}
333+
const int32_t * arr = (const int32_t *)gguf_get_arr_data(gctx, rid);
334+
for (int k = 0; k < 4; k++) rope_sections[k] = arr[k];
335+
}
336+
337+
// Validate rope_sections against head_dim. n_rot = 2 * sum(sections) is
338+
// the number of dims rotated by ggml_rope_multi; it must be even, > 0,
339+
// and ≤ head_dim, otherwise rope reads/writes out of bounds.
340+
{
341+
long sum = 0;
342+
for (int k = 0; k < 4; k++) {
343+
if (rope_sections[k] < 0) {
344+
char buf[160];
345+
std::snprintf(buf, sizeof(buf),
346+
"rope_sections[%d]=%d is negative", k, rope_sections[k]);
347+
set_last_error(buf);
348+
gguf_free(gctx); return false;
317349
}
350+
sum += rope_sections[k];
351+
}
352+
const long n_rot = 2 * sum;
353+
if (n_rot <= 0 || n_rot > (long)kl) {
354+
char buf[200];
355+
std::snprintf(buf, sizeof(buf),
356+
"rope_sections {%d,%d,%d,%d} → n_rot=%ld invalid for head_dim=%u",
357+
rope_sections[0], rope_sections[1], rope_sections[2], rope_sections[3],
358+
n_rot, kl);
359+
set_last_error(buf);
360+
gguf_free(gctx); return false;
318361
}
319362
}
320363

@@ -351,6 +394,28 @@ bool load_target_gguf_partial(const std::string & path,
351394
out.rope_dimension_count = (int)get_u32_or(gctx, "qwen35.rope.dimension_count", 64);
352395
out.rope_theta = get_f32_or(gctx, "qwen35.rope.freq_base", 10000000.0f);
353396
out.rms_eps = get_f32_or(gctx, "qwen35.attention.layer_norm_rms_epsilon", 1e-6f);
397+
398+
// EOS token ids from GGUF tokenizer metadata (stored as UINT32 by the
399+
// GGUF spec; we use the u32 helper and cast). UINT32_MAX is the
400+
// missing-key sentinel and maps to int32_t -1, which the runtime EOS
401+
// check rejects via the `>= 0` guard.
402+
{
403+
const uint32_t kEosKeyMissing = 0xFFFFFFFFu;
404+
const uint32_t raw_eos = get_u32_or(gctx, "tokenizer.ggml.eos_token_id", kEosKeyMissing);
405+
const uint32_t raw_eos_chat = get_u32_or(gctx, "tokenizer.ggml.eot_token_id", kEosKeyMissing);
406+
out.eos_id = (raw_eos == kEosKeyMissing) ? -1 : (int32_t)raw_eos;
407+
out.eos_chat_id = (raw_eos_chat == kEosKeyMissing) ? -1 : (int32_t)raw_eos_chat;
408+
std::printf("[loader] eos_id=%d eos_chat_id=%d\n", out.eos_id, out.eos_chat_id);
409+
}
410+
411+
// Compute capture layer IDs: evenly spaced through the target layers.
412+
// step = (n_layer - 2) / (N - 1), ids[k] = 1 + k * step.
413+
{
414+
const int N = DFLASH27B_DRAFT_N_TARGET_LAYERS;
415+
const int step = ((int)n_layer - 2) / (N - 1);
416+
for (int k = 0; k < N; k++) out.capture_layer_ids[k] = 1 + k * step;
417+
}
418+
354419
out.layers.assign((size_t)n_layer, TargetLayer{});
355420

356421
// ── 2. Wire our layer pointers to tensors inside meta_ctx ─────────
@@ -360,8 +425,8 @@ bool load_target_gguf_partial(const std::string & path,
360425
out.tok_embd = g("token_embd.weight");
361426
out.out_norm = g("output_norm.weight");
362427
out.output = g("output.weight");
363-
if (!out.tok_embd || !out.out_norm) {
364-
set_last_error("missing top-level tensors (token_embd/output_norm)");
428+
if (!out.tok_embd || !out.out_norm || !out.output) {
429+
set_last_error("missing top-level tensors (token_embd/output_norm/output)");
365430
gguf_free(gctx);
366431
return false;
367432
}

0 commit comments

Comments
 (0)