From e934e229e376a3fa027120142fc54aa1859c5e48 Mon Sep 17 00:00:00 2001 From: rxho Date: Mon, 27 Apr 2026 07:10:21 +0800 Subject: [PATCH] gguf : reject empty metadata key names during parsing Fixes #20873: a malformed GGUF file with a zero-length key string triggers GGML_ASSERT(!key.empty()) in the gguf_kv constructor. Add an early check in gguf_init_from_file_ptr that rejects empty keys with an error log, consistent with existing duplicate-key and bad-key-size validation. Three new handcrafted test cases: - KV_EMPTY_KEY: all keys are empty - KV_EMPTY_KEY_ONLY_FIRST: only the first key is empty - KV_EMPTY_KEY_ONLY_MID: a middle key is empty Signed-off-by: rxho --- ggml/src/gguf.cpp | 4 ++++ tests/test-gguf.cpp | 23 +++++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp index ab3cc974867..accbebcbaf1 100644 --- a/ggml/src/gguf.cpp +++ b/ggml/src/gguf.cpp @@ -508,6 +508,10 @@ struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_para GGML_LOG_ERROR("%s: encountered bad_alloc error while reading key %" PRIi64 "\n", __func__, i); ok = false; } + if (ok && key.empty()) { + GGML_LOG_ERROR("%s: empty key name at KV pair %" PRIi64 "\n", __func__, i); + ok = false; + } for (size_t j = 0; ok && j < ctx->kv.size(); ++j) { if (key == ctx->kv[j].key) { GGML_LOG_ERROR("%s: duplicate key '%s' for tensors %zu and %" PRIi64 " \n", __func__, key.c_str(), j, i); diff --git a/tests/test-gguf.cpp b/tests/test-gguf.cpp index ed3070dc4de..4790472d308 100644 --- a/tests/test-gguf.cpp +++ b/tests/test-gguf.cpp @@ -29,6 +29,9 @@ enum handcrafted_file_type { HANDCRAFTED_KV_BAD_TYPE = 20 + offset_has_kv, // HANDCRAFTED_KV_BAD_VALUE_SIZE = 30 + offset_has_kv, // removed because it can result in allocations > 1 TB (default sanitizer limit) HANDCRAFTED_KV_DUPLICATE_KEY = 40 + offset_has_kv, + HANDCRAFTED_KV_EMPTY_KEY = 45 + offset_has_kv, + HANDCRAFTED_KV_EMPTY_KEY_ONLY_FIRST = 46 + offset_has_kv, + HANDCRAFTED_KV_EMPTY_KEY_ONLY_MID = 47 + offset_has_kv, HANDCRAFTED_KV_BAD_ALIGN = 50 + offset_has_kv, HANDCRAFTED_KV_SUCCESS = 800 + offset_has_kv, @@ -66,6 +69,9 @@ static std::string handcrafted_file_type_name(const enum handcrafted_file_type h case HANDCRAFTED_KV_BAD_KEY_SIZE: return "KV_BAD_KEY_SIZE"; case HANDCRAFTED_KV_BAD_TYPE: return "KV_BAD_TYPE"; case HANDCRAFTED_KV_DUPLICATE_KEY: return "KV_DUPLICATE_KEY"; + case HANDCRAFTED_KV_EMPTY_KEY: return "KV_EMPTY_KEY"; + case HANDCRAFTED_KV_EMPTY_KEY_ONLY_FIRST: return "KV_EMPTY_KEY_ONLY_FIRST"; + case HANDCRAFTED_KV_EMPTY_KEY_ONLY_MID: return "KV_EMPTY_KEY_ONLY_MID"; case HANDCRAFTED_KV_BAD_ALIGN: return "KV_BAD_ALIGN"; case HANDCRAFTED_KV_SUCCESS: return "KV_RANDOM_KV"; @@ -250,14 +256,24 @@ static FILE * get_handcrafted_file(const unsigned int seed, const enum handcraft const std::string key = "my_key_" + std::to_string((hft == HANDCRAFTED_KV_DUPLICATE_KEY ? i/2 : i)); - if (hft == HANDCRAFTED_KV_BAD_KEY_SIZE) { + const bool empty_key_this_iter = + hft == HANDCRAFTED_KV_EMPTY_KEY || + (hft == HANDCRAFTED_KV_EMPTY_KEY_ONLY_FIRST && i == 0) || + (hft == HANDCRAFTED_KV_EMPTY_KEY_ONLY_MID && i == int(kv_types.size()) / 2); + + if (empty_key_this_iter) { + const uint64_t n = 0; + helper_write(file, n); + } else if (hft == HANDCRAFTED_KV_BAD_KEY_SIZE) { const uint64_t n = -1; helper_write(file, n); } else { const uint64_t n = key.length(); helper_write(file, n); } - helper_write(file, key.data(), key.length()); + if (!empty_key_this_iter) { + helper_write(file, key.data(), key.length()); + } { const int32_t type32 = int32_t(type); @@ -698,6 +714,9 @@ static std::pair test_handcrafted_file(const unsigned int seed) { HANDCRAFTED_KV_BAD_KEY_SIZE, HANDCRAFTED_KV_BAD_TYPE, HANDCRAFTED_KV_DUPLICATE_KEY, + HANDCRAFTED_KV_EMPTY_KEY, + HANDCRAFTED_KV_EMPTY_KEY_ONLY_FIRST, + HANDCRAFTED_KV_EMPTY_KEY_ONLY_MID, HANDCRAFTED_KV_BAD_ALIGN, HANDCRAFTED_KV_SUCCESS,