fix(imatrix): null mtp_batch.token after free to prevent double-free

jimbothigpen · claude · jimbothigpen · commit 3f1deb1b237b · 2026-05-30T10:37:25.000-04:00
llama_batch_free() frees batch.token whenever it is non-null. Because we allocate mtp_batch.token manually (llama_batch_init() with n_embd > 0 allocates only embd, not token) and release it with an explicit free(), the pointer must be set to null in all three cleanup paths before llama_batch_free() is called; otherwise llama_batch_free() would free the same pointer a second time. The bug is also present in upstream ggml-org#23476; it is fixed here as a fork-specific change. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
@@ -1101,6 +1101,7 @@ static bool compute_imatrix(llama_context * ctx, llama_context * ctx_mtp, const
                 llama_batch_free(batch);
                 if (mtp_enabled) {
                     free(mtp_batch.token);
+                    mtp_batch.token = nullptr;
                     llama_batch_free(mtp_batch);
                 }
                 return false;
@@ -1115,6 +1116,7 @@ static bool compute_imatrix(llama_context * ctx, llama_context * ctx_mtp, const
                                          pos_first, n_embd, /*seq_id=*/0, mtp_batch)) {
                     llama_batch_free(batch);
                     free(mtp_batch.token);
+                    mtp_batch.token = nullptr;
                     llama_batch_free(mtp_batch);
                     return false;
                 }
@@ -1181,6 +1183,7 @@ static bool compute_imatrix(llama_context * ctx, llama_context * ctx_mtp, const
     llama_batch_free(batch);
     if (mtp_enabled) {
         free(mtp_batch.token);
+        mtp_batch.token = nullptr;
         llama_batch_free(mtp_batch);
     }