server : do not create checkpoints right after mtmd chunks (ggml-org#20232)

ggerganov · web-flow · commit d417bc43dd29 · 2026-03-08T22:16:46.000+02:00
diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
@@ -2438,6 +2438,8 @@ struct server_context_impl {
                         slot.n_prompt_tokens_cache = 0;
                     }
 
+                    bool do_checkpoint = params_base.n_ctx_checkpoints > 0;
+
                     // check if we should process the image
                     if (slot.prompt.n_tokens() < slot.task->n_tokens() && input_tokens[slot.prompt.n_tokens()] == LLAMA_TOKEN_NULL) {
                         // process the image
@@ -2457,6 +2459,8 @@ struct server_context_impl {
                             const auto & chunk = input_tokens.find_chunk(slot.prompt.n_tokens());
                             slot.prompt.tokens.push_back(chunk.get()); // copy
                         }
+
+                        do_checkpoint = false; // do not checkpoint right after an image chunk
                     }
 
                     // If using an alora, there may be uncached tokens that come
@@ -2473,8 +2477,6 @@ struct server_context_impl {
                         alora_disabled_id = enabled_loras[0];
                     }
 
-                    bool do_checkpoint = params_base.n_ctx_checkpoints > 0;
-
                     // make checkpoints only for completion tasks
                     do_checkpoint = do_checkpoint && slot.task->type == SERVER_TASK_TYPE_COMPLETION;
 

Original file line number	Diff line number	Diff line change
`@@ -2438,6 +2438,8 @@ struct server_context_impl {`
`2438`	`2438`	`slot.n_prompt_tokens_cache = 0;`
`2439`	`2439`	`}`
`2440`	`2440`
	`2441`	`+ bool do_checkpoint = params_base.n_ctx_checkpoints > 0;`
	`2442`	`+`
`2441`	`2443`	`// check if we should process the image`
`2442`	`2444`	`if (slot.prompt.n_tokens() < slot.task->n_tokens() && input_tokens[slot.prompt.n_tokens()] == LLAMA_TOKEN_NULL) {`
`2443`	`2445`	`// process the image`
`@@ -2457,6 +2459,8 @@ struct server_context_impl {`
`2457`	`2459`	`const auto & chunk = input_tokens.find_chunk(slot.prompt.n_tokens());`
`2458`	`2460`	`slot.prompt.tokens.push_back(chunk.get()); // copy`
`2459`	`2461`	`}`
	`2462`	`+`
	`2463`	`+ do_checkpoint = false; // do not checkpoint right after an image chunk`
`2460`	`2464`	`}`
`2461`	`2465`
`2462`	`2466`	`// If using an alora, there may be uncached tokens that come`
`@@ -2473,8 +2477,6 @@ struct server_context_impl {`
`2473`	`2477`	`alora_disabled_id = enabled_loras[0];`
`2474`	`2478`	`}`
`2475`	`2479`
`2476`		`- bool do_checkpoint = params_base.n_ctx_checkpoints > 0;`
`2477`		`-`
`2478`	`2480`	`// make checkpoints only for completion tasks`
`2479`	`2481`	`do_checkpoint = do_checkpoint && slot.task->type == SERVER_TASK_TYPE_COMPLETION;`
`2480`	`2482`