server : fix reasoning budget WebUI precedence over model.ini (ggml-org#24517)

ggerganov · web-flow · commit ebc10770ac5a · 2026-06-12T17:59:56.000+03:00
When reasoning-budget is set in model.ini, the per-request
thinking_budget_tokens from the WebUI was ignored because the
model.ini value took unconditional precedence.

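Swap the precedence so the WebUI per-request value is checked
first, with the model.ini value serving as a fallback default.
A request that omits thinking_budget_tokens, or sends -1, is
treated as unset and falls back to the model.ini value.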

Assisted-by: pi:llama.cpp/Qwen3.6-27B
diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp
@@ -1126,9 +1126,9 @@ json oaicompat_chat_params_parse(
 
     // Reasoning budget: pass parameters through to sampling layer
     {
-        int reasoning_budget = opt.reasoning_budget;
-        if (reasoning_budget == -1 && body.contains("thinking_budget_tokens")) {
-            reasoning_budget = json_value(body, "thinking_budget_tokens", -1);
+        int reasoning_budget = json_value(body, "thinking_budget_tokens", -1);
+        if (reasoning_budget == -1) {
+            reasoning_budget = opt.reasoning_budget;
         }
 
         if (!chat_params.thinking_end_tag.empty()) {

Original file line number	Diff line number	Diff line change
`@@ -1126,9 +1126,9 @@ json oaicompat_chat_params_parse(`
`1126`	`1126`
`1127`	`1127`	`// Reasoning budget: pass parameters through to sampling layer`
`1128`	`1128`	`{`
`1129`		`- int reasoning_budget = opt.reasoning_budget;`
`1130`		`- if (reasoning_budget == -1 && body.contains("thinking_budget_tokens")) {`
`1131`		`- reasoning_budget = json_value(body, "thinking_budget_tokens", -1);`
	`1129`	`+ int reasoning_budget = json_value(body, "thinking_budget_tokens", -1);`
	`1130`	`+ if (reasoning_budget == -1) {`
	`1131`	`+ reasoning_budget = opt.reasoning_budget;`
`1132`	`1132`	`}`
`1133`	`1133`
`1134`	`1134`	`if (!chat_params.thinking_end_tag.empty()) {`