fix: tool-call JSON leaks into content with stream+tools on tokenizer-template models (#10052) (#10057)

localai-bot · mudler · web-flow · commit 73cfedc0238e · 2026-05-29T10:12:53.000+02:00
* fix(grammars): honor properties_order entry at index 0

  The JSON-schema-to-GBNF property sort used `aOrder != 0 && bOrder != 0` as its "is this key ordered?" guard. That treats index 0 — the first key listed in properties_order — as unset, so `properties_order: name,arguments` fell back to alphabetical ordering and still emitted "arguments" before "name".

  Use presence in the order map instead: listed keys sort by their index and ahead of unlisted keys, which keep a stable alphabetical order. This makes the documented `properties_order: name,arguments` actually produce name-first tool-call JSON.

  Relates to #10052.

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
  Assisted-by: Claude:claude-opus-4-8 [Claude Code]

* fix(functions): defer tool grammar to the backend when the tokenizer template owns templating (#10052)

  When use_tokenizer_template delegates templating to the backend (llama.cpp), the backend also owns tool-call grammar generation and parsing. LocalAI was still generating its own GBNF grammar and sending it down. With a grammar present, llama.cpp does not hand the tools to its template, so its native peg/json tool parser never engages: it streams the grammar-constrained tool-call JSON back as plain content instead of emitting tool_calls. In streaming mode the JSON object leaked into the content field, and the Go-side incremental detector never gated content because the LocalAI-generated grammar emitted "arguments" before "name".

  The GGUF auto-import path already couples use_tokenizer_template with grammar.disable, but that block is skipped when a template is already configured, so gallery and hand-written configs (e.g. qwen3) that set the tokenizer template directly never got the paired grammar.disable.

  - SetDefaults now enforces the coupling for every config: when use_tokenizer_template is set, grammar generation is disabled and tools flow to the backend's native (name-first) pipeline. This also fixes already-installed models without editing each config.
  - Set function.grammar.disable in the shared gallery/qwen3.yaml, which is the base config referenced by every qwen3 gallery entry.

  Verified end to end against qwen3-4b with stream:true + tools: content no longer carries the tool-call JSON, reasoning is classified separately, and tool calls stream as proper name-first tool_calls deltas.

  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
  Assisted-by: Claude:claude-opus-4-8 [Claude Code]

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
diff --git a/core/config/model_config.go b/core/config/model_config.go
@@ -732,6 +732,17 @@ func (cfg *ModelConfig) SetDefaults(opts ...ConfigLoaderOption) {
 		cfg.Proxy.Mode = ProxyModePassthrough
 	}
 
+	// When templating is delegated to the backend (use_tokenizer_template),
+	// the backend also owns tool-call grammar generation and parsing. Sending
+	// a LocalAI-generated grammar alongside overrides the backend's native
+	// (name-first) tool pipeline and makes it stream the tool-call JSON back as
+	// plain content (issue #10052). The GGUF auto-import path already couples
+	// these two flags; enforce it here so gallery and hand-written configs that
+	// set use_tokenizer_template directly stay consistent.
+	if cfg.TemplateConfig.UseTokenizerTemplate {
+		cfg.FunctionsConfig.GrammarConfig.NoGrammar = true
+	}
+
 	// Apply model-family-specific inference defaults before generic fallbacks.
 	// This ensures gallery-installed and runtime-loaded models get optimal parameters.
 	ApplyInferenceDefaults(cfg, cfg.Name, cfg.Model)
diff --git a/core/config/model_config_test.go b/core/config/model_config_test.go
@@ -471,4 +471,33 @@ concurrency_groups:
 			Expect(configs[0].GetConcurrencyGroups()).To(Equal([]string{"vram-heavy", "120b"}))
 		})
 	})
+
+	// When templating is delegated to the backend (use_tokenizer_template),
+	// the backend also owns tool-call grammar generation and parsing. A
+	// LocalAI-generated grammar sent alongside would override the backend's
+	// native (name-first) tool pipeline and make it stream the tool-call JSON
+	// back as plain content (issue #10052). SetDefaults must therefore couple
+	// the two: tokenizer template implies grammar generation is disabled.
+	Context("use_tokenizer_template couples with grammar disable (issue #10052)", func() {
+		It("disables Go grammar generation when the tokenizer template is used", func() {
+			cfg := &ModelConfig{
+				TemplateConfig: TemplateConfig{UseTokenizerTemplate: true},
+			}
+			Expect(cfg.FunctionsConfig.GrammarConfig.NoGrammar).To(BeFalse()) // precondition: the zero value leaves grammar generation enabled
+
+			cfg.SetDefaults()
+
+			Expect(cfg.FunctionsConfig.GrammarConfig.NoGrammar).To(BeTrue(),
+				"use_tokenizer_template must imply grammar.disable so tools go to the backend's native pipeline")
+		})
+
+		It("leaves grammar generation enabled when the tokenizer template is not used", func() {
+			cfg := &ModelConfig{}
+
+			cfg.SetDefaults()
+
+			Expect(cfg.FunctionsConfig.GrammarConfig.NoGrammar).To(BeFalse(),
+				"models that template in Go still rely on the Go-generated grammar")
+		})
+	})
 })
diff --git a/gallery/qwen3.yaml b/gallery/qwen3.yaml
@@ -17,6 +17,13 @@ config_file: |
     # "pure content" PEG parser that leaks reasoning tags into content.
     options:
         - use_jinja:true
+    # With use_tokenizer_template the backend (llama.cpp) owns tool-call
+    # grammar generation and parsing too. Disabling LocalAI's own grammar lets
+    # llama.cpp's native name-first tool pipeline run; otherwise the generated
+    # grammar overrides it and the tool-call JSON leaks into content (#10052).
+    function:
+        grammar:
+            disable: true
     template:
         use_tokenizer_template: true
 name: qwen3
diff --git a/pkg/functions/grammars/json_schema.go b/pkg/functions/grammars/json_schema.go
@@ -155,12 +155,22 @@ func (sc *JSONSchemaConverter) visit(schema map[string]any, name string, rootSch
 			propName   string
 			propSchema map[string]any
 		}) int {
-			aOrder := propOrder[a.propName]
-			bOrder := propOrder[b.propName]
-			if aOrder != 0 && bOrder != 0 {
+			// Use presence in the order map (not a non-zero sentinel) so that
+			// the first listed key — index 0 — is honored. Keys present in
+			// properties_order sort by their index and ahead of any key that
+			// isn't listed; unlisted keys keep a stable alphabetical order.
+			aOrder, aOK := propOrder[a.propName]
+			bOrder, bOK := propOrder[b.propName]
+			switch {
+			case aOK && bOK:
 				return cmp.Compare(aOrder, bOrder)
+			case aOK:
+				return -1
+			case bOK:
+				return 1
+			default:
+				return cmp.Compare(a.propName, b.propName)
 			}
-			return cmp.Compare(a.propName, b.propName)
 		})
 
 		var rule strings.Builder
diff --git a/pkg/functions/grammars/json_schema_test.go b/pkg/functions/grammars/json_schema_test.go
@@ -547,3 +547,61 @@ realvalue
 		})
 	})
 })
+
+var _ = Describe("JSON schema property ordering (issue #10052)", func() {
+	// A function-call shaped schema. The grammar must honor the configured
+	// properties_order. Before the fix, the sort guard `aOrder != 0 && bOrder != 0`
+	// treated the first listed key (index 0) as "unset" and fell back to
+	// alphabetical order, so "arguments" was emitted before "name" even when
+	// properties_order put name first.
+	const schema = `{
+		"type": "object",
+		"properties": {
+			"name": {"type": "string"},
+			"arguments": {"type": "object", "properties": {"cmd": {"type": "string"}}}
+		}
+	}`
+
+	// keyIndex returns the position of an object-key literal (escaped as
+	// \"key\" in GBNF; derived rule names like root-name don't match). It fails
+	// the spec when the key is absent so a -1 can't satisfy a "<" comparison.
+	keyIndex := func(grammar, key string) int {
+		idx := strings.Index(grammar, `\"`+key+`\"`)
+		ExpectWithOffset(1, idx).To(BeNumerically(">=", 0), "key %q not found in grammar", key)
+		return idx
+	}
+
+	It("honors properties_order with name listed first (index 0)", func() {
+		grammar, err := NewJSONSchemaConverter("name,arguments").GrammarFromBytes([]byte(schema))
+		Expect(err).To(BeNil())
+		ni := keyIndex(grammar, "name")
+		ai := keyIndex(grammar, "arguments")
+		Expect(ni).To(BeNumerically("<", ai),
+			"properties_order lists name first, so the grammar must emit \"name\" before \"arguments\"")
+	})
+
+	It("keeps alphabetical order when properties_order is empty", func() {
+		grammar, err := NewJSONSchemaConverter("").GrammarFromBytes([]byte(schema))
+		Expect(err).To(BeNil())
+		// No explicit order: keys fall back to alphabetical, so "arguments"
+		// precedes "name". This is the documented default and must not change.
+		Expect(keyIndex(grammar, "arguments")).To(BeNumerically("<", keyIndex(grammar, "name")))
+	})
+
+	It("sorts keys present in properties_order ahead of unlisted keys", func() {
+		const schemaWithExtra = `{
+			"type": "object",
+			"properties": {
+				"name": {"type": "string"},
+				"arguments": {"type": "object", "properties": {"cmd": {"type": "string"}}},
+				"aaa_unlisted": {"type": "string"}
+			}
+		}`
+		// "aaa_unlisted" is alphabetically first but not in the order list, so
+		// it must still come after the listed name/arguments keys.
+		grammar, err := NewJSONSchemaConverter("name,arguments").GrammarFromBytes([]byte(schemaWithExtra))
+		Expect(err).To(BeNil())
+		Expect(keyIndex(grammar, "name")).To(BeNumerically("<", keyIndex(grammar, "arguments")))
+		Expect(keyIndex(grammar, "arguments")).To(BeNumerically("<", keyIndex(grammar, "aaa_unlisted")))
+	})
+})