Skip to content

Commit 3e22037

Browse files
localai-botmudler
andauthored
fix(functions): validate auto-detected XML tool-call names — robust glm-4.5/Hermes guard (#9722, supersedes #9940) (#10059)
fix(functions): validate auto-detected XML tool-call names (#9722) The XML tool-call auto-detector tries every preset, including glm-4.5 whose tool block is <tool_call>name...</tool_call>. When a Hermes/NousResearch model emits <tool_call>{"name":"bash","arguments":{...}}</tool_call>, glm-4.5 mis-claims the block and returns the entire JSON object (or leading prose, or a JSON array) as the function NAME. The misparse then wins over the JSON parser, so streaming clients receive a tool call whose name is a JSON blob. Guard the auto-detect paths in ParseXMLIterative: a returned tool name must look like a real function name ([A-Za-z0-9_.-]+). Results that don't are dropped so auto-detection falls through to the next format and ultimately to JSON parsing, which handles Hermes correctly. An explicitly forced format (format != nil) is left untouched and trusted verbatim. This supersedes PR #9940, which dropped only names with a leading "{". That narrower check misses leading prose ("Sure: {...}"), JSON arrays ("[{...}]") and brace-less garbage ("name: bash, ..."); the name-shape check rejects all of them while still accepting legitimate glm-4.5 calls. The fix applies to both the streaming worker and the non-streaming ParseFunctionCall path, which both call ParseXMLIterative with auto-detection. Assisted-by: Claude:claude-opus-4-8 [Claude Code] Signed-off-by: Ettore Di Giacinto <mudler@localai.io> Co-authored-by: Ettore Di Giacinto <mudler@localai.io>
1 parent fbcd886 commit 3e22037

2 files changed

Lines changed: 100 additions & 4 deletions

File tree

pkg/functions/parse.go

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,36 @@ func buildContent(before string, parser *ChatMsgParser) string {
628628
// This provides better streaming and partial parsing support.
629629
// When format is nil or when format is set, tries "find scope/tool start, split, parse suffix"
630630
// first (llama.cpp PEG order) so that content before the tool block does not cause parse failure.
631+
// validToolNameRe matches a plausible function name. OpenAI tool names are
632+
// limited to letters, digits, underscores and hyphens; dots appear in some
633+
// providers' namespaced names. Anything else (whitespace, braces, brackets,
634+
// quotes, colons) signals the XML auto-detector grabbed a JSON blob or prose
635+
// rather than a real name.
636+
var validToolNameRe = regexp.MustCompile(`^[A-Za-z0-9_.\-]+$`)
637+
638+
// plausibleToolName reports whether name looks like a real function name.
639+
func plausibleToolName(name string) bool {
640+
return validToolNameRe.MatchString(strings.TrimSpace(name))
641+
}
642+
643+
// filterPlausibleToolCalls drops auto-detected tool calls whose name is not a
644+
// plausible function name. This guards against a format (notably glm-4.5, whose
645+
// tool block is <tool_call>name...</tool_call>) mis-claiming a Hermes-style
646+
// <tool_call>JSON</tool_call> block and returning the whole JSON object — or
647+
// any leading prose / array — as the function name. Dropping the misparse lets
648+
// auto-detection fall through to the next format and ultimately to JSON
649+
// parsing, which handles Hermes correctly. Replaces the narrower leading-"{"
650+
// check (PR #9940); see issue #9722.
651+
func filterPlausibleToolCalls(calls []FuncCallResults) []FuncCallResults {
652+
out := calls[:0:0]
653+
for _, c := range calls {
654+
if plausibleToolName(c.Name) {
655+
out = append(out, c)
656+
}
657+
}
658+
return out
659+
}
660+
631661
func ParseXMLIterative(s string, format *XMLToolCallFormat, isPartial bool) ([]FuncCallResults, error) {
632662
// Try split-on-scope first so reasoning/content before tool block is skipped
633663
if format != nil {
@@ -639,7 +669,12 @@ func ParseXMLIterative(s string, format *XMLToolCallFormat, isPartial bool) ([]F
639669
for _, fmtPreset := range formats {
640670
if fmtPreset.format != nil {
641671
if pr, ok := tryParseXMLFromScopeStart(s, fmtPreset.format, isPartial); ok {
642-
return pr.ToolCalls, nil
672+
// Auto-detect: discard misparsed (non-name) results so a
673+
// format that grabbed a JSON blob doesn't win; fall through
674+
// to the next format.
675+
if valid := filterPlausibleToolCalls(pr.ToolCalls); len(valid) > 0 {
676+
return valid, nil
677+
}
643678
}
644679
}
645680
}
@@ -659,14 +694,19 @@ func ParseXMLIterative(s string, format *XMLToolCallFormat, isPartial bool) ([]F
659694
if err != nil {
660695
// Check if it's a partial exception (recoverable)
661696
if _, ok := err.(*ChatMsgPartialException); ok {
662-
// Partial parse, return what we have
663-
return parser.ToolCalls(), nil
697+
// Partial parse, return what we have — unless every
698+
// result is a misparse, in which case try the next format.
699+
if valid := filterPlausibleToolCalls(parser.ToolCalls()); len(valid) > 0 {
700+
return valid, nil
701+
}
664702
}
665703
// Try next format
666704
continue
667705
}
668706
if success && len(parser.ToolCalls()) > 0 {
669-
return parser.ToolCalls(), nil
707+
if valid := filterPlausibleToolCalls(parser.ToolCalls()); len(valid) > 0 {
708+
return valid, nil
709+
}
670710
}
671711
}
672712
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package functions
2+
3+
import (
4+
"regexp"
5+
6+
. "github.com/onsi/ginkgo/v2"
7+
. "github.com/onsi/gomega"
8+
)
9+
10+
// Robust fix for the glm-4.5 XML auto-detect false positive (relates to #9722
11+
// / supersedes the brittle leading-"{" filter in #9940). When the XML
12+
// auto-detector mis-identifies a Hermes-style <tool_call>JSON</tool_call> block
13+
// as glm-4.5, it extracts the block body as the function NAME. A real function
14+
// name is [A-Za-z0-9_.-]+; anything with braces, brackets, whitespace, quotes
15+
// or colons is a misparse and must not be returned (so JSON parsing can take
16+
// over). This is stronger than checking only for a leading "{": it also rejects
17+
// leading prose, JSON arrays, and brace-less garbage.
18+
var _ = Describe("glm-4.5 auto-detect name validation (#9722/#9940)", func() {
19+
// plausibleName mirrors the contract: a returned auto-detected tool name
20+
// must look like a real function name.
21+
plausible := regexp.MustCompile(`^[A-Za-z0-9_.\-]+$`)
22+
23+
DescribeTable("auto-detect must not emit a misparsed tool name",
24+
func(input string) {
25+
results, err := ParseXMLIterative(input, nil, false)
26+
Expect(err).ToNot(HaveOccurred())
27+
for _, r := range results {
28+
Expect(plausible.MatchString(r.Name)).To(BeTrue(),
29+
"auto-detected XML tool name must look like a function name, got: %q", r.Name)
30+
}
31+
},
32+
Entry("canonical Hermes JSON", "<tool_call>\n{\"name\": \"bash\", \"arguments\": {\"script\": \"ls\"}}\n</tool_call>"),
33+
Entry("leading prose then JSON", "<tool_call>\nSure: {\"name\": \"bash\", \"arguments\": {\"script\": \"ls\"}}\n</tool_call>"),
34+
Entry("JSON array (parallel calls)", "<tool_call>\n[{\"name\": \"bash\", \"arguments\": {}}]\n</tool_call>"),
35+
Entry("brace-less garbage", "<tool_call>\nname: bash, arguments: {}\n</tool_call>"),
36+
)
37+
38+
// No-regression: a genuine glm-4.5 tool call must still be auto-detected.
39+
It("still parses a legitimate glm-4.5 tool call", func() {
40+
legit := "<tool_call>get_weather\n<arg_key>city</arg_key>\n<arg_value>NYC</arg_value>\n</tool_call>"
41+
results, err := ParseXMLIterative(legit, nil, false)
42+
Expect(err).ToNot(HaveOccurred())
43+
Expect(results).To(HaveLen(1))
44+
Expect(results[0].Name).To(Equal("get_weather"))
45+
})
46+
47+
// A user who explicitly forces the glm-4.5 format keeps the raw behaviour
48+
// (no name filtering) — only auto-detection is guarded.
49+
It("does not filter when the glm-4.5 format is explicitly forced", func() {
50+
input := "<tool_call>\n{\"name\": \"bash\", \"arguments\": {}}\n</tool_call>"
51+
forced, err := ParseXMLIterative(input, GetXMLFormatPreset("glm-4.5"), false)
52+
Expect(err).ToNot(HaveOccurred())
53+
Expect(forced).ToNot(BeEmpty(),
54+
"explicit format must be trusted verbatim, even if it yields a JSON-blob name")
55+
})
56+
})

0 commit comments

Comments
 (0)