Skip to content

Commit e397d38

Browse files
authored
common/json-schema: fix: handle non-capturing groups (?:...) in JSON schema pattern converter (#21124)
The regex-to-grammar converter in _visit_pattern() crashes with SIGSEGV when a JSON schema "pattern" field contains a non-capturing group (?:...). Root cause: when the parser sees '(' followed by '?', it pushes a warning but does not advance past '?:'. The recursive transform() call then interprets '?' as a quantifier and calls seq.back() on an empty vector, causing undefined behavior. This commonly occurs when serving OpenAI-compatible tool calls from clients that include complex regex patterns in their JSON schemas (e.g., date validation patterns like ^(?:(?:\d\d[2468][048]|...)-02-29|...)$). The fix: - Skip '?:' after '(' to treat non-capturing groups as regular groups - For unsupported syntax (?=, ?!, etc.), skip to matching ')' safely, handling escaped characters to avoid miscounting parenthesis depth - Adjust the ')' unbalanced-parentheses check using direct char comparisons instead of substr - Add test cases for non-capturing groups (C++ only, as the JS/Python implementations do not yet support this syntax)
1 parent e6f2ec0 commit e397d38

2 files changed

Lines changed: 59 additions & 3 deletions

File tree

common/json-schema-to-grammar.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -416,15 +416,30 @@ class common_schema_converter {
416416
i++;
417417
} else if (c == '(') {
418418
i++;
419-
if (i < length) {
420-
if (sub_pattern[i] == '?') {
419+
if (i < length && sub_pattern[i] == '?') {
420+
if (i + 1 < length && sub_pattern[i + 1] == ':') {
421+
i += 2; // skip "?:" for non-capturing group, treat as regular group
422+
} else {
423+
// lookahead/lookbehind (?=, ?!, ?<=, ?<!) - not supported
421424
_warnings.push_back("Unsupported pattern syntax");
425+
// skip to matching ')' to avoid UB on empty seq
426+
int depth = 1;
427+
while (i < length && depth > 0) {
428+
if (sub_pattern[i] == '\\' && i + 1 < length) {
429+
i += 2; // skip escaped character
430+
} else {
431+
if (sub_pattern[i] == '(') depth++;
432+
else if (sub_pattern[i] == ')') depth--;
433+
i++;
434+
}
435+
}
436+
continue;
422437
}
423438
}
424439
seq.emplace_back("(" + to_rule(transform()) + ")", false);
425440
} else if (c == ')') {
426441
i++;
427-
if (start > 0 && sub_pattern[start - 1] != '(') {
442+
if (start > 0 && sub_pattern[start - 1] != '(' && (start < 2 || sub_pattern[start - 2] != '?' || sub_pattern[start - 1] != ':')) {
428443
_errors.push_back("Unbalanced parentheses");
429444
}
430445
return join_seq();

tests/test-json-schema-to-grammar.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1525,6 +1525,47 @@ int main() {
15251525
}
15261526
});
15271527

1528+
// C++ only tests (features not yet supported in JS/Python implementations)
1529+
{
1530+
fprintf(stderr, "#\n# Testing C++ only features\n#\n");
1531+
auto run = [](const TestCase & tc) {
1532+
fprintf(stderr, "- %s\n", tc.name.c_str());
1533+
try {
1534+
tc.verify(json_schema_to_grammar(nlohmann::ordered_json::parse(tc.schema), true));
1535+
tc.verify_status(SUCCESS);
1536+
} catch (const std::invalid_argument & ex) {
1537+
fprintf(stderr, "Error: %s\n", ex.what());
1538+
tc.verify_status(FAILURE);
1539+
}
1540+
};
1541+
1542+
run({
1543+
SUCCESS,
1544+
"regexp with non-capturing group",
1545+
R"""({
1546+
"type": "string",
1547+
"pattern": "^(?:foo|bar)baz$"
1548+
})""",
1549+
R"""(
1550+
root ::= "\"" (("foo" | "bar") "baz") "\"" space
1551+
space ::= | " " | "\n"{1,2} [ \t]{0,20}
1552+
)""",
1553+
});
1554+
1555+
run({
1556+
SUCCESS,
1557+
"regexp with nested non-capturing groups",
1558+
R"""({
1559+
"type": "string",
1560+
"pattern": "^(?:(?:ab)+c)?d$"
1561+
})""",
1562+
R"""(
1563+
root ::= "\"" ((("ab")+ "c")? "d") "\"" space
1564+
space ::= | " " | "\n"{1,2} [ \t]{0,20}
1565+
)""",
1566+
});
1567+
}
1568+
15281569
if (getenv("LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR")) {
15291570
fprintf(stderr, "\033[33mWARNING: Skipping slow tests on emulator.\n\033[0m");
15301571
} else {

0 commit comments

Comments
 (0)