Skip to content

Commit 7924a6e

Browse files
committed
server: filter partial tool call streams
Expose stable tool-call headers early for client UX, but quarantine partial arguments and malformed raw tool markers until the parser sees a complete call. Accept direct tag-style function starts such as <function=...> for Qwen-style tool calls and include structural markers in lazy grammar triggers. Fixes #6
1 parent 82ecad0 commit 7924a6e

3 files changed

Lines changed: 305 additions & 10 deletions

File tree

common/chat-auto-parser-generator.cpp

Lines changed: 43 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,10 @@
1313

1414
using json = nlohmann::ordered_json;
1515

16+
// Returns true when `marker` is non-empty and its first character is '<' or
// '[', i.e. it starts like a tag or bracket opener rather than free-form text.
static bool is_structural_tool_marker(const std::string & marker) {
    if (marker.empty()) {
        return false;
    }
    const char first = marker.front();
    return first == '<' || first == '[';
}
19+
1620
// Helper to iterate over tools/functions
1721
static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
1822
for (const auto & tool : tools) {
@@ -76,11 +80,30 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
7680
parser.build_grammar(builder, data.grammar_lazy);
7781
});
7882

79-
// Set grammar triggers based on tool section markers (fall back to per-call markers)
83+
// Set grammar triggers based on tool section markers (fall back to per-call markers).
84+
// Some tag-style models occasionally skip the outer per-call wrapper and start
85+
// directly at the function marker; trigger there too if it is a structural marker.
8086
if (data.grammar_lazy) {
81-
data.grammar_triggers = {
82-
{ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_marker }
87+
std::vector<std::string> trigger_markers = {
88+
autoparser.tools.format.section_start,
89+
autoparser.tools.format.per_call_start,
8390
};
91+
if (is_structural_tool_marker(autoparser.tools.function.name_prefix)) {
92+
trigger_markers.push_back(autoparser.tools.function.name_prefix);
93+
}
94+
95+
for (const std::string & marker : trigger_markers) {
96+
if (marker.empty()) {
97+
continue;
98+
}
99+
bool exists = false;
100+
for (const auto & trigger : data.grammar_triggers) {
101+
exists = exists || trigger.value == marker;
102+
}
103+
if (!exists) {
104+
data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, marker });
105+
}
106+
}
84107
}
85108
}
86109

@@ -440,12 +463,16 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
440463

441464
common_peg_parser tool_calls = p.eps();
442465

466+
const bool allow_direct_func_start =
467+
is_structural_tool_marker(function.name_prefix) && !function.close.empty();
468+
443469
if (!format.per_call_start.empty()) {
444470
auto wrapped_call = format.per_call_start + p.space() + tool_choice + p.space() + format.per_call_end;
471+
auto single_call = allow_direct_func_start ? (wrapped_call | tool_choice) : wrapped_call;
445472
if (inputs.parallel_tool_calls) {
446-
tool_calls = p.trigger_rule("tool-call", wrapped_call + p.zero_or_more(p.space() + wrapped_call) + p.space());
473+
tool_calls = p.trigger_rule("tool-call", single_call + p.zero_or_more(p.space() + single_call) + p.space());
447474
} else {
448-
tool_calls = p.trigger_rule("tool-call", wrapped_call + p.space());
475+
tool_calls = p.trigger_rule("tool-call", single_call + p.space());
449476
}
450477
if (!format.section_start.empty()) {
451478
tool_calls = p.trigger_rule("tool-calls",
@@ -469,8 +496,17 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
469496
tool_calls = p.optional(tool_calls);
470497
}
471498

472-
std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
473-
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
499+
std::vector<std::string> trigger_markers;
500+
if (!format.section_start.empty()) {
501+
trigger_markers.push_back(format.section_start);
502+
}
503+
if (!format.per_call_start.empty()) {
504+
trigger_markers.push_back(format.per_call_start);
505+
}
506+
if (allow_direct_func_start) {
507+
trigger_markers.push_back(function.name_prefix);
508+
}
509+
auto content_before_tools = trigger_markers.empty() ? p.eps() : p.until_one_of(trigger_markers);
474510
return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls +
475511
p.end();
476512
}

tools/server/server-task.cpp

Lines changed: 261 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -244,6 +244,252 @@ json task_params::to_json(bool only_metrics) const {
244244
//
245245
// task_result_state
246246
//
247+
// Forward declaration: the definition appears further down, after the raw
// tool-marker scanning helper it delegates to.
static bool task_result_has_explicit_tool_call_marker(const std::string & text);
248+
249+
static bool task_result_has_complete_partial_tool_calls(
250+
const std::string & generated_text,
251+
const common_chat_msg & msg) {
252+
if (msg.tool_calls.empty()) {
253+
return false;
254+
}
255+
256+
for (const auto & tc : msg.tool_calls) {
257+
if (tc.name.empty() || tc.arguments.empty()) {
258+
return false;
259+
}
260+
}
261+
262+
const size_t end = generated_text.find_last_not_of(" \t\r\n");
263+
if (end == std::string::npos) {
264+
return false;
265+
}
266+
267+
const size_t start = end > 512 ? end - 512 : 0;
268+
const std::string tail = generated_text.substr(start, end - start + 1);
269+
270+
static const char * closing_markers[] = {
271+
"</tool_call>",
272+
"</function>",
273+
"</seed:tool_call>",
274+
"</minimax:tool_call>",
275+
"</TOOLCALL>",
276+
"<|tool_call_end|>",
277+
"<|tool_calls_section_end|>",
278+
"<tool_call|>",
279+
};
280+
for (const char * marker : closing_markers) {
281+
if (tail.find(marker) != std::string::npos) {
282+
return true;
283+
}
284+
}
285+
286+
// Check full generated_text, not only tail. A long argument can push the
287+
// opening marker outside the 512-byte tail window. In tagged formats,
288+
// JSON/object closure alone is not completion.
289+
if (task_result_has_explicit_tool_call_marker(generated_text)) {
290+
return false;
291+
}
292+
293+
const char last = generated_text[end];
294+
return last == '}' || last == ']' || last == ')';
295+
}
296+
297+
// Reports whether byte offset `pos` of `text` lies inside a ``` code fence.
//
// Scans left to right from the start of `text`, toggling the fence state at
// every non-overlapping "```" that begins strictly before `pos`. An odd number
// of such fences means `pos` is inside an open fence.
static bool task_result_pos_is_in_code_fence(
        const std::string & text,
        size_t pos) {
    bool in_fence = false;
    size_t search = 0;
    while (search < pos) {
        const size_t fence = text.find("```", search);
        if (fence == std::string::npos || fence >= pos) {
            break;
        }
        in_fence = !in_fence;
        search = fence + 3;  // skip past this fence so it is not matched again
    }
    return in_fence;
}
312+
313+
// Reports whether a marker starting at offset `pos` in `text` sits on a
// boundary: either at the very start of the text, or immediately after a
// newline, carriage return, tab, space, or '>' character.
//
// `pos` must not exceed text.size(); the character at pos - 1 is read.
static bool task_result_raw_tool_marker_has_boundary(
        const std::string & text,
        size_t pos) {
    if (pos == 0) {
        return true;
    }

    const char prev = text[pos - 1];
    return prev == '\n' || prev == '\r' || prev == '\t' || prev == ' ' || prev == '>';
}
323+
324+
static size_t task_result_find_raw_tool_marker(
325+
const std::string & text,
326+
size_t search_from) {
327+
static const char * markers[] = {
328+
"<tool_call",
329+
"<function=",
330+
"<parameter=",
331+
"<|tool_calls_section_begin|>",
332+
"<|tool_call_begin|>",
333+
"<|tool_call_start|>",
334+
"<seed:tool_call",
335+
"<minimax:tool_call",
336+
"<TOOLCALL",
337+
};
338+
339+
size_t best = std::string::npos;
340+
for (const char * marker : markers) {
341+
size_t pos = text.find(marker, search_from);
342+
while (pos != std::string::npos) {
343+
if (task_result_raw_tool_marker_has_boundary(text, pos) &&
344+
!task_result_pos_is_in_code_fence(text, pos)) {
345+
best = best == std::string::npos ? pos : std::min(best, pos);
346+
break;
347+
}
348+
pos = text.find(marker, pos + 1);
349+
}
350+
}
351+
352+
return best;
353+
}
354+
355+
static bool task_result_has_explicit_tool_call_marker(const std::string & text) {
356+
return task_result_find_raw_tool_marker(text, 0) != std::string::npos;
357+
}
358+
359+
static bool task_result_starts_with_raw_tool_marker(const std::string & text) {
360+
const size_t marker = task_result_find_raw_tool_marker(text, 0);
361+
if (marker == std::string::npos) {
362+
return false;
363+
}
364+
365+
const size_t first = text.find_first_not_of(" \t\r\n");
366+
return first != std::string::npos && marker == first;
367+
}
368+
369+
static void task_result_quarantine_raw_tool_text_field(
370+
std::string & text,
371+
const std::string & previous) {
372+
if (text.size() <= previous.size()) {
373+
return;
374+
}
375+
376+
const size_t search_from = previous.size() > 64 ? previous.size() - 64 : 0;
377+
const size_t marker = task_result_find_raw_tool_marker(text, search_from);
378+
if (marker == std::string::npos) {
379+
return;
380+
}
381+
382+
// Keep all text that was already streamed. If a malformed raw tool marker
383+
// appears in newly parsed content/reasoning, quarantine it until it becomes
384+
// a parsed tool call instead of visible assistant text.
385+
if (marker < previous.size()) {
386+
text = previous;
387+
} else {
388+
text.resize(marker);
389+
}
390+
}
391+
392+
// Applies raw tool-marker quarantining to both the `content` and
// `reasoning_content` fields of `new_msg`, using the corresponding fields of
// `msg_prv` as the previous values.
//
// Skipped entirely when `new_msg` has more tool calls than `msg_prv`.
static void task_result_quarantine_raw_tool_text(
        common_chat_msg & new_msg,
        const common_chat_msg & msg_prv) {
    if (new_msg.tool_calls.size() > msg_prv.tool_calls.size()) {
        return;
    }

    task_result_quarantine_raw_tool_text_field(new_msg.content, msg_prv.content);
    task_result_quarantine_raw_tool_text_field(new_msg.reasoning_content, msg_prv.reasoning_content);
}
402+
403+
// Resets the textual fields of `new_msg` (`content`, `content_parts`,
// `reasoning_content`) to the values held by `msg_prv`, so that a diff between
// the two messages shows no text change. Tool calls are left untouched.
static void task_result_freeze_text_fields(
        common_chat_msg & new_msg,
        const common_chat_msg & msg_prv) {
    new_msg.content           = msg_prv.content;
    new_msg.content_parts     = msg_prv.content_parts;
    new_msg.reasoning_content = msg_prv.reasoning_content;
}
410+
411+
// Checks whether an argument marker follows the last occurrence of `anchor`
// in `generated_text`.
//
// Returns false when `anchor` is empty or not present. Otherwise inspects at
// most 512 bytes starting at the last occurrence of `anchor` and returns true
// if any known argument marker appears in that window.
static bool task_result_has_marker_after_anchor(
        const std::string & generated_text,
        const std::string & anchor) {
    if (anchor.empty()) {
        return false;
    }

    const size_t pos = generated_text.rfind(anchor);
    if (pos == std::string::npos) {
        return false;
    }

    const size_t tail_len = std::min<size_t>(generated_text.size() - pos, 512);
    const std::string tail = generated_text.substr(pos, tail_len);

    static const char * argument_markers[] = {
        "<|tool_call_argument_begin|>",
        "<parameter=",
        "\"arguments\"",
        "'arguments'",
        "arguments:",
        "arguments=",
    };
    for (const char * marker : argument_markers) {
        if (tail.find(marker) != std::string::npos) {
            return true;
        }
    }

    return false;
}
442+
443+
static bool task_result_has_stable_partial_tool_call_header(
444+
const std::string & generated_text,
445+
const common_chat_tool_call & tc) {
446+
if (tc.name.empty()) {
447+
return false;
448+
}
449+
450+
// Arguments present in the parsed result does not guarantee a stable header
451+
// for streaming purposes. The parser may have only started argument parsing
452+
// (e.g. just "{"). Only consider the header stable when the generated text
453+
// has argument markers after the tool call name/id anchor.
454+
return task_result_has_marker_after_anchor(generated_text, tc.id) ||
455+
task_result_has_marker_after_anchor(generated_text, tc.name);
456+
}
457+
458+
static void task_result_filter_incomplete_partial_tool_calls(
459+
const std::string & generated_text,
460+
common_chat_msg & new_msg,
461+
const common_chat_msg & msg_prv) {
462+
std::vector<common_chat_tool_call> filtered;
463+
filtered.reserve(std::max(new_msg.tool_calls.size(), msg_prv.tool_calls.size()));
464+
465+
for (size_t i = 0; i < new_msg.tool_calls.size(); ++i) {
466+
common_chat_tool_call tc = new_msg.tool_calls[i];
467+
if (i < msg_prv.tool_calls.size()) {
468+
if (tc.name.empty()) {
469+
tc.name = msg_prv.tool_calls[i].name;
470+
}
471+
if (tc.id.empty()) {
472+
tc.id = msg_prv.tool_calls[i].id;
473+
}
474+
}
475+
476+
if (!task_result_has_stable_partial_tool_call_header(generated_text, tc)) {
477+
break;
478+
}
479+
480+
// A partial stream may expose the stable tool name/id for UX, but the
481+
// arguments remain hidden until a complete call is parsed.
482+
tc.arguments = i < msg_prv.tool_calls.size() ? msg_prv.tool_calls[i].arguments : "";
483+
filtered.push_back(std::move(tc));
484+
}
485+
486+
while (filtered.size() < msg_prv.tool_calls.size()) {
487+
filtered.push_back(msg_prv.tool_calls[filtered.size()]);
488+
}
489+
490+
new_msg.tool_calls = std::move(filtered);
491+
}
492+
247493
common_chat_msg task_result_state::update_chat_msg(
248494
const std::string & text_added,
249495
bool is_partial,
@@ -258,6 +504,19 @@ common_chat_msg task_result_state::update_chat_msg(
258504
chat_parser_params);
259505
if (!new_msg.empty()) {
260506
new_msg.set_tool_call_ids(generated_tool_call_ids, gen_tool_call_id);
507+
if (filter_tool_calls && chat_parser_params.parse_tool_calls) {
508+
const bool has_complete_tool_calls = task_result_has_complete_partial_tool_calls(generated_text, new_msg);
509+
if (!new_msg.tool_calls.empty() &&
510+
(!has_complete_tool_calls || task_result_starts_with_raw_tool_marker(generated_text))) {
511+
task_result_freeze_text_fields(new_msg, msg_prv_copy);
512+
}
513+
if (!has_complete_tool_calls) {
514+
task_result_quarantine_raw_tool_text(new_msg, msg_prv_copy);
515+
}
516+
if (!has_complete_tool_calls) {
517+
task_result_filter_incomplete_partial_tool_calls(generated_text, new_msg, msg_prv_copy);
518+
}
519+
}
261520
chat_msg = new_msg;
262521
auto all_diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, chat_msg);
263522

@@ -292,7 +551,7 @@ common_chat_msg task_result_state::update_chat_msg(
292551
}
293552
} else {
294553
// Not sent yet.
295-
if (!d.tool_call_delta.arguments.empty() || !is_partial) {
554+
if (!d.tool_call_delta.name.empty() || !d.tool_call_delta.arguments.empty() || !is_partial) {
296555
d.tool_call_delta.name = chat_msg.tool_calls[i].name;
297556
d.tool_call_delta.id = chat_msg.tool_calls[i].id;
298557
diffs.push_back(std::move(d));
@@ -1525,7 +1784,7 @@ json server_task_result_cmpl_final::to_json_anthropic_stream() {
15251784
//
15261785
void server_task_result_cmpl_partial::update(task_result_state & state) {
15271786
is_updated = true;
1528-
state.update_chat_msg(content, true, oaicompat_msg_diffs);
1787+
state.update_chat_msg(content, true, oaicompat_msg_diffs, true);
15291788

15301789
// Copy current state for use in to_json_*() (reflects state BEFORE this chunk)
15311790
thinking_block_started = state.thinking_block_started;

tools/server/server-task.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -388,7 +388,7 @@ struct server_task_result_cmpl_final : server_task_result {
388388

389389
virtual void update(task_result_state & state) override {
390390
is_updated = true;
391-
oaicompat_msg = state.update_chat_msg(content, false, oaicompat_msg_diffs);
391+
oaicompat_msg = state.update_chat_msg(content, false, oaicompat_msg_diffs, stream);
392392

393393
oai_resp_id = state.oai_resp_id;
394394
oai_resp_reasoning_id = state.oai_resp_reasoning_id;

0 commit comments

Comments
 (0)