Skip to content

Commit f653ae0

Browse files
Merge pull request #574 from janhq/update-dev-from-master-2026-06-29-01-17
Sync master with upstream release b9837
2 parents 5c75d32 + b3fed31 commit f653ae0

42 files changed

Lines changed: 1810 additions & 302 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

common/arg.cpp

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -467,7 +467,7 @@ void common_models_handler_apply(common_models_handler & handler, common_params
467467
// the first part is what gets loaded, so point params.model.path at it
468468
if (!url_tasks.empty()) {
469469
std::string first_path = url_tasks.front().local_path;
470-
url_tasks.front().on_done = [&]() { params.model.path = first_path; };
470+
url_tasks.front().on_done = [&, first_path]() { params.model.path = first_path; };
471471
}
472472
for (auto & task : url_tasks) {
473473
tasks.push_back(std::move(task));
@@ -3296,6 +3296,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
32963296
params.sampling.reasoning_budget_message = value;
32973297
}
32983298
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_THINK_BUDGET_MESSAGE"));
3299+
add_opt(common_arg(
3300+
{"--reasoning-preserve"},
3301+
{"--no-reasoning-preserve"},
3302+
"preserve reasoning trace in the full history, not just the last assistant message (default: template default)\n"
3303+
"compatible with certain templates having 'supports_preserve_reasoning' capability\n"
3304+
"example: https://docs.z.ai/guides/capabilities/thinking-mode#preserved-thinking",
3305+
[](common_params & params, bool value) {
3306+
if (value) {
3307+
params.default_template_kwargs["preserve_reasoning"] = "true";
3308+
} else {
3309+
params.default_template_kwargs["preserve_reasoning"] = "false";
3310+
}
3311+
}
3312+
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_REASONING_PRESERVE"));
32993313
add_opt(common_arg(
33003314
{"--chat-template"}, "JINJA_TEMPLATE",
33013315
string_format(
@@ -3471,7 +3485,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
34713485
[](common_params & params) {
34723486
params.offline = true;
34733487
}
3474-
).set_env("LLAMA_ARG_OFFLINE"));
3488+
).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_DOWNLOAD}).set_env("LLAMA_ARG_OFFLINE"));
34753489
add_opt(common_arg(
34763490
{"-lv", "--verbosity", "--log-verbosity"}, "N",
34773491
string_format("Set the verbosity threshold. Messages with a higher verbosity will be ignored. Values:\n"

common/chat.cpp

Lines changed: 155 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -912,6 +912,10 @@ static std::string common_chat_template_direct_apply_impl(
912912
if (inputs.add_generation_prompt) {
913913
inp["add_generation_prompt"] = true;
914914
}
915+
if (inp.contains("preserve_reasoning") && inp["preserve_reasoning"].is_boolean()) {
916+
bool enabled = inp["preserve_reasoning"].get<bool>();
917+
jinja::caps_apply_preserve_reasoning(ctx, enabled);
918+
}
915919

916920
jinja::global_from_json(ctx, inp, inputs.mark_input);
917921

@@ -2376,6 +2380,149 @@ static void func_args_not_string(json & messages) {
23762380

23772381
}
23782382

2383+
// MiniCPM5 format:
2384+
// - Reasoning: <think>{reasoning}</think> (optional)
2385+
// - Tool calls: <function name="foo"><param name="bar">value</param></function>
2386+
static common_chat_params common_chat_params_init_minicpm5(const common_chat_template & tmpl,
2387+
const autoparser::generation_params & inputs) {
2388+
common_chat_params data;
2389+
2390+
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
2391+
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
2392+
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
2393+
data.supports_thinking = true;
2394+
data.preserved_tokens = {
2395+
"<function",
2396+
"<param",
2397+
"</function>",
2398+
"</param>",
2399+
"<think>",
2400+
"</think>",
2401+
};
2402+
2403+
data.thinking_start_tag = "<think>";
2404+
data.thinking_end_tag = "</think>";
2405+
2406+
data.message_delimiters = {
2407+
{ COMMON_CHAT_ROLE_ASSISTANT, "<|im_start|>assistant" },
2408+
{ COMMON_CHAT_ROLE_TOOL, "<|im_start|>user\n<tool_response>" },
2409+
{ COMMON_CHAT_ROLE_USER, "<|im_start|>user" },
2410+
{ COMMON_CHAT_ROLE_SYSTEM, "<|im_start|>system" },
2411+
};
2412+
2413+
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
2414+
auto has_response_format = inputs.json_schema.is_object() && !inputs.json_schema.empty();
2415+
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
2416+
auto include_grammar = has_response_format || (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE);
2417+
2418+
if (inputs.has_continuation()) {
2419+
const auto & msg = inputs.continue_msg;
2420+
2421+
data.generation_prompt = "<|im_start|>assistant\n<think>\n" + msg.reasoning_content;
2422+
if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
2423+
data.generation_prompt += "\n</think>\n\n" + msg.render_content();
2424+
}
2425+
2426+
data.prompt += data.generation_prompt;
2427+
}
2428+
2429+
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
2430+
auto generation_prompt = p.literal("<|im_start|>assistant\n");
2431+
2432+
auto reasoning = p.eps();
2433+
if (extract_reasoning) {
2434+
reasoning = ("<think>" << p.reasoning(p.until("</think>")) << "</think>") + p.space();
2435+
}
2436+
2437+
// Response format parser
2438+
if (has_response_format) {
2439+
return generation_prompt + reasoning + p.content(p.schema(p.json(), "response-format", inputs.json_schema));
2440+
}
2441+
2442+
if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
2443+
// CDATA lets a value carry characters that would otherwise close the tag (e.g.
2444+
// </param>); capture the inner text only, excluding the CDATA markers.
2445+
auto string_value = p.choice({
2446+
p.literal("<![CDATA[") + p.ac(p.tool_arg_string_value(p.until("]]>")) + p.literal("]]>"), "]]>") + p.tool_arg_close(p.literal("</param>")),
2447+
p.negate(p.literal("<![CDATA[")) + p.ac(p.tool_arg_string_value(p.until("</param>")) + p.tool_arg_close(p.literal("</param>")), "</param>")
2448+
});
2449+
2450+
auto tool_choice = p.choice();
2451+
foreach_function(inputs.tools, [&](const json & tool) {
2452+
const auto & function = tool.at("function");
2453+
const std::string name = function.at("name");
2454+
auto params = function.contains("parameters") ? function.at("parameters") : json::object();
2455+
2456+
auto args = p.eps();
2457+
if (params.contains("properties") && params.at("properties").is_object() && !params.at("properties").empty()) {
2458+
auto schema_info = common_schema_info();
2459+
schema_info.resolve_refs(params);
2460+
2461+
auto arg_choice = p.choice();
2462+
for (const auto & [prop_name, prop_schema] : params.at("properties").items()) {
2463+
auto value_parser = p.eps();
2464+
if (schema_info.resolves_to_string(prop_schema)) {
2465+
value_parser = string_value;
2466+
} else {
2467+
value_parser = p.tool_arg_json_value(
2468+
p.schema(p.json(), "tool-" + name + "-arg-" + prop_name + "-schema", prop_schema, false)
2469+
) + p.tool_arg_close(p.literal("</param>"));
2470+
}
2471+
2472+
auto arg_rule = p.tool_arg(
2473+
p.tool_arg_open(p.literal("<param name=\"") + p.tool_arg_name(p.literal(prop_name)) + p.literal("\">")) +
2474+
value_parser
2475+
);
2476+
2477+
arg_choice |= arg_rule;
2478+
}
2479+
args = p.zero_or_more(arg_choice + p.space());
2480+
}
2481+
2482+
auto tool_parser = p.tool(
2483+
p.tool_open(p.literal("<function name=\"") + p.tool_name(p.literal(name)) + p.literal("\">"))
2484+
<< p.tool_args(args)
2485+
<< p.tool_close(p.literal("</function>")));
2486+
2487+
tool_choice |= p.rule("tool-" + name, tool_parser);
2488+
});
2489+
2490+
auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
2491+
auto tool_calls = p.trigger_rule("tool-call", p.repeat(tool_choice + p.space(), 1, max_calls));
2492+
2493+
auto content = p.content(p.until("<function"));
2494+
2495+
return generation_prompt + reasoning + content + tool_calls + p.end();
2496+
}
2497+
2498+
return generation_prompt + reasoning + p.content(p.rest()) + p.end();
2499+
});
2500+
2501+
data.parser = parser.save();
2502+
2503+
if (include_grammar) {
2504+
data.grammar_lazy = !(has_response_format || (has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED));
2505+
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
2506+
foreach_function(inputs.tools, [&](const json & tool) {
2507+
const auto & function = tool.at("function");
2508+
auto schema = function.contains("parameters") ? function.at("parameters") : json::object();
2509+
builder.resolve_refs(schema);
2510+
});
2511+
if (has_response_format) {
2512+
auto schema = inputs.json_schema;
2513+
builder.resolve_refs(schema);
2514+
}
2515+
parser.build_grammar(builder, data.grammar_lazy);
2516+
});
2517+
2518+
data.grammar_triggers = {
2519+
{ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<function" },
2520+
};
2521+
}
2522+
2523+
return data;
2524+
}
2525+
23792526
static json common_chat_extra_context() {
23802527
json ctx = json::object();
23812528
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
@@ -2468,6 +2615,14 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
24682615
return common_chat_params_init_gemma4(tmpl, params);
24692616
}
24702617

2618+
// MiniCPM5 - XML tool calls with <function name="..."><param name="...">...</param></function>
2619+
if (src.find("Tool usage guidelines:") != std::string::npos &&
2620+
src.find("<function name=\"") != std::string::npos &&
2621+
src.find("<param name=\"") != std::string::npos) {
2622+
LOG_DBG("Using specialized template: MiniCPM5\n");
2623+
return common_chat_params_init_minicpm5(tmpl, params);
2624+
}
2625+
24712626
return std::nullopt;
24722627
}
24732628

0 commit comments

Comments
 (0)