Skip to content

Commit 5282e8d

Browse files
Merge pull request #485 from janhq/update-dev-from-master-2026-04-14-00-57
Sync master with upstream release b8783
2 parents 76ff29c + e21cdc1 commit 5282e8d

53 files changed

Lines changed: 2853 additions & 645 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/close-issue.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
steps:
1818
- uses: actions/stale@v10
1919
with:
20-
exempt-issue-labels: "refactoring,help wanted,good first issue,research 🔬,bug,roadmap"
20+
exempt-issue-labels: "refactoring,help wanted,good first issue,research 🔬,bug,roadmap,security"
2121
days-before-issue-stale: 30
2222
days-before-issue-close: 14
2323
stale-issue-label: "stale"

common/chat.cpp

Lines changed: 193 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,6 +1091,14 @@ static common_chat_params common_chat_params_init_gemma4(const common_chat_templ
10911091
common_chat_params data;
10921092

10931093
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
1094+
1095+
if (inputs.add_generation_prompt && string_ends_with(data.prompt, "<turn|>\n")) {
1096+
// This may happen if the model generates content + tool_call: the
1097+
// template does not add the model's next turn, which keeps the model
1098+
// from emitting its proper reasoning token sequence.
1099+
data.prompt += "<|turn>model\n";
1100+
}
1101+
10941102
data.format = COMMON_CHAT_FORMAT_PEG_GEMMA4;
10951103
data.supports_thinking = true;
10961104
data.thinking_start_tag = "<|channel>thought";
@@ -1118,7 +1126,8 @@ static common_chat_params common_chat_params_init_gemma4(const common_chat_templ
11181126
p.rule("thought", p.content(p.literal("<|channel>thought") + p.space() + p.until("<channel|>") + p.literal("<channel|>")));
11191127
}
11201128

1121-
auto thought = (p.peek(p.literal("<|channel>")) + p.ref("thought")) | p.negate(p.literal("<|channel>"));
1129+
auto consume_empty_channels = p.gbnf(p.zero_or_more(p.literal("<|channel>") + p.negate(p.literal("thought"))), "");
1130+
auto thought = (p.peek(p.literal("<|channel>")) + consume_empty_channels + p.ref("thought")) | p.negate(p.literal("<|channel>"));
11221131

11231132
if (has_response_format) {
11241133
auto response_format = p.literal("```json") <<
@@ -1182,12 +1191,16 @@ static common_chat_params common_chat_params_init_gemma4(const common_chat_templ
11821191
/* max = */ inputs.parallel_tool_calls ? -1 : 1
11831192
));
11841193

1185-
auto content = p.rule("content", p.content(p.until_one_of({"<|channel>", "<|tool_call>"})));
1194+
auto scan_to_toolcall = p.rule("scan-to-toolcall", p.until("<|tool_call>"));
1195+
auto content = p.rule("content", p.content(p.until_one_of({"<|channel>", "<channel|>", "<|tool_call>"})));
11861196
auto message = p.rule("message", thought + content);
1187-
return start + p.zero_or_more(message) + tool_call;
1197+
return start + p.zero_or_more(message) + scan_to_toolcall + tool_call;
11881198
}
11891199

1190-
auto content = p.rule("content", p.content(p.until("<|channel>")));
1200+
// Gemma 4 may emit an extra <|channel>thought\n<channel|> at the end of the content. It may
1201+
// also emit a single trailing <channel|> token. Consume all complete reasoning blocks and
1202+
// then stop at the first unmatched <channel|> token.
1203+
auto content = p.rule("content", p.content(p.until_one_of({"<|channel>", "<channel|>"})));
11911204
auto message = p.rule("message", thought + content);
11921205
return start + p.one_or_more(message);
11931206
});
@@ -1656,6 +1669,173 @@ static common_chat_params common_chat_params_init_gigachat_v3(
16561669
return data;
16571670
}
16581671

1672+
// Builds the chat params for templates detected as DeepSeek V3.2.
// Applies the template to produce the prompt, then builds a PEG parser that handles an
// optional <think>...</think> reasoning block followed by either a fenced json response
// (when a JSON schema is supplied), plain content, or content plus DSML-tagged tool calls.
// When a response format or usable tools are present, a grammar is generated from the
// same parser and a trigger on the function_calls opening tag is registered.
static common_chat_params common_chat_params_init_deepseek_v3_2(const common_chat_template & tmpl,
1673+
const autoparser::generation_params & inputs) {
1674+
common_chat_params data;
1675+
1676+
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
1677+
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
1678+
data.supports_thinking = true;
1679+
data.thinking_start_tag = "<think>";
1680+
data.thinking_end_tag = "</think>";
1681+
data.preserved_tokens = {
1682+
"|DSML|",
1683+
"<think>",
1684+
"</think>",
1685+
};
1686+
1687+
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
1688+
auto has_response_format = !inputs.json_schema.is_null() && inputs.json_schema.is_object();
1689+
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
1690+
auto include_grammar = has_response_format || (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE);
1691+
1692+
// All tool-call markup tags are derived from the DSML marker.
const std::string DSML = "|DSML|";
1693+
const std::string THINK_START = "<think>";
1694+
const std::string THINK_END = "</think>";
1695+
const std::string FC_START = "<" + DSML + "function_calls>";
1696+
const std::string FC_END = "</" + DSML + "function_calls>";
1697+
const std::string INVOKE_START = "<" + DSML + "invoke";
1698+
const std::string INVOKE_END = "</" + DSML + "invoke>";
1699+
const std::string PARAM_START = "<" + DSML + "parameter";
1700+
const std::string PARAM_END = "</" + DSML + "parameter>";
1701+
1702+
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
1703+
auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START);
1704+
auto end = p.end();
1705+
1706+
// With reasoning extraction off, no reasoning rule is added (reasoning stays p.eps()).
auto reasoning = p.eps();
1707+
if (extract_reasoning && inputs.enable_thinking) {
1708+
reasoning = p.optional(THINK_START + p.reasoning(p.until(THINK_END)) + THINK_END);
1709+
} else if (extract_reasoning) {
1710+
// Thinking disabled but reasoning extraction requested: the generation prompt
1711+
// contains an empty <think></think> pair that must still be consumed.
1712+
reasoning = p.optional(p.literal(THINK_START) + p.until(THINK_END) + p.literal(THINK_END));
1713+
}
1714+
1715+
// A JSON schema is handled first and returns early, so tools are not parsed in that case.
if (has_response_format) {
1716+
auto response_format = p.rule("response-format",
1717+
p.literal("```json") + p.space() +
1718+
p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema)) +
1719+
p.space() + p.literal("```"));
1720+
return generation_prompt + reasoning + response_format + end;
1721+
}
1722+
1723+
// No usable tools: everything after the reasoning block is plain content.
if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
1724+
return generation_prompt + reasoning + p.content(p.rest()) + end;
1725+
}
1726+
1727+
// Build one "invoke" parser per declared function and collect them as alternatives.
auto tool_choice = p.choice();
1728+
foreach_function(inputs.tools, [&](const json & tool) {
1729+
const auto & function = tool.at("function");
1730+
std::string name = function.at("name");
1731+
auto params = function.contains("parameters") ? function.at("parameters") : json::object();
1732+
const auto & props = params.contains("properties") ? params.at("properties") : json::object();
1733+
1734+
std::set<std::string> required;
1735+
if (params.contains("required")) {
1736+
params.at("required").get_to(required);
1737+
}
1738+
1739+
auto schema_info = common_schema_info();
1740+
schema_info.resolve_refs(params);
1741+
1742+
std::vector<common_peg_parser> required_parsers;
1743+
std::vector<common_peg_parser> optional_parsers;
1744+
for (const auto & [param_name, param_schema] : props.items()) {
1745+
bool is_required = required.find(param_name) != required.end();
1746+
bool is_string = schema_info.resolves_to_string(param_schema);
1747+
1748+
// String parameters are read raw up to the closing tag; all others are parsed as
// JSON against the parameter's schema. The opening tag carries a matching
// string="true"/"false" attribute.
auto arg = p.tool_arg(
1749+
p.tool_arg_open(
1750+
p.literal(PARAM_START + " name=\"") +
1751+
p.tool_arg_name(p.literal(param_name)) +
1752+
p.literal("\" string=\"" + std::string(is_string ? "true" : "false") + "\">")) +
1753+
(is_string
1754+
? p.tool_arg_string_value(p.until(PARAM_END))
1755+
: p.tool_arg_json_value(p.schema(p.json(),
1756+
"tool-" + name + "-arg-" + param_name + "-schema",
1757+
param_schema, false))) +
1758+
p.tool_arg_close(p.literal(PARAM_END)));
1759+
1760+
auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg);
1761+
if (is_required) {
1762+
required_parsers.push_back(named_arg);
1763+
} else {
1764+
optional_parsers.push_back(named_arg);
1765+
}
1766+
}
1767+
1768+
// Required arguments come first, in the iteration order of "properties".
common_peg_parser args_seq = p.eps();
1769+
for (size_t i = 0; i < required_parsers.size(); i++) {
1770+
if (i > 0) {
1771+
args_seq = args_seq + p.space();
1772+
}
1773+
args_seq = args_seq + required_parsers[i];
1774+
}
1775+
1776+
// Optional arguments may follow in any order via a repeated choice.
// NOTE(review): the max of -1 presumably means "no upper bound" - confirm against p.repeat.
if (!optional_parsers.empty()) {
1777+
common_peg_parser any_opt = p.choice();
1778+
for (const auto & opt : optional_parsers) {
1779+
any_opt |= opt;
1780+
}
1781+
args_seq = args_seq + p.repeat(p.space() + any_opt, 0, -1);
1782+
}
1783+
1784+
common_peg_parser invoke_body = args_seq;
1785+
auto func_parser = p.tool(
1786+
p.tool_open(p.literal(INVOKE_START + " name=\"") +
1787+
p.tool_name(p.literal(name)) + p.literal("\">\n")) +
1788+
invoke_body + p.space() +
1789+
p.tool_close(p.literal(INVOKE_END)));
1790+
1791+
tool_choice |= p.rule("tool-" + name, func_parser);
1792+
});
1793+
1794+
auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1795+
1796+
// Parallel tool calls allow several invokes inside one function_calls block;
// otherwise exactly one invoke is accepted.
common_peg_parser tool_calls = p.eps();
1797+
if (inputs.parallel_tool_calls) {
1798+
tool_calls = p.trigger_rule("tool-call",
1799+
p.literal(FC_START) + p.space() + tool_choice +
1800+
p.zero_or_more(p.space() + tool_choice) + p.space() + p.literal(FC_END));
1801+
} else {
1802+
tool_calls = p.trigger_rule("tool-call",
1803+
p.literal(FC_START) + p.space() + tool_choice + p.space() + p.literal(FC_END));
1804+
}
1805+
1806+
if (!require_tools) {
1807+
tool_calls = p.optional(tool_calls);
1808+
}
1809+
1810+
auto content_before_tools = p.content(p.until(FC_START));
1811+
return generation_prompt + reasoning + content_before_tools + tool_calls + end;
1812+
});
1813+
1814+
data.parser = parser.save();
1815+
1816+
if (include_grammar) {
1817+
// The grammar is lazy unless a response format is set or tool calls are required.
data.grammar_lazy = !(has_response_format || (has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED));
1818+
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1819+
foreach_function(inputs.tools, [&](const json & tool) {
1820+
const auto & function = tool.at("function");
1821+
auto schema = function.contains("parameters") ? function.at("parameters") : json::object();
1822+
builder.resolve_refs(schema);
1823+
});
1824+
if (has_response_format) {
1825+
auto schema = inputs.json_schema;
1826+
builder.resolve_refs(schema);
1827+
}
1828+
parser.build_grammar(builder, data.grammar_lazy);
1829+
});
1830+
1831+
data.grammar_triggers = {
1832+
{ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, FC_START },
1833+
};
1834+
}
1835+
1836+
return data;
1837+
}
1838+
16591839
namespace workaround {
16601840

16611841
static void map_developer_role_to_system(json & messages) {
@@ -1927,6 +2107,15 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
19272107
return common_chat_params_init_gigachat_v3(tmpl, params);
19282108
}
19292109

2110+
// DeepSeek V3.2 format detection: template defines dsml_token and uses it for tool calls.
2111+
// The template source contains the token as a variable assignment, not as a literal in markup.
2112+
if (src.find("dsml_token") != std::string::npos &&
2113+
src.find("function_calls") != std::string::npos &&
2114+
src.find("DSML") != std::string::npos) {
2115+
LOG_DBG("Using specialized template: DeepSeek V3.2\n");
2116+
return common_chat_params_init_deepseek_v3_2(tmpl, params);
2117+
}
2118+
19302119
// Gemma4 format detection
19312120
if (src.find("'<|tool_call>call:'") != std::string::npos) {
19322121
if (src.find("{#- OpenAI Chat Completions:") == std::string::npos) {

common/download.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,9 @@ static bool common_pull_file(httplib::Client & cli,
258258
if (progress_step >= p.total / 1000 || p.downloaded == p.total) {
259259
if (callback) {
260260
callback->on_update(p);
261+
if (callback->is_cancelled()) {
262+
return false;
263+
}
261264
}
262265
progress_step = 0;
263266
}
@@ -373,6 +376,9 @@ static int common_download_file_single_online(const std::string & url,
373376
}
374377

375378
for (int i = 0; i < max_attempts; ++i) {
379+
if (opts.callback && opts.callback->is_cancelled()) {
380+
break;
381+
}
376382
if (i) {
377383
LOG_WRN("%s: retrying after %d seconds...\n", __func__, delay);
378384
std::this_thread::sleep_for(std::chrono::seconds(delay));
@@ -412,6 +418,12 @@ static int common_download_file_single_online(const std::string & url,
412418
if (opts.callback) {
413419
opts.callback->on_done(p, success);
414420
}
421+
if (opts.callback && opts.callback->is_cancelled() &&
422+
std::filesystem::exists(path_temporary)) {
423+
if (remove(path_temporary.c_str()) != 0) {
424+
LOG_ERR("%s: unable to delete temporary file: %s\n", __func__, path_temporary.c_str());
425+
}
426+
}
415427
if (!success) {
416428
LOG_ERR("%s: download failed after %d attempts\n", __func__, max_attempts);
417429
return -1; // max attempts reached

common/download.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ class common_download_callback {
2121
virtual void on_start(const common_download_progress & p) = 0;
2222
virtual void on_update(const common_download_progress & p) = 0;
2323
virtual void on_done(const common_download_progress & p, bool ok) = 0;
24+
// Cancellation hook polled by the download code after progress updates and before each
// attempt. The default implementation never requests cancellation; override to abort.
virtual bool is_cancelled() const { return false; }
2425
};
2526

2627
struct common_remote_params {

common/peg-parser.cpp

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -890,6 +890,10 @@ struct parser_executor {
890890
}
891891
return result;
892892
}
893+
894+
// Parsing a gbnf parser node delegates to its wrapped child at the current start position;
// the node's grammar string is not consulted here.
common_peg_parse_result operator()(const common_peg_gbnf_parser & p) {
895+
return arena.parse(p.child, ctx, start_pos);
896+
}
893897
};
894898

895899
common_peg_parse_result common_peg_arena::parse(common_peg_parse_context & ctx, size_t start) const {
@@ -957,7 +961,8 @@ void common_peg_arena::resolve_refs() {
957961
std::is_same_v<T, common_peg_and_parser> ||
958962
std::is_same_v<T, common_peg_not_parser> ||
959963
std::is_same_v<T, common_peg_tag_parser> ||
960-
std::is_same_v<T, common_peg_atomic_parser>) {
964+
std::is_same_v<T, common_peg_atomic_parser> ||
965+
std::is_same_v<T, common_peg_gbnf_parser>) {
961966
p.child = resolve_ref(p.child);
962967
} else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
963968
p.child = resolve_ref(p.child);
@@ -1036,6 +1041,8 @@ std::string common_peg_arena::dump_impl(common_peg_parser_id
10361041
return "Not(" + dump_impl(p.child, visited) + ")";
10371042
} else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
10381043
return "Atomic(" + dump_impl(p.child, visited) + ")";
1044+
} else if constexpr (std::is_same_v<T, common_peg_gbnf_parser>) {
1045+
return "Gbnf(" + p.grammar + ", " + dump_impl(p.child, visited) + ")";
10391046
} else if constexpr (std::is_same_v<T, common_peg_any_parser>) {
10401047
return "Any";
10411048
} else if constexpr (std::is_same_v<T, common_peg_space_parser>) {
@@ -1565,6 +1572,7 @@ static std::unordered_set<std::string> collect_reachable_rules(
15651572
std::is_same_v<T, common_peg_not_parser> ||
15661573
std::is_same_v<T, common_peg_tag_parser> ||
15671574
std::is_same_v<T, common_peg_atomic_parser> ||
1575+
std::is_same_v<T, common_peg_gbnf_parser> ||
15681576
std::is_same_v<T, common_peg_schema_parser>) {
15691577
visit(p.child);
15701578
} else if constexpr (std::is_same_v<T, common_peg_rule_parser>) {
@@ -1651,10 +1659,13 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
16511659
} else if constexpr (std::is_same_v<T, common_peg_sequence_parser>) {
16521660
std::string s;
16531661
for (const auto & child : p.children) {
1662+
auto child_gbnf = to_gbnf(child);
1663+
if (child_gbnf.empty()) {
1664+
continue;
1665+
}
16541666
if (!s.empty()) {
16551667
s += " ";
16561668
}
1657-
auto child_gbnf = to_gbnf(child);
16581669
const auto & child_parser = effective_parser(child);
16591670
if (std::holds_alternative<common_peg_choice_parser>(child_parser) ||
16601671
std::holds_alternative<common_peg_sequence_parser>(child_parser)) {
@@ -1754,6 +1765,8 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
17541765
return to_gbnf(p.child);
17551766
} else if constexpr (std::is_same_v<T, common_peg_atomic_parser>) {
17561767
return to_gbnf(p.child);
1768+
} else if constexpr (std::is_same_v<T, common_peg_gbnf_parser>) {
1769+
return p.grammar;
17571770
} else {
17581771
static_assert(is_always_false_v<T>);
17591772
}
@@ -1888,6 +1901,8 @@ static nlohmann::json serialize_parser_variant(const common_peg_parser_variant &
18881901
{"child", p.child},
18891902
{"tag", p.tag}
18901903
};
1904+
} else if constexpr (std::is_same_v<T, common_peg_gbnf_parser>) {
1905+
return json{{"type", "gbnf"}, {"child", p.child}, {"grammar", p.grammar}};
18911906
}
18921907
}, variant);
18931908
}
@@ -2050,6 +2065,16 @@ static common_peg_parser_variant deserialize_parser_variant(const nlohmann::json
20502065
};
20512066
}
20522067

2068+
if (type == "gbnf") {
2069+
if (!j.contains("child") || !j.contains("grammar")) {
2070+
throw std::runtime_error("gbnf parser missing required fields");
2071+
}
2072+
return common_peg_gbnf_parser{
2073+
j["child"].get<common_peg_parser_id>(),
2074+
j["grammar"].get<std::string>(),
2075+
};
2076+
}
2077+
20532078
throw std::runtime_error("Unknown parser type: " + type);
20542079
}
20552080

0 commit comments

Comments
 (0)