@@ -1091,6 +1091,14 @@ static common_chat_params common_chat_params_init_gemma4(const common_chat_templ
10911091 common_chat_params data;
10921092
10931093 data.prompt = common_chat_template_direct_apply_impl (tmpl, inputs);
1094+
1095+ if (inputs.add_generation_prompt && string_ends_with (data.prompt , " <turn|>\n " )) {
1096+ // This may happen if the model generates content + tool_call, the
1097+ // template does not add the model's next turn and confuses the model
1098+ // from emitting its proper reasoning token sequence.
1099+ data.prompt += " <|turn>model\n " ;
1100+ }
1101+
10941102 data.format = COMMON_CHAT_FORMAT_PEG_GEMMA4;
10951103 data.supports_thinking = true ;
10961104 data.thinking_start_tag = " <|channel>thought" ;
@@ -1118,7 +1126,8 @@ static common_chat_params common_chat_params_init_gemma4(const common_chat_templ
11181126 p.rule (" thought" , p.content (p.literal (" <|channel>thought" ) + p.space () + p.until (" <channel|>" ) + p.literal (" <channel|>" )));
11191127 }
11201128
1121- auto thought = (p.peek (p.literal (" <|channel>" )) + p.ref (" thought" )) | p.negate (p.literal (" <|channel>" ));
1129+ auto consume_empty_channels = p.gbnf (p.zero_or_more (p.literal (" <|channel>" ) + p.negate (p.literal (" thought" ))), " " );
1130+ auto thought = (p.peek (p.literal (" <|channel>" )) + consume_empty_channels + p.ref (" thought" )) | p.negate (p.literal (" <|channel>" ));
11221131
11231132 if (has_response_format) {
11241133 auto response_format = p.literal (" ```json" ) <<
@@ -1182,12 +1191,16 @@ static common_chat_params common_chat_params_init_gemma4(const common_chat_templ
11821191 /* max = */ inputs.parallel_tool_calls ? -1 : 1
11831192 ));
11841193
1185- auto content = p.rule (" content" , p.content (p.until_one_of ({" <|channel>" , " <|tool_call>" })));
1194+ auto scan_to_toolcall = p.rule (" scan-to-toolcall" , p.until (" <|tool_call>" ));
1195+ auto content = p.rule (" content" , p.content (p.until_one_of ({" <|channel>" , " <channel|>" , " <|tool_call>" })));
11861196 auto message = p.rule (" message" , thought + content);
1187- return start + p.zero_or_more (message) + tool_call;
1197+ return start + p.zero_or_more (message) + scan_to_toolcall + tool_call;
11881198 }
11891199
1190- auto content = p.rule (" content" , p.content (p.until (" <|channel>" )));
1200+ // Gemma 4 may emit an extra <|channel>thought\n<channel|> at the end of the content. It may
1201+ // also emit a single trailing <channel|> token. Consume all complete reasoning blocks and
1202+ // then stop at the first unmatched <channel|> token.
1203+ auto content = p.rule (" content" , p.content (p.until_one_of ({" <|channel>" , " <channel|>" })));
11911204 auto message = p.rule (" message" , thought + content);
11921205 return start + p.one_or_more (message);
11931206 });
@@ -1656,6 +1669,173 @@ static common_chat_params common_chat_params_init_gigachat_v3(
16561669 return data;
16571670}
16581671
// Builds the chat parameters used for templates detected as DeepSeek V3.2.
//
// The function renders the prompt through common_chat_template_direct_apply_impl(), marks the
// output format as COMMON_CHAT_FORMAT_PEG_NATIVE, declares the think tags, and constructs a PEG
// parser for the model output. The parser accepts an optional reasoning block followed by one of:
//   - a fenced ```json response validated against inputs.json_schema,
//   - plain content (no tools, or tool_choice is NONE),
//   - content followed by a DSML function_calls block.
// When a response format is requested, or tools are present and not disabled, a grammar is also
// generated from the parser and a word trigger on the function_calls opening tag is registered.
//
// Parameters:
//   tmpl   - chat template, passed unchanged to the prompt renderer.
//   inputs - generation parameters; this function reads tools, tool_choice, parallel_tool_calls,
//            json_schema, reasoning_format, enable_thinking and generation_prompt.
// Returns: the populated common_chat_params.
//
// NOTE(review): in this view the string literals carry extra spaces (e.g. " <think>"); that looks
// like an artifact of how the diff was rendered rather than the real tag text - confirm against
// the actual source before relying on the exact literal contents.
1672+ static common_chat_params common_chat_params_init_deepseek_v3_2 (const common_chat_template & tmpl,
1673+ const autoparser::generation_params & inputs) {
1674+ common_chat_params data;
1675+
1676+ data.prompt = common_chat_template_direct_apply_impl (tmpl, inputs);
1677+ data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
1678+ data.supports_thinking = true ;
1679+ data.thinking_start_tag = " <think>" ;
1680+ data.thinking_end_tag = " </think>" ;
1681+ data.preserved_tokens = {
1682+ " |DSML|" ,
1683+ " <think>" ,
1684+ " </think>" ,
1685+ };
1686+
      // Request-derived flags. A grammar is only built for a response format, or for tools that
      // the caller has not disabled via tool_choice NONE.
1687+ auto has_tools = inputs.tools .is_array () && !inputs.tools .empty ();
1688+ auto has_response_format = !inputs.json_schema .is_null () && inputs.json_schema .is_object ();
1689+ auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
1690+ auto include_grammar = has_response_format || (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE);
1691+
      // Markup pieces, all assembled around the DSML token. INVOKE_START and PARAM_START are
      // deliberately left without a closing '>' because attributes are appended where they are used.
1692+ const std::string DSML = " |DSML|" ;
1693+ const std::string THINK_START = " <think>" ;
1694+ const std::string THINK_END = " </think>" ;
1695+ const std::string FC_START = " <" + DSML + " function_calls>" ;
1696+ const std::string FC_END = " </" + DSML + " function_calls>" ;
1697+ const std::string INVOKE_START = " <" + DSML + " invoke" ;
1698+ const std::string INVOKE_END = " </" + DSML + " invoke>" ;
1699+ const std::string PARAM_START = " <" + DSML + " parameter" ;
1700+ const std::string PARAM_END = " </" + DSML + " parameter>" ;
1701+
1702+ auto parser = build_chat_peg_parser ([&](common_chat_peg_builder & p) {
      // NOTE(review): p.prefix() is opaque here; presumably it accounts for the generation prompt
      // (and a possible leading think tag) already emitted by the template - confirm.
1703+ auto generation_prompt = p.prefix (inputs.generation_prompt , THINK_START);
1704+ auto end = p.end ();
1705+
      // Reasoning stays an empty match unless reasoning extraction is requested. With thinking
      // enabled, the text between the think tags is wrapped in p.reasoning(); the block is optional.
1706+ auto reasoning = p.eps ();
1707+ if (extract_reasoning && inputs.enable_thinking ) {
1708+ reasoning = p.optional (THINK_START + p.reasoning (p.until (THINK_END)) + THINK_END);
1709+ } else if (extract_reasoning) {
1710+ // Thinking disabled but reasoning extraction requested: the generation prompt
1711+ // contains an empty <think></think> pair that must still be consumed.
      // Same shape as the branch above but without p.reasoning(), so the enclosed text is
      // matched without being tagged as reasoning.
1712+ reasoning = p.optional (p.literal (THINK_START) + p.until (THINK_END) + p.literal (THINK_END));
1713+ }
1714+
      // Structured output takes precedence: this path returns before any tool parser is built,
      // so tool calls are not parsed when a response format is set.
1715+ if (has_response_format) {
1716+ auto response_format = p.rule (" response-format" ,
1717+ p.literal (" ```json" ) + p.space () +
1718+ p.content (p.schema (p.json (), " response-format-schema" , inputs.json_schema )) +
1719+ p.space () + p.literal (" ```" ));
1720+ return generation_prompt + reasoning + response_format + end;
1721+ }
1722+
      // No usable tools: everything after the reasoning block is plain content.
1723+ if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
1724+ return generation_prompt + reasoning + p.content (p.rest ()) + end;
1725+ }
1726+
      // Alternation with one entry per declared tool, filled in by the loop below. This local is
      // a parser and is distinct from inputs.tool_choice.
1727+ auto tool_choice = p.choice ();
1728+ foreach_function (inputs.tools , [&](const json & tool) {
1729+ const auto & function = tool.at (" function" );
1730+ std::string name = function.at (" name" );
      // A missing "parameters" or "properties" entry falls back to an empty JSON object.
1731+ auto params = function.contains (" parameters" ) ? function.at (" parameters" ) : json::object ();
1732+ const auto & props = params.contains (" properties" ) ? params.at (" properties" ) : json::object ();
1733+
      // Names listed under "required", if that key is present.
1734+ std::set<std::string> required;
1735+ if (params.contains (" required" )) {
1736+ params.at (" required" ).get_to (required);
1737+ }
1738+
      // NOTE(review): presumably resolves $ref entries so resolves_to_string() can classify
      // referenced schemas - confirm against common_schema_info.
1739+ auto schema_info = common_schema_info ();
1740+ schema_info.resolve_refs (params);
1741+
      // Build one parser per property and sort it into the required or optional bucket.
      // Each parameter is: opening tag with name and string="true|false" attributes, the value,
      // then the closing parameter tag. String-typed parameters take raw text up to the closing
      // tag; every other parameter must be JSON matching the property's schema.
1742+ std::vector<common_peg_parser> required_parsers;
1743+ std::vector<common_peg_parser> optional_parsers;
1744+ for (const auto & [param_name, param_schema] : props.items ()) {
1745+ bool is_required = required.find (param_name) != required.end ();
1746+ bool is_string = schema_info.resolves_to_string (param_schema);
1747+
1748+ auto arg = p.tool_arg (
1749+ p.tool_arg_open (
1750+ p.literal (PARAM_START + " name=\" " ) +
1751+ p.tool_arg_name (p.literal (param_name)) +
1752+ p.literal (" \" string=\" " + std::string (is_string ? " true" : " false" ) + " \" >" )) +
1753+ (is_string
1754+ ? p.tool_arg_string_value (p.until (PARAM_END))
1755+ : p.tool_arg_json_value (p.schema (p.json (),
1756+ " tool-" + name + " -arg-" + param_name + " -schema" ,
1757+ param_schema, false ))) +
1758+ p.tool_arg_close (p.literal (PARAM_END)));
1759+
1760+ auto named_arg = p.rule (" tool-" + name + " -arg-" + param_name, arg);
1761+ if (is_required) {
1762+ required_parsers.push_back (named_arg);
1763+ } else {
1764+ optional_parsers.push_back (named_arg);
1765+ }
1766+ }
1767+
      // Required parameters come first, in the order they were collected above, with p.space()
      // between consecutive entries.
1768+ common_peg_parser args_seq = p.eps ();
1769+ for (size_t i = 0 ; i < required_parsers.size (); i++) {
1770+ if (i > 0 ) {
1771+ args_seq = args_seq + p.space ();
1772+ }
1773+ args_seq = args_seq + required_parsers[i];
1774+ }
1775+
      // Optional parameters follow the required ones: any of them, in any order, repeated from
      // zero times up to the max of -1 (presumably meaning unbounded - confirm in p.repeat()).
1776+ if (!optional_parsers.empty ()) {
1777+ common_peg_parser any_opt = p.choice ();
1778+ for (const auto & opt : optional_parsers) {
1779+ any_opt |= opt;
1780+ }
1781+ args_seq = args_seq + p.repeat (p.space () + any_opt, 0 , -1 );
1782+ }
1783+
      // Full invocation: opening invoke tag carrying the tool name and ending in a newline,
      // the argument sequence, p.space(), then the closing invoke tag.
1784+ common_peg_parser invoke_body = args_seq;
1785+ auto func_parser = p.tool (
1786+ p.tool_open (p.literal (INVOKE_START + " name=\" " ) +
1787+ p.tool_name (p.literal (name)) + p.literal (" \" >\n " )) +
1788+ invoke_body + p.space () +
1789+ p.tool_close (p.literal (INVOKE_END)));
1790+
1791+ tool_choice |= p.rule (" tool-" + name, func_parser);
1792+ });
1793+
1794+ auto require_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1795+
      // With parallel tool calls enabled, one or more invocations are accepted inside a single
      // function_calls block; otherwise exactly one invocation is accepted.
1796+ common_peg_parser tool_calls = p.eps ();
1797+ if (inputs.parallel_tool_calls ) {
1798+ tool_calls = p.trigger_rule (" tool-call" ,
1799+ p.literal (FC_START) + p.space () + tool_choice +
1800+ p.zero_or_more (p.space () + tool_choice) + p.space () + p.literal (FC_END));
1801+ } else {
1802+ tool_calls = p.trigger_rule (" tool-call" ,
1803+ p.literal (FC_START) + p.space () + tool_choice + p.space () + p.literal (FC_END));
1804+ }
1805+
      // Unless tool_choice is REQUIRED, the whole function_calls block may be absent.
1806+ if (!require_tools) {
1807+ tool_calls = p.optional (tool_calls);
1808+ }
1809+
      // Content is whatever precedes the function_calls opening tag.
1810+ auto content_before_tools = p.content (p.until (FC_START));
1811+ return generation_prompt + reasoning + content_before_tools + tool_calls + end;
1812+ });
1813+
1814+ data.parser = parser.save ();
1815+
1816+ if (include_grammar) {
      // The grammar is lazy unless a response format is set or tool_choice is REQUIRED with
      // tools present.
1817+ data.grammar_lazy = !(has_response_format || (has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED));
1818+ data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
      // resolve_refs() is applied to local copies of each tool's parameter schema and of the
      // response schema; inputs itself is not modified here.
1819+ foreach_function (inputs.tools , [&](const json & tool) {
1820+ const auto & function = tool.at (" function" );
1821+ auto schema = function.contains (" parameters" ) ? function.at (" parameters" ) : json::object ();
1822+ builder.resolve_refs (schema);
1823+ });
1824+ if (has_response_format) {
1825+ auto schema = inputs.json_schema ;
1826+ builder.resolve_refs (schema);
1827+ }
1828+ parser.build_grammar (builder, data.grammar_lazy );
1829+ });
1830+
      // The function_calls opening tag is registered as a word trigger. It is set whenever a
      // grammar is built, including the non-lazy cases.
1831+ data.grammar_triggers = {
1832+ { COMMON_GRAMMAR_TRIGGER_TYPE_WORD, FC_START },
1833+ };
1834+ }
1835+
1836+ return data;
1837+ }
1838+
16591839namespace workaround {
16601840
16611841static void map_developer_role_to_system (json & messages) {
@@ -1927,6 +2107,15 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
19272107 return common_chat_params_init_gigachat_v3 (tmpl, params);
19282108 }
19292109
2110+ // DeepSeek V3.2 format detection: template defines dsml_token and uses it for tool calls.
2111+ // The template source contains the token as a variable assignment, not as a literal in markup.
2112+ if (src.find (" dsml_token" ) != std::string::npos &&
2113+ src.find (" function_calls" ) != std::string::npos &&
2114+ src.find (" DSML" ) != std::string::npos) {
2115+ LOG_DBG (" Using specialized template: DeepSeek V3.2\n " );
2116+ return common_chat_params_init_deepseek_v3_2 (tmpl, params);
2117+ }
2118+
19302119 // Gemma4 format detection
19312120 if (src.find (" '<|tool_call>call:'" ) != std::string::npos) {
19322121 if (src.find (" {#- OpenAI Chat Completions:" ) == std::string::npos) {
0 commit comments