@@ -2198,6 +2198,191 @@ static json common_chat_extra_context() {
21982198 return ctx;
21992199}
22002200
2201+ // Laguna (poolside) — GLM-4-MoE-style tool calls + <think> reasoning.
2202+ // Tool call wire format:
2203+ // <tool_call>{name}
2204+ // <arg_key>{k}</arg_key>
2205+ // <arg_value>{v}</arg_value>
2206+ // ...
2207+ // </tool_call>
2208+ // String-typed args are emitted raw between <arg_value>...</arg_value>; all other
2209+ // args are JSON literals. Reasoning is <think>...</think>; the turn ends with
2210+ // </assistant>. Both Laguna-XS.2 and Laguna-M.1 share this format.
2211+ static common_chat_params common_chat_params_init_laguna (const common_chat_template & tmpl,
2212+ const autoparser::generation_params & inputs) {
2213+ common_chat_params data;
2214+
2215+ data.prompt = common_chat_template_direct_apply_impl (tmpl, inputs);
2216+ data.generation_prompt = common_chat_template_generation_prompt_impl (tmpl, inputs);
2217+ data.format = COMMON_CHAT_FORMAT_PEG_NATIVE ;
2218+ data.supports_thinking = true ;
2219+ data.thinking_start_tag = " <think>" ;
2220+ data.thinking_end_tag = " </think>" ;
2221+ data.preserved_tokens = {
2222+ " <tool_call>" , " </tool_call>" ,
2223+ " <arg_key>" , " </arg_key>" ,
2224+ " <arg_value>" , " </arg_value>" ,
2225+ " <think>" , " </think>" ,
2226+ " <assistant>" , " </assistant>" ,
2227+ };
2228+
2229+ // </assistant> ends the assistant turn. The single eot token (24) handles
2230+ // this when sampled directly, but the model occasionally emits the
2231+ // multi-token spelling; register it as a stop so generation always halts.
2232+ data.additional_stops .push_back (" </assistant>" );
2233+
2234+ const std::string THINK_START = " <think>" ;
2235+ const std::string THINK_END = " </think>" ;
2236+ const std::string CALL_START = " <tool_call>" ;
2237+ const std::string CALL_END = " </tool_call>" ;
2238+ const std::string KEY_START = " <arg_key>" ;
2239+ const std::string KEY_END = " </arg_key>" ;
2240+ const std::string VAL_START = " <arg_value>" ;
2241+ const std::string VAL_END = " </arg_value>" ;
2242+
2243+ auto has_tools = inputs.tools .is_array () && !inputs.tools .empty ();
2244+ auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE ;
2245+ auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE ;
2246+
2247+ auto parser = build_chat_peg_parser ([&](common_chat_peg_builder & p) {
2248+ auto end = p.end ();
2249+
2250+ // The framework prepends data.generation_prompt to the parsed output:
2251+ // "<assistant>\n" followed by the opening <think> (thinking enabled) or
2252+ // </think> (disabled). Consume it, then capture reasoning. The model's
2253+ // output therefore starts with the reasoning text (no leading <think>).
2254+ const std::string GEN_PROMPT = " <assistant>\n " ;
2255+ auto head = p.eps ();
2256+ if (extract_reasoning && inputs.enable_thinking ) {
2257+ // The model normally closes reasoning with </think> before a tool
2258+ // call, but sometimes emits <tool_call> directly without it.
2259+ // Terminate reasoning on whichever marker comes first so the call is
2260+ // not swallowed into reasoning_content; consume </think> if present.
2261+ head = p.literal (GEN_PROMPT + THINK_START ) +
2262+ p.reasoning (p.until_one_of ({ THINK_END , CALL_START })) +
2263+ p.optional (p.literal (THINK_END ));
2264+ } else if (extract_reasoning) {
2265+ head = p.literal (GEN_PROMPT + THINK_END );
2266+ } else {
2267+ head = p.literal (GEN_PROMPT ) +
2268+ p.optional (p.literal (THINK_START )) + p.optional (p.literal (THINK_END ));
2269+ }
2270+
2271+ if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE ) {
2272+ return head + p.content (p.until_one_of ({ " </assistant>" })) +
2273+ p.optional (p.literal (" </assistant>" )) + p.space () + end;
2274+ }
2275+
2276+ auto tool_choice = p.choice ();
2277+ foreach_function (inputs.tools , [&](const json & tool) {
2278+ const auto & function = tool.at (" function" );
2279+ std::string name = function.at (" name" );
2280+ auto params = function.contains (" parameters" ) ? function.at (" parameters" ) : json::object ();
2281+ const auto & props = params.contains (" properties" ) ? params.at (" properties" ) : json::object ();
2282+
2283+ std::set<std::string> required;
2284+ if (params.contains (" required" )) {
2285+ params.at (" required" ).get_to (required);
2286+ }
2287+ auto schema_info = common_schema_info ();
2288+ schema_info.resolve_refs (params);
2289+
2290+ std::vector<common_peg_parser> required_parsers;
2291+ std::vector<common_peg_parser> optional_parsers;
2292+ for (const auto & [param_name, param_schema] : props.items ()) {
2293+ bool is_required = required.find (param_name) != required.end ();
2294+ bool is_string = schema_info.resolves_to_string (param_schema);
2295+
2296+ // <arg_key>name</arg_key>\n<arg_value>VALUE</arg_value>
2297+ auto arg = p.tool_arg (
2298+ p.tool_arg_open (
2299+ p.literal (KEY_START ) + p.tool_arg_name (p.literal (param_name)) + p.literal (KEY_END ) +
2300+ p.space () + p.literal (VAL_START )) +
2301+ (is_string
2302+ ? p.tool_arg_string_value (p.until (VAL_END ))
2303+ : p.tool_arg_json_value (p.schema (p.json (),
2304+ " tool-" + name + " -arg-" + param_name + " -schema" ,
2305+ param_schema, false ))) +
2306+ p.tool_arg_close (p.literal (VAL_END )));
2307+
2308+ auto named_arg = p.rule (" tool-" + name + " -arg-" + param_name, arg);
2309+ if (is_required) {
2310+ required_parsers.push_back (named_arg);
2311+ } else {
2312+ optional_parsers.push_back (named_arg);
2313+ }
2314+ }
2315+
2316+ common_peg_parser args_seq = p.eps ();
2317+ for (size_t i = 0 ; i < required_parsers.size (); i++) {
2318+ if (i > 0 ) {
2319+ args_seq = args_seq + p.space ();
2320+ }
2321+ args_seq = args_seq + required_parsers[i];
2322+ }
2323+ if (!optional_parsers.empty ()) {
2324+ common_peg_parser any_opt = p.choice ();
2325+ for (const auto & opt : optional_parsers) {
2326+ any_opt |= opt;
2327+ }
2328+ args_seq = args_seq + p.repeat (p.space () + any_opt, 0 , -1 );
2329+ }
2330+
2331+ // <tool_call>name\n {args} </tool_call>
2332+ auto func_parser = p.tool (
2333+ p.tool_open (p.literal (CALL_START ) + p.tool_name (p.literal (name)) + p.literal (" \n " )) +
2334+ args_seq + p.space () +
2335+ p.tool_close (p.literal (CALL_END )));
2336+
2337+ tool_choice |= p.rule (" tool-" + name, func_parser);
2338+ });
2339+
2340+ auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0 ;
2341+ auto max_calls = inputs.parallel_tool_calls ? -1 : 1 ;
2342+ // Mirror the kimi_k2 structure: each tool is followed by optional
2343+ // whitespace, and the trigger rule ends with an optional </assistant>
2344+ // exit marker. This lets the (lazy) trigger rule COMPLETE once the model
2345+ // emits the turn-end, so the grammar disengages instead of offering
2346+ // another <tool_call> indefinitely (the parallel=true / max_calls=-1
2347+ // loop). Space is a suffix (not prefix) to avoid ambiguity with the exit.
2348+ auto tool_calls = p.rule (" tool-calls" ,
2349+ p.trigger_rule (" tool-call" ,
2350+ p.repeat (tool_choice + p.space (), min_calls, max_calls) +
2351+ p.optional (p.literal (" </assistant>" ))));
2352+
2353+ auto content_before_tools = p.content (p.until (CALL_START ));
2354+
2355+ // After the tool call(s) the model may emit trailing text (Laguna is
2356+ // wrapper-less, so a repetitive model can ramble past the final call).
2357+ // Absorb anything up to the turn-end as content so a stray trailing
2358+ // fragment does not fail the whole parse — mirrors the reference
2359+ // implementation, which extracts the calls and ignores the remainder.
2360+ auto trailing = p.content (p.until_one_of ({ " </assistant>" }));
2361+
2362+ return head + content_before_tools + tool_calls + trailing +
2363+ p.optional (p.literal (" </assistant>" )) + p.space () + end;
2364+ });
2365+
2366+ data.parser = parser.save ();
2367+
2368+ if (include_grammar) {
2369+ data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO ;
2370+ data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
2371+ foreach_function (inputs.tools , [&](const json & tool) {
2372+ const auto & function = tool.at (" function" );
2373+ auto schema = function.at (" parameters" );
2374+ builder.resolve_refs (schema);
2375+ });
2376+ parser.build_grammar (builder, data.grammar_lazy );
2377+ });
2378+ data.grammar_triggers = {
2379+ { COMMON_GRAMMAR_TRIGGER_TYPE_WORD , " <tool_call>" }
2380+ };
2381+ }
2382+
2383+ return data;
2384+ }
2385+
22012386std::optional<common_chat_params> common_chat_try_specialized_template (
22022387 const common_chat_template & tmpl,
22032388 const std::string & src,
@@ -2210,6 +2395,16 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
22102395 return common_chat_params_init_ministral_3 (tmpl, params);
22112396 }
22122397
2398+ // Laguna (poolside) — GLM-4-MoE-style tool calls with <arg_key>/<arg_value>
2399+ // pairs, plus <assistant>/</assistant> role tags (distinct from GLM's
2400+ // <|assistant|>). Covers both Laguna-XS.2 and Laguna-M.1.
2401+ if (src.find (" <arg_key>" ) != std::string::npos &&
2402+ src.find (" <arg_value>" ) != std::string::npos &&
2403+ src.find (" </assistant>" ) != std::string::npos) {
2404+ LOG_DBG (" Using specialized template: Laguna\n " );
2405+ return common_chat_params_init_laguna (tmpl, params);
2406+ }
2407+
22132408 // GPT-OSS - has unique channel-based structure that needs dedicated handler
22142409 if (src.find (" <|channel|>" ) != std::string::npos) {
22152410 LOG_DBG (" Using specialized template: GPT-OSS\n " );
0 commit comments