Skip to content

Commit 519f0c5

Browse files
authored
Merge pull request #30 from AtomicBot-ai/scratch/try-laguna-merge
Scratch/try laguna merge
2 parents 69e91e7 + 61ee3eb commit 519f0c5

17 files changed

Lines changed: 983 additions & 2 deletions

File tree

common/chat.cpp

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2198,6 +2198,191 @@ static json common_chat_extra_context() {
21982198
return ctx;
21992199
}
22002200

2201+
// Laguna (poolside) — GLM-4-MoE-style tool calls + <think> reasoning.
2202+
// Tool call wire format:
2203+
// <tool_call>{name}
2204+
// <arg_key>{k}</arg_key>
2205+
// <arg_value>{v}</arg_value>
2206+
// ...
2207+
// </tool_call>
2208+
// String-typed args are emitted raw between <arg_value>...</arg_value>; all other
2209+
// args are JSON literals. Reasoning is <think>...</think>; the turn ends with
2210+
// </assistant>. Both Laguna-XS.2 and Laguna-M.1 share this format.
2211+
static common_chat_params common_chat_params_init_laguna(const common_chat_template & tmpl,
2212+
const autoparser::generation_params & inputs) {
2213+
common_chat_params data;
2214+
2215+
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
2216+
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
2217+
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
2218+
data.supports_thinking = true;
2219+
data.thinking_start_tag = "<think>";
2220+
data.thinking_end_tag = "</think>";
2221+
data.preserved_tokens = {
2222+
"<tool_call>", "</tool_call>",
2223+
"<arg_key>", "</arg_key>",
2224+
"<arg_value>", "</arg_value>",
2225+
"<think>", "</think>",
2226+
"<assistant>", "</assistant>",
2227+
};
2228+
2229+
// </assistant> ends the assistant turn. The single eot token (24) handles
2230+
// this when sampled directly, but the model occasionally emits the
2231+
// multi-token spelling; register it as a stop so generation always halts.
2232+
data.additional_stops.push_back("</assistant>");
2233+
2234+
const std::string THINK_START = "<think>";
2235+
const std::string THINK_END = "</think>";
2236+
const std::string CALL_START = "<tool_call>";
2237+
const std::string CALL_END = "</tool_call>";
2238+
const std::string KEY_START = "<arg_key>";
2239+
const std::string KEY_END = "</arg_key>";
2240+
const std::string VAL_START = "<arg_value>";
2241+
const std::string VAL_END = "</arg_value>";
2242+
2243+
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
2244+
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
2245+
auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
2246+
2247+
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
2248+
auto end = p.end();
2249+
2250+
// The framework prepends data.generation_prompt to the parsed output:
2251+
// "<assistant>\n" followed by the opening <think> (thinking enabled) or
2252+
// </think> (disabled). Consume it, then capture reasoning. The model's
2253+
// output therefore starts with the reasoning text (no leading <think>).
2254+
const std::string GEN_PROMPT = "<assistant>\n";
2255+
auto head = p.eps();
2256+
if (extract_reasoning && inputs.enable_thinking) {
2257+
// The model normally closes reasoning with </think> before a tool
2258+
// call, but sometimes emits <tool_call> directly without it.
2259+
// Terminate reasoning on whichever marker comes first so the call is
2260+
// not swallowed into reasoning_content; consume </think> if present.
2261+
head = p.literal(GEN_PROMPT + THINK_START) +
2262+
p.reasoning(p.until_one_of({ THINK_END, CALL_START })) +
2263+
p.optional(p.literal(THINK_END));
2264+
} else if (extract_reasoning) {
2265+
head = p.literal(GEN_PROMPT + THINK_END);
2266+
} else {
2267+
head = p.literal(GEN_PROMPT) +
2268+
p.optional(p.literal(THINK_START)) + p.optional(p.literal(THINK_END));
2269+
}
2270+
2271+
if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
2272+
return head + p.content(p.until_one_of({ "</assistant>" })) +
2273+
p.optional(p.literal("</assistant>")) + p.space() + end;
2274+
}
2275+
2276+
auto tool_choice = p.choice();
2277+
foreach_function(inputs.tools, [&](const json & tool) {
2278+
const auto & function = tool.at("function");
2279+
std::string name = function.at("name");
2280+
auto params = function.contains("parameters") ? function.at("parameters") : json::object();
2281+
const auto & props = params.contains("properties") ? params.at("properties") : json::object();
2282+
2283+
std::set<std::string> required;
2284+
if (params.contains("required")) {
2285+
params.at("required").get_to(required);
2286+
}
2287+
auto schema_info = common_schema_info();
2288+
schema_info.resolve_refs(params);
2289+
2290+
std::vector<common_peg_parser> required_parsers;
2291+
std::vector<common_peg_parser> optional_parsers;
2292+
for (const auto & [param_name, param_schema] : props.items()) {
2293+
bool is_required = required.find(param_name) != required.end();
2294+
bool is_string = schema_info.resolves_to_string(param_schema);
2295+
2296+
// <arg_key>name</arg_key>\n<arg_value>VALUE</arg_value>
2297+
auto arg = p.tool_arg(
2298+
p.tool_arg_open(
2299+
p.literal(KEY_START) + p.tool_arg_name(p.literal(param_name)) + p.literal(KEY_END) +
2300+
p.space() + p.literal(VAL_START)) +
2301+
(is_string
2302+
? p.tool_arg_string_value(p.until(VAL_END))
2303+
: p.tool_arg_json_value(p.schema(p.json(),
2304+
"tool-" + name + "-arg-" + param_name + "-schema",
2305+
param_schema, false))) +
2306+
p.tool_arg_close(p.literal(VAL_END)));
2307+
2308+
auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg);
2309+
if (is_required) {
2310+
required_parsers.push_back(named_arg);
2311+
} else {
2312+
optional_parsers.push_back(named_arg);
2313+
}
2314+
}
2315+
2316+
common_peg_parser args_seq = p.eps();
2317+
for (size_t i = 0; i < required_parsers.size(); i++) {
2318+
if (i > 0) {
2319+
args_seq = args_seq + p.space();
2320+
}
2321+
args_seq = args_seq + required_parsers[i];
2322+
}
2323+
if (!optional_parsers.empty()) {
2324+
common_peg_parser any_opt = p.choice();
2325+
for (const auto & opt : optional_parsers) {
2326+
any_opt |= opt;
2327+
}
2328+
args_seq = args_seq + p.repeat(p.space() + any_opt, 0, -1);
2329+
}
2330+
2331+
// <tool_call>name\n {args} </tool_call>
2332+
auto func_parser = p.tool(
2333+
p.tool_open(p.literal(CALL_START) + p.tool_name(p.literal(name)) + p.literal("\n")) +
2334+
args_seq + p.space() +
2335+
p.tool_close(p.literal(CALL_END)));
2336+
2337+
tool_choice |= p.rule("tool-" + name, func_parser);
2338+
});
2339+
2340+
auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
2341+
auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
2342+
// Mirror the kimi_k2 structure: each tool is followed by optional
2343+
// whitespace, and the trigger rule ends with an optional </assistant>
2344+
// exit marker. This lets the (lazy) trigger rule COMPLETE once the model
2345+
// emits the turn-end, so the grammar disengages instead of offering
2346+
// another <tool_call> indefinitely (the parallel=true / max_calls=-1
2347+
// loop). Space is a suffix (not prefix) to avoid ambiguity with the exit.
2348+
auto tool_calls = p.rule("tool-calls",
2349+
p.trigger_rule("tool-call",
2350+
p.repeat(tool_choice + p.space(), min_calls, max_calls) +
2351+
p.optional(p.literal("</assistant>"))));
2352+
2353+
auto content_before_tools = p.content(p.until(CALL_START));
2354+
2355+
// After the tool call(s) the model may emit trailing text (Laguna is
2356+
// wrapper-less, so a repetitive model can ramble past the final call).
2357+
// Absorb anything up to the turn-end as content so a stray trailing
2358+
// fragment does not fail the whole parse — mirrors the reference
2359+
// implementation, which extracts the calls and ignores the remainder.
2360+
auto trailing = p.content(p.until_one_of({ "</assistant>" }));
2361+
2362+
return head + content_before_tools + tool_calls + trailing +
2363+
p.optional(p.literal("</assistant>")) + p.space() + end;
2364+
});
2365+
2366+
data.parser = parser.save();
2367+
2368+
if (include_grammar) {
2369+
data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
2370+
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
2371+
foreach_function(inputs.tools, [&](const json & tool) {
2372+
const auto & function = tool.at("function");
2373+
auto schema = function.at("parameters");
2374+
builder.resolve_refs(schema);
2375+
});
2376+
parser.build_grammar(builder, data.grammar_lazy);
2377+
});
2378+
data.grammar_triggers = {
2379+
{ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "<tool_call>" }
2380+
};
2381+
}
2382+
2383+
return data;
2384+
}
2385+
22012386
std::optional<common_chat_params> common_chat_try_specialized_template(
22022387
const common_chat_template & tmpl,
22032388
const std::string & src,
@@ -2210,6 +2395,16 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
22102395
return common_chat_params_init_ministral_3(tmpl, params);
22112396
}
22122397

2398+
// Laguna (poolside) — GLM-4-MoE-style tool calls with <arg_key>/<arg_value>
2399+
// pairs, plus <assistant>/</assistant> role tags (distinct from GLM's
2400+
// <|assistant|>). Covers both Laguna-XS.2 and Laguna-M.1.
2401+
if (src.find("<arg_key>") != std::string::npos &&
2402+
src.find("<arg_value>") != std::string::npos &&
2403+
src.find("</assistant>") != std::string::npos) {
2404+
LOG_DBG("Using specialized template: Laguna\n");
2405+
return common_chat_params_init_laguna(tmpl, params);
2406+
}
2407+
22132408
// GPT-OSS - has unique channel-based structure that needs dedicated handler
22142409
if (src.find("<|channel|>") != std::string::npos) {
22152410
LOG_DBG("Using specialized template: GPT-OSS\n");

conversion/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
TEXT_MODEL_MAP: dict[str, str] = {
2020
"AfmoeForCausalLM": "afmoe",
21+
"LagunaForCausalLM": "laguna",
2122
"ApertusForCausalLM": "llama",
2223
"ArceeForCausalLM": "llama",
2324
"ArcticForCausalLM": "arctic",

conversion/base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1672,6 +1672,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
16721672
if chkhsh == "9dcf830ee9990cdbf78cc523a5f7bd9ad8f3f9890c2d3581d2785ad10f07049d":
16731673
# ref: https://huggingface.co/JetBrains/Mellum2-12B-A2.5B-Base
16741674
res = "mellum2"
1675+
if chkhsh == "972da7b59cec44d1f0a490a86c96df53859e486e481563e5dddac155013d87ac":
1676+
# ref: https://huggingface.co/poolside/Laguna-XS.2
1677+
res = "laguna"
16751678

16761679
if res is None:
16771680
logger.warning("\n")

0 commit comments

Comments
 (0)