Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pi/gg/SYSTEM.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ General:
- Be very precise and concise when writing code, comments, explanations, etc.
- PR and commit titles format: `<module> : <title>`. Lookup recents for examples
- Don't try to build or run the code unless you are explicitly asked to do so
- Use the `gh` CLI tool when querying PRs, issues, or other GitHub resources

Coding:
- When in doubt, always refer to the CONTRIBUTING.md file of the project
Expand Down
9 changes: 6 additions & 3 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2864,7 +2864,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
{"--tools"}, "TOOL1,TOOL2,...",
"experimental: whether to enable built-in tools for AI agents - do not enable in untrusted environments (default: no tools)\n"
"specify \"all\" to enable all tools\n"
"available tools: read_file, file_glob_search, grep_search, exec_shell_command, write_file, edit_file, apply_diff",
"available tools: read_file, file_glob_search, grep_search, exec_shell_command, write_file, edit_file, apply_diff, get_datetime",
[](common_params & params, const std::string & value) {
params.server_tools = parse_csv_row(value);
}
Expand Down Expand Up @@ -3380,7 +3380,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
).set_spec().set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}));
add_opt(common_arg(
{"--spec-draft-poll", "--poll-draft"}, "<0|1>",
"Use polling to wait for draft model work (default: same as --poll])",
"Use polling to wait for draft model work (default: same as --poll)",
[](common_params & params, int value) {
params.speculative.draft.cpuparams.poll = value;
}
Expand Down Expand Up @@ -3794,7 +3794,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
add_opt(common_arg(
{"--diffusion-algorithm"}, "N",
string_format("diffusion algorithm: 0=ORIGIN, 1=ENTROPY_BASED, 2=MARGIN_BASED, 3=RANDOM, 4=LOW_CONFIDENCE (default: %d)", params.diffusion.algorithm),
string_format(
"diffusion algorithm: 0=DIFFUSION_ALGORITHM_ORIGIN, 1=DIFFUSION_ALGORITHM_ENTROPY_BASED, "
"2=DIFFUSION_ALGORITHM_MARGIN_BASED, 3=DIFFUSION_ALGORITHM_RANDOM, "
"4=DIFFUSION_ALGORITHM_CONFIDENCE_BASED (default: %d)", params.diffusion.algorithm),
[](common_params & params, int value) { params.diffusion.algorithm = value; }
).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
add_opt(common_arg(
Expand Down
16 changes: 5 additions & 11 deletions common/chat-auto-parser-generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,10 @@ common_peg_parser analyze_reasoning::build_parser(parser_build_context & ctx) co
if (!end.empty()) {
if (!start.empty()) {
// Standard tag-based: optional(<think>reasoning</think>)
return p.optional(start + p.reasoning(p.until(end)) + end + p.space());
return p.optional(p.optspace(start) + p.reasoning(p.until(trim_whitespace(end))) + p.optspace(end));
}
// Delimiter-style (empty start)
return p.optional(p.reasoning(p.until(end)) + end + p.space());
return p.optional(p.reasoning(p.until(trim_whitespace(end))) + p.optspace(end));
}
}

Expand Down Expand Up @@ -186,7 +186,6 @@ common_peg_parser analyze_tools::build_parser(parser_build_context & ctx) const
common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_context & ctx) const {
auto & p = ctx.p;
const auto & inputs = ctx.inputs;
bool force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;

// Build effective field names with dot notation if function_field is set
std::string name_field = format.name_field;
Expand Down Expand Up @@ -225,8 +224,7 @@ common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_cont
tool_start = format.per_call_start;
}

return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(p.until(tool_start)))) + tools_parser +
p.end();
return ctx.reasoning_parser + p.optional(p.content(p.until(tool_start))) + tools_parser + p.end();
}

common_peg_parser analyze_tools::build_func_parser(common_chat_peg_builder & p, const std::string & name,
Expand Down Expand Up @@ -270,7 +268,6 @@ common_peg_parser analyze_tools::build_func_parser(common_chat_peg_builder & p,
common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context & ctx) const {
auto & p = ctx.p;
const auto & inputs = ctx.inputs;
bool force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;

common_peg_parser tool_choice = p.choice();

Expand Down Expand Up @@ -336,14 +333,12 @@ common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context

std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls +
p.end();
return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
}

common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_context & ctx) const {
auto & p = ctx.p;
const auto & inputs = ctx.inputs;
bool force_tools = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED;

auto until_suffix = p.rule("until-suffix", p.until(arguments.value_suffix));

Expand Down Expand Up @@ -471,8 +466,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte

std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
return ctx.reasoning_parser + (force_tools ? p.eps() : p.optional(p.content(content_before_tools))) + tool_calls +
p.end();
return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
}

} // namespace autoparser
8 changes: 4 additions & 4 deletions common/chat-diff-analyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ void analyze_reasoning::compare_thinking_enabled() {
if (left_trimmed.empty() && !diff.right.empty()) {
if (!right_trimmed.empty() && string_ends_with(comparison->output_B, right_trimmed)) {
if (start.empty()) {
start = trim_leading_whitespace(diff.right);
start = diff.right;
mode = reasoning_mode::TAG_BASED;
}
}
Expand All @@ -353,7 +353,7 @@ void analyze_reasoning::compare_thinking_enabled() {
if (seg.size() >= 2 && seg[seg.size() - 1].value == left_trimmed && seg[seg.size() - 2].type == segment_type::MARKER) {
start = seg[seg.size() - 2].value;
}
end = trim_trailing_whitespace(diff.left);
end = diff.left;
mode = reasoning_mode::TAG_BASED;
}
}
Expand Down Expand Up @@ -445,14 +445,14 @@ void analyze_reasoning::compare_reasoning_scope() {
auto result = parser_wrapped.parse_anywhere_and_extract(comparison->output_B);
if (result.result.success()) {
start = result.tags["pre"];
end = trim_trailing_whitespace(result.tags["post"]);
end = result.tags["post"];
} else {
auto parser_delimiter = build_tagged_peg_parser([&](common_peg_parser_builder &p) {
return p.literal(reasoning_content) + p.space() + p.optional(p.tag("post", (p.marker() + p.space())));
});
result = parser_delimiter.parse_anywhere_and_extract(comparison->output_B);
if (result.result.success()) {
end = trim_trailing_whitespace(result.tags["post"]);
end = result.tags["post"];
} else {
LOG_DBG(ANSI_ORANGE "%s: Unable to extract reasoning markers, falling back to reasoning = NONE\n" ANSI_RESET, __func__);
mode = reasoning_mode::NONE;
Expand Down
26 changes: 26 additions & 0 deletions common/chat-peg-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,32 @@ common_peg_parser common_chat_peg_builder::prefix(const std::string & s, const s
return literal(s.substr(0, s.rfind(delimiter)));
}

// Build a parser that matches `tag`, where each leading and trailing whitespace
// character of `tag` is individually optional and the remaining core is matched
// as a single literal. A tag made only of whitespace yields all-optional
// characters followed by an empty literal.
common_peg_parser common_chat_peg_builder::optspace(const std::string & tag) {
    // std::isspace is undefined for negative values, so every byte goes through
    // unsigned char first (tags may contain non-ASCII UTF-8 bytes).
    const auto is_space = [](char c) {
        return std::isspace(static_cast<unsigned char>(c)) != 0;
    };

    auto parser = eps();

    // Index of the first non-whitespace character (tag.size() if there is none).
    size_t end_of_prefix_space = tag.size();
    for (size_t i = 0; i < tag.size(); i++) {
        if (!is_space(tag[i])) {
            end_of_prefix_space = i;
            break;
        }
    }

    // One past the last non-whitespace character (tag.size() if there is none).
    size_t start_of_suffix_space = tag.size();
    for (size_t i = tag.size(); i > 0; i--) {
        if (!is_space(tag[i - 1])) {
            start_of_suffix_space = i;
            break;
        }
    }

    // Leading whitespace: each character may be present or absent.
    for (size_t i = 0; i < end_of_prefix_space; i++) {
        parser += optional(literal(std::string(1, tag[i])));
    }

    // Mandatory core of the tag.
    parser += literal(tag.substr(end_of_prefix_space, start_of_suffix_space - end_of_prefix_space));

    // Trailing whitespace: each character may be present or absent.
    for (size_t i = start_of_suffix_space; i < tag.size(); i++) {
        parser += optional(literal(std::string(1, tag[i])));
    }
    return parser;
}

common_peg_parser common_chat_peg_builder::standard_json_tools(
const std::string & section_start,
const std::string & section_end,
Expand Down
3 changes: 3 additions & 0 deletions common/chat-peg-parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ class common_chat_peg_builder : public common_peg_parser_builder {
// Return a parser that parses the prefix of a string, up to a given delimiter.
common_peg_parser prefix(const std::string & s, const std::string & delimiter = {});

// Return a parser that matches tag, where each leading and trailing whitespace character of tag is optional
common_peg_parser optspace(const std::string & tag);

// Legacy-compatible helper for building standard JSON tool calls
// Used by tests and manual parsers
// name_key/args_key: JSON key names for function name and arguments
Expand Down
49 changes: 29 additions & 20 deletions common/chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2116,22 +2116,38 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
return std::nullopt;
}

// Render the template twice, without and with add_generation_prompt, and return
// the part of the second rendering that follows the longest common prefix of both.
static std::string common_chat_templates_generation_prompt(const common_chat_template & tmpl, const autoparser::generation_params & inputs) {
    autoparser::generation_params render_params = inputs;

    render_params.add_generation_prompt = false;
    const std::string without_prompt = common_chat_template_direct_apply_impl(tmpl, render_params);

    render_params.add_generation_prompt = true;
    const std::string with_prompt = common_chat_template_direct_apply_impl(tmpl, render_params);

    // Walk both renderings in lockstep until they diverge or the shorter one ends.
    const size_t limit = std::min(without_prompt.size(), with_prompt.size());
    size_t common = 0;
    for (; common < limit; ++common) {
        if (without_prompt[common] != with_prompt[common]) {
            break;
        }
    }

    return with_prompt.substr(common);
}

static common_chat_params common_chat_templates_apply_jinja(const struct common_chat_templates * tmpls,
const struct common_chat_templates_inputs & inputs) {
autoparser::generation_params params;
params.tools = common_chat_tools_to_json_oaicompat(inputs.tools);
const auto & tmpl =
params.tools.is_array() && tmpls->template_tool_use ? *tmpls->template_tool_use : *tmpls->template_default;
const auto & src = tmpl.source();
const auto & caps = tmpl.original_caps();
params.messages = render_message_to_json(inputs.messages, tmpl.original_caps());
params.tool_choice = inputs.tool_choice;
params.reasoning_format = inputs.reasoning_format;
params.enable_thinking = inputs.enable_thinking;
params.grammar = inputs.grammar;
params.now = inputs.now;
params.add_bos = tmpls->add_bos;
params.add_eos = tmpls->add_eos;
const auto & src = tmpl.source();
const auto & caps = tmpl.original_caps();
params.messages = render_message_to_json(inputs.messages, tmpl.original_caps());
params.tool_choice = inputs.tool_choice;
params.reasoning_format = inputs.reasoning_format;
params.enable_thinking = inputs.enable_thinking;
params.grammar = inputs.grammar;
params.now = inputs.now;
params.add_generation_prompt = inputs.add_generation_prompt;
params.add_bos = tmpls->add_bos;
params.add_eos = tmpls->add_eos;

if (src.find("<|channel|>") == std::string::npos) {
// map developer to system for all models except for GPT-OSS
Expand All @@ -2153,14 +2169,7 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
workaround::func_args_not_string(params.messages);
}

params.add_generation_prompt = false;
std::string no_gen_prompt = common_chat_template_direct_apply_impl(tmpl, params);
params.add_generation_prompt = true;
std::string gen_prompt = common_chat_template_direct_apply_impl(tmpl, params);
auto diff = calculate_diff_split(no_gen_prompt, gen_prompt);
params.generation_prompt = diff.right + diff.suffix;

params.add_generation_prompt = inputs.add_generation_prompt;
params.generation_prompt = common_chat_templates_generation_prompt(tmpl, params);

params.extra_context = common_chat_extra_context();
for (auto el : inputs.chat_template_kwargs) {
Expand Down Expand Up @@ -2212,8 +2221,8 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
auto auto_params = autoparser::peg_generator::generate_parser(tmpl, params, autoparser);
auto_params.supports_thinking = autoparser.reasoning.mode != autoparser::reasoning_mode::NONE;
if (auto_params.supports_thinking) {
auto_params.thinking_start_tag = autoparser.reasoning.start;
auto_params.thinking_end_tag = autoparser.reasoning.end;
auto_params.thinking_start_tag = trim_whitespace(autoparser.reasoning.start);
auto_params.thinking_end_tag = trim_whitespace(autoparser.reasoning.end);
}
auto_params.generation_prompt = params.generation_prompt;
common_peg_arena arena;
Expand Down
2 changes: 2 additions & 0 deletions common/reasoning-budget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ static void common_reasoning_budget_apply(struct llama_sampler * smpl, llama_tok
for (size_t i = 0; i < cur_p->size; i++) {
if (cur_p->data[i].id != forced) {
cur_p->data[i].logit = -INFINITY;
} else {
cur_p->data[i].logit = +INFINITY; // force the token
}
}
}
Expand Down
Loading