Skip to content

Commit 582a864

Browse files
committed
Merge upstream llama.cpp master
Resolutions follow upstream architecture with fork features kept separate:
- hparams: adopt n_layer()/is_swa_impl/is_recr_impl refactor, re-add DFlash fields and n_layer_kv() helper
- kv cache: thread upstream mem_other/share (ctx_other cell sharing) through iswa and server draft/MTP context creation; KVarN opts out of sharing
- gemma4: adopt upstream nextn support using the fork need_full_h_nextn early-trim pattern (matches qwen35/qwen35moe)
- server: follow upstream ggml-org#24108 (drop ON_DEVICE spec checkpoint flags), remove superseded slot_save_and_clear
- test-dflash-plumbing: add whitespace-insensitive matching for formatting-fragile invariants to survive upstream reformatting
2 parents 98caf25 + d73cd07 commit 582a864

316 files changed

Lines changed: 9930 additions & 4267 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.devops/musa.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ LABEL org.opencontainers.image.created=$BUILD_DATE \
6464
org.opencontainers.image.source=$IMAGE_SOURCE
6565

6666
RUN apt-get update \
67-
&& apt-get install -y libgomp1 curl \
67+
&& apt-get install -y libgomp1 curl ffmpeg \
6868
&& apt autoremove -y \
6969
&& apt clean -y \
7070
&& rm -rf /tmp/* /var/tmp/* \

.devops/openvino.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ LABEL org.opencontainers.image.created=$BUILD_DATE \
107107
org.opencontainers.image.source=$IMAGE_SOURCE
108108

109109
RUN apt-get update \
110-
&& apt-get install -y libgomp1 libtbb12 curl wget ocl-icd-libopencl1 \
110+
&& apt-get install -y libgomp1 libtbb12 curl wget ffmpeg ocl-icd-libopencl1 \
111111
&& apt autoremove -y \
112112
&& apt clean -y \
113113
&& rm -rf /tmp/* /var/tmp/* \

.devops/zendnn.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ LABEL org.opencontainers.image.created=$BUILD_DATE \
4646
org.opencontainers.image.source=$IMAGE_SOURCE
4747

4848
RUN apt-get update \
49-
&& apt-get install -y libgomp1 libnuma1 curl \
49+
&& apt-get install -y libgomp1 libnuma1 curl ffmpeg \
5050
&& apt autoremove -y \
5151
&& apt clean -y \
5252
&& rm -rf /tmp/* /var/tmp/* \

.pi/gg/SYSTEM.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ Pull requests (PRs):
1616
- New branch names are prefixed with "gg/"
1717
- Before opening a pull request, ask the user to confirm the description
1818
- When creating a pull request, look for the repository's PR template and follow it
19-
- For the AI usage disclosure section, write "YES. llama.cpp + pi + [MODEL]"
19+
- For the AI usage disclosure section, write "YES. pi:llama.cpp/[MODEL]"
2020
- Ask the user to tell you what model was used and write it in place of [MODEL]
2121
- Always create the pull requests in draft mode
2222

2323
Commits:
24-
- On every commit that you make, include a "Assisted-by: llama.cpp:local pi" tag
24+
- On every commit that you make, include an "Assisted-by: pi:llama.cpp/[MODEL]" tag
2525
- Do not explicitly set the git author in commits - rely on the default git config
2626
- Always use `--no-gpg-sign` when committing
2727
- Never `git push` without explicit confirmation from the user

build-xcframework.sh

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -130,14 +130,7 @@ setup_framework_structure() {
130130
# Create module map (common for all platforms)
131131
cat > ${module_path}module.modulemap << EOF
132132
framework module llama {
133-
header "llama.h"
134-
header "ggml.h"
135-
header "ggml-alloc.h"
136-
header "ggml-backend.h"
137-
header "ggml-metal.h"
138-
header "ggml-cpu.h"
139-
header "ggml-blas.h"
140-
header "gguf.h"
133+
umbrella "Headers"
141134
142135
link "c++"
143136
link framework "Accelerate"

common/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ add_library(${TARGET}
7979
hf-cache.cpp
8080
hf-cache.h
8181
http.h
82+
imatrix-loader.cpp
83+
imatrix-loader.h
8284
json-partial.cpp
8385
json-partial.h
8486
json-schema-to-grammar.cpp

common/arg.cpp

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -515,7 +515,14 @@ bool common_params_handle_models(common_params & params, llama_example curr_ex)
515515
opts.skip_download = params.skip_download;
516516
opts.download_mtp = spec_type_draft_mtp;
517517
opts.download_dflash = spec_type_dflash;
518-
opts.download_mmproj = !params.no_mmproj;
518+
opts.download_mmproj = !params.no_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty();
519+
520+
// sub-models (draft, mmproj, vocoder) are explicitly specified by the user,
521+
// so we should not auto-discover mtp/mmproj/dflash siblings for them
522+
common_download_opts sub_opts = opts;
523+
sub_opts.download_mtp = false;
524+
sub_opts.download_mmproj = false;
525+
sub_opts.download_dflash = false;
519526

520527
try {
521528
auto res = common_params_handle_model(params.model, opts);
@@ -528,7 +535,7 @@ bool common_params_handle_models(common_params & params, llama_example curr_ex)
528535
// only download mmproj if the current example is using it
529536
for (const auto & ex : mmproj_examples) {
530537
if (curr_ex == ex) {
531-
common_params_handle_model(params.mmproj, opts);
538+
common_params_handle_model(params.mmproj, sub_opts);
532539
break;
533540
}
534541
}
@@ -547,8 +554,8 @@ bool common_params_handle_models(common_params & params, llama_example curr_ex)
547554
params.speculative.draft.mparams.url.empty()) {
548555
params.speculative.draft.mparams.path = res.dflash.path;
549556
}
550-
common_params_handle_model(params.speculative.draft.mparams, opts);
551-
common_params_handle_model(params.vocoder.model, opts);
557+
common_params_handle_model(params.speculative.draft.mparams, sub_opts);
558+
common_params_handle_model(params.vocoder.model, sub_opts);
552559
return true;
553560
} catch (const common_skip_download_exception &) {
554561
return false;
@@ -1578,7 +1585,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
15781585
add_opt(common_arg(
15791586
{"--cache-idle-slots"},
15801587
{"--no-cache-idle-slots"},
1581-
"save and clear idle slots on new task (default: enabled, requires unified KV and cache-ram)",
1588+
"save idle slots to the prompt cache on new task, and clear them when using unified KV (default: enabled, requires cache-ram)",
15821589
[](common_params & params, bool value) {
15831590
params.cache_idle_slots = value;
15841591
}
@@ -1840,7 +1847,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
18401847
string_format("samplers that will be used for generation in the order, separated by \';\'\n(default: %s)", sampler_type_names.c_str()),
18411848
[](common_params & params, const std::string & value) {
18421849
const auto sampler_names = string_split<std::string>(value, ';');
1843-
params.sampling.samplers = common_sampler_types_from_names(sampler_names, true);
1850+
params.sampling.samplers = common_sampler_types_from_names(sampler_names);
18441851
params.sampling.user_sampling_config |= common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_SAMPLERS;
18451852
}
18461853
).set_sampling());
@@ -2454,8 +2461,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
24542461
}
24552462
).set_examples({LLAMA_EXAMPLE_SERVER}));
24562463
add_opt(common_arg(
2457-
{"--image", "--audio"}, "FILE",
2458-
"path to an image or audio file. use with multimodal models, use comma-separated values for multiple files\n",
2464+
{"--image", "--audio", "--video"}, "FILE",
2465+
"path to an image, audio, or video file. use with multimodal models, use comma-separated values for multiple files\n",
24592466
[](common_params & params, const std::string & value) {
24602467
for (const auto & item : parse_csv_row(value)) {
24612468
params.image.emplace_back(item);
@@ -3616,6 +3623,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
36163623
common_log_set_file(common_log_main(), value.c_str());
36173624
}
36183625
).set_env("LLAMA_ARG_LOG_FILE"));
3626+
add_opt(common_arg(
3627+
{"--log-prompts-dir"}, "PATH",
3628+
"Log prompts to directory (only used for debugging, default: disabled)",
3629+
[](common_params & params, const std::string & value) {
3630+
params.path_prompts_log_dir = value;
3631+
}
3632+
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}));
36193633
add_opt(common_arg(
36203634
{"--log-colors"}, "[on|off|auto]",
36213635
"Set colored logging ('on', 'off', or 'auto', default: 'auto')\n"

common/chat-peg-parser.cpp

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ static std::string normalize_quotes_to_json(const std::string & input) {
8787
bool in_single_quoted = false;
8888
bool in_double_quoted = false;
8989

90+
auto is_word_char = [](char ch) { return std::isalnum(static_cast<unsigned char>(ch)) || ch == '_'; };
91+
9092
for (size_t i = 0; i < input.size(); ++i) {
9193
char c = input[i];
9294

@@ -151,6 +153,29 @@ static std::string normalize_quotes_to_json(const std::string & input) {
151153
in_single_quoted = true;
152154
result += '"';
153155
}
156+
} else if (!in_single_quoted && !in_double_quoted && (c == 'T' || c == 'F' || c == 'N') &&
157+
(i == 0 || !is_word_char(input[i - 1]))) {
158+
// Python literals -> JSON; prefix match keeps streamed partials monotonic.
159+
static constexpr std::pair<std::string_view, std::string_view> literals[] = {
160+
{ "True", "true" }, { "False", "false" }, { "None", "null" },
161+
};
162+
size_t n = 0;
163+
while (i + n < input.size() && is_word_char(input[i + n])) {
164+
++n;
165+
}
166+
std::string_view token(input.data() + i, n);
167+
bool matched = false;
168+
for (const auto & [py, js] : literals) {
169+
if (py.substr(0, n) == token) {
170+
result += js.substr(0, n);
171+
i += n - 1;
172+
matched = true;
173+
break;
174+
}
175+
}
176+
if (!matched) {
177+
result += c;
178+
}
154179
} else {
155180
result += c;
156181
}
@@ -353,12 +378,8 @@ void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
353378
}
354379
value_to_add += escape_json_string_inner(value_content);
355380
} else if (!value_content.empty()) {
356-
// For potential containers, normalize Python-style single quotes to JSON double quotes
357-
bool is_potential_container = value_content[0] == '[' || value_content[0] == '{';
358-
if (is_potential_container) {
359-
value_content = normalize_container_value(value_content);
360-
}
361-
value_to_add += value_content;
381+
// Pythonic scalars/containers -> JSON.
382+
value_to_add += normalize_container_value(value_content);
362383
}
363384

364385
args_target() += value_to_add;
@@ -466,11 +487,34 @@ common_peg_parser common_chat_peg_builder::standard_constructed_tools(
466487
return force_tool_calls ? section : optional(section);
467488
}
468489

490+
// Like python_value(), but the leaf also accepts JSON-cased true/false/null, used by LFM2/LFM2.5
//
// Builds the "python-or-json-value" rule as a choice over:
//   - dict:   "{" ws ( "}" | members ws "}" ) ws
//             members = member ( ws "," ws member )*
//             member  = python_string ws ":" ws value
//   - array:  "[" ws ( "]" | elements ws "]" ) ws
//             elements = value ( "," ws value )*
//   - leaves: python_string, python_number, python_bool, python_null, json_bool, json_null
//
// `value` is obtained by calling python_or_json_value() again from inside the rule builder.
// NOTE(review): presumably rule() resolves an already-registered rule name to a reference
// instead of re-running the builder, which is what would make this recursion terminate — confirm.
491+
common_peg_parser common_chat_peg_builder::python_or_json_value() {
492+
return rule("python-or-json-value", [this]() {
493+
auto ws = space();
494+
auto value = python_or_json_value();
495+
496+
auto member = sequence({ python_string(), ws, literal(":"), ws, value });
497+
auto members = sequence({ member, zero_or_more(sequence({ ws, literal(","), ws, member })) });
498+
auto dict = rule("python-or-json-dict", [&]() {
499+
return sequence({ literal("{"), ws, choice({ literal("}"), sequence({ members, ws, literal("}") }) }), ws });
500+
});
501+
502+
auto elements = sequence({ value, zero_or_more(sequence({ literal(","), ws, value })) }); // NOTE(review): no ws before "," here, unlike `members` — confirm this is intended
503+
auto array = rule("python-or-json-array", [&]() {
504+
return sequence({ literal("["), ws, choice({ literal("]"), sequence({ elements, ws, literal("]") }) }), ws });
505+
});
506+
507+
return choice({ dict, array, python_string(), python_number(),
508+
python_bool(), python_null(), json_bool(), json_null() });
509+
});
510+
}
511+
469512
// Python-style tool calls: name(arg1="value1", arg2=123)
470513
// Used only by LFM2 for now, so we don't merge it into autoparser
471514
common_peg_parser common_chat_peg_builder::python_style_tool_calls(
472515
const ordered_json & tools,
473-
bool parallel_tool_calls) {
516+
bool parallel_tool_calls,
517+
bool allow_json_literals) {
474518
if (!tools.is_array() || tools.empty()) {
475519
return eps();
476520
}
@@ -504,7 +548,7 @@ common_peg_parser common_chat_peg_builder::python_style_tool_calls(
504548
if (is_string_type) {
505549
arg_value_parser = string_value_parser;
506550
} else {
507-
arg_value_parser = tool_arg_value(python_value());
551+
arg_value_parser = tool_arg_value(allow_json_literals ? python_or_json_value() : python_value());
508552
}
509553

510554
// Full argument: name="value" or name=value

common/chat-peg-parser.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,13 @@ class common_chat_peg_builder : public common_peg_parser_builder {
132132
// Helper for Python-style function call format: name(arg1="value1", arg2=123)
133133
// Used by LFM2 and similar templates
134134
common_peg_parser python_style_tool_calls(const nlohmann::ordered_json & tools,
135-
bool parallel_tool_calls);
135+
bool parallel_tool_calls,
136+
bool allow_json_literals);
136137

137138
private:
139+
// Python values plus JSON true/false/null.
140+
common_peg_parser python_or_json_value();
141+
138142
// Implementation helpers for standard_json_tools — one per JSON tool call layout mode
139143
common_peg_parser build_json_tools_function_is_key(const nlohmann::ordered_json & tools,
140144
const std::string & args_key,
@@ -195,4 +199,3 @@ struct tagged_peg_parser {
195199

196200
tagged_peg_parser build_tagged_peg_parser(
197201
const std::function<common_peg_parser(common_peg_parser_builder & builder)> & fn);
198-

0 commit comments

Comments
 (0)