Skip to content

Commit dac4002

Browse files
Merge pull request #515 from janhq/update-dev-from-master-2026-05-14-01-09
Sync master with upstream release b9140
2 parents 6b10873 + 1e4579f commit dac4002

31 files changed

Lines changed: 2864 additions & 2622 deletions

File tree

.github/workflows/code-style.yml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
name: Code Style Checker
2+
3+
on:
4+
workflow_dispatch: # allows manual triggering
5+
push:
6+
branches:
7+
- master
8+
pull_request:
9+
branches:
10+
- master
11+
12+
concurrency:
13+
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
14+
cancel-in-progress: true
15+
16+
jobs:
17+
model-naming:
18+
runs-on: ubuntu-slim
19+
steps:
20+
- uses: actions/checkout@v6
21+
- name: Check model naming conventions
22+
run: |
23+
python3 - << 'EOF'
24+
"""Validate the LLM_ARCH_* -> llama_model_* naming conventions.

For every `case LLM_ARCH_X:` label in src/llama-model.cpp that is directly
followed by `return new llama_model_x(`, check that:
  * the arch suffix is UPPER_SNAKE_CASE,
  * the class suffix is lower_snake_case,
  * the class suffix equals the lower-cased arch suffix,
  * src/models/<class-suffix-with-dashes>.cpp exists.

Exits with status 1 and prints one line per violation when any check fails.
"""
import os
import re
import sys

MODEL_SOURCE = "src/llama-model.cpp"

# A factory `case` label followed, on the very next line, by the `return new`
# expression that constructs the model class for that architecture.
MAPPING_RE = re.compile(
    r'case\s+(LLM_ARCH_\w+)\s*:\s*\n\s+return new (llama_model_\w+)\s*\(')


def find_mappings(text):
    """Return the (arch, class) name pairs found in `text`, in source order."""
    return MAPPING_RE.findall(text)


def check_mappings(pairs, file_exists=os.path.isfile):
    """Return a list of human-readable violations for the given name pairs.

    `file_exists` is the predicate used to test for the per-model source file;
    it is a parameter only so the check can be exercised without a checkout.
    """
    errors = []
    for arch, cls in pairs:
        suffix = arch[len("LLM_ARCH_"):]
        csuffix = cls[len("llama_model_"):]
        fname = csuffix.replace("_", "-") + ".cpp"

        # The arch-name check is independent of the class-name checks below,
        # so one mapping can report both an arch and a class problem.
        if not re.fullmatch(r'[A-Z][A-Z0-9_]*', suffix):
            errors.append(f"{arch}: suffix not upper snake case, example: LLM_ARCH_MY_MODEL")

        # Class checks are ordered so that only the first failure is reported.
        if not re.fullmatch(r'[a-z][a-z0-9_]*', csuffix):
            errors.append(f"{arch}: class suffix not lower snake case, example: llama_model_my_model")
        elif suffix.lower() != csuffix:
            errors.append(f"{arch}: arch/class name mismatch, expected class 'llama_model_{suffix.lower()}' but got '{cls}'")
        elif not file_exists(f"src/models/{fname}"):
            errors.append(f"{arch}: expects model file name to be src/models/{fname}, but not found")
    return errors


def main():
    """Run the check against the repository; return the process exit status."""
    # Context manager so the file handle is closed deterministically.
    with open(MODEL_SOURCE, encoding="utf-8") as src:
        pairs = find_mappings(src.read())

    # Zero matches means the pattern no longer fits the factory code; passing
    # here would silently turn the whole check into a no-op.
    if not pairs:
        print(f" - no LLM_ARCH_* -> llama_model_* mappings found in {MODEL_SOURCE}")
        return 1

    errors = check_mappings(pairs)
    if errors:
        print('\n'.join(f" - {e}" for e in errors))
        return 1

    print(f"OK: {len(pairs)} mappings validated.")
    return 0


# The script is fed to `python3 -` on stdin, where __name__ is "__main__".
if __name__ == "__main__":
    sys.exit(main())
51+
EOF

.github/workflows/editorconfig.yml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,6 @@ name: EditorConfig Checker
22

33
on:
44
workflow_dispatch: # allows manual triggering
5-
inputs:
6-
create_release:
7-
description: 'Create new release'
8-
required: true
9-
type: boolean
105
push:
116
branches:
127
- master

common/arg.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -357,8 +357,7 @@ static handle_model_result common_params_handle_model(struct common_params_model
357357
auto download_result = common_download_model(model, opts, true);
358358

359359
if (download_result.model_path.empty()) {
360-
LOG_ERR("error: failed to download model from Hugging Face\n");
361-
exit(1);
360+
throw std::runtime_error("failed to download model from Hugging Face");
362361
}
363362

364363
model.name = model.hf_repo;
@@ -380,8 +379,7 @@ static handle_model_result common_params_handle_model(struct common_params_model
380379
opts.offline = offline;
381380
auto download_result = common_download_model(model, opts);
382381
if (download_result.model_path.empty()) {
383-
LOG_ERR("error: failed to download model from %s\n", model.url.c_str());
384-
exit(1);
382+
throw std::runtime_error("failed to download model from " + model.url);
385383
}
386384
}
387385

@@ -2223,7 +2221,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
22232221
if (llama_supports_rpc()) {
22242222
add_opt(common_arg(
22252223
{"--rpc"}, "SERVERS",
2226-
"comma separated list of RPC servers (host:port)",
2224+
"comma-separated list of RPC servers (host:port)",
22272225
[](common_params & params, const std::string & value) {
22282226
add_rpc_devices(value);
22292227
GGML_UNUSED(params);
@@ -3555,7 +3553,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
35553553
).set_spec().set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_SPEC_DRAFT_MODEL"));
35563554
add_opt(common_arg(
35573555
{"--spec-type"}, common_speculative_all_types_str(),
3558-
string_format("type of speculative decoding to use when no draft model is provided (default: %s)\n",
3556+
string_format("comma-separated list of types of speculative decoding to use (default: %s)\n",
35593557
common_speculative_type_name_str(params.speculative.types).c_str()),
35603558
[](common_params & params, const std::string & value) {
35613559
const auto enabled_types = string_split<std::string>(value, ',');

common/common.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,9 +157,9 @@ enum common_params_sampling_config : uint64_t {
157157

158158
enum common_speculative_type {
159159
COMMON_SPECULATIVE_TYPE_NONE, // no speculative decoding
160-
COMMON_SPECULATIVE_TYPE_DRAFT, // draft model
161-
COMMON_SPECULATIVE_TYPE_EAGLE3, // eagle draft model
162-
COMMON_SPECULATIVE_TYPE_NGRAM_SIMPLE, // simple self-speculative decoding
160+
COMMON_SPECULATIVE_TYPE_DRAFT_SIMPLE, // standalone draft model speculative decoding
161+
COMMON_SPECULATIVE_TYPE_DRAFT_EAGLE3, // Eagle3 speculative decoding
162+
COMMON_SPECULATIVE_TYPE_NGRAM_SIMPLE, // simple self-speculative decoding based on n-grams
163163
COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K, // self-speculative decoding with n-gram keys only
164164
COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K4V, // self-speculative decoding with n-gram keys and 4 m-gram values
165165
COMMON_SPECULATIVE_TYPE_NGRAM_MOD,
@@ -342,6 +342,7 @@ struct common_params_speculative_ngram_cache {
342342
struct common_params_speculative {
343343
std::vector<enum common_speculative_type> types = { COMMON_SPECULATIVE_TYPE_NONE };
344344

345+
// used by Simple, MTP, Eagle3, etc. - all methods that require some kind of draft model
345346
common_params_speculative_draft draft;
346347

347348
common_params_speculative_ngram_mod ngram_mod;

common/speculative.cpp

Lines changed: 39 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121

2222
const std::map<std::string, common_speculative_type> common_speculative_type_from_name_map = {
2323
{"none", COMMON_SPECULATIVE_TYPE_NONE},
24-
{"draft", COMMON_SPECULATIVE_TYPE_DRAFT},
25-
{"eagle3", COMMON_SPECULATIVE_TYPE_EAGLE3},
24+
{"draft-simple", COMMON_SPECULATIVE_TYPE_DRAFT_SIMPLE},
25+
{"draft-eagle3", COMMON_SPECULATIVE_TYPE_DRAFT_EAGLE3},
2626
{"ngram-simple", COMMON_SPECULATIVE_TYPE_NGRAM_SIMPLE},
2727
{"ngram-map-k", COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K},
2828
{"ngram-map-k4v", COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K4V},
@@ -145,15 +145,15 @@ struct common_speculative_impl {
145145
virtual void accept(llama_seq_id seq_id, uint16_t n_accepted) = 0;
146146
};
147147

148-
struct common_speculative_state_draft : public common_speculative_impl {
148+
struct common_speculative_impl_draft_simple : public common_speculative_impl {
149149
common_params_speculative_draft params;
150150

151151
llama_batch batch;
152152

153153
std::vector<common_sampler_ptr> smpls;
154154

155-
common_speculative_state_draft(const common_params_speculative & params, uint32_t n_seq)
156-
: common_speculative_impl(COMMON_SPECULATIVE_TYPE_DRAFT, n_seq)
155+
common_speculative_impl_draft_simple(const common_params_speculative & params, uint32_t n_seq)
156+
: common_speculative_impl(COMMON_SPECULATIVE_TYPE_DRAFT_SIMPLE, n_seq)
157157
, params(params.draft)
158158
{
159159
auto * ctx_dft = this->params.ctx_dft;
@@ -206,7 +206,7 @@ struct common_speculative_state_draft : public common_speculative_impl {
206206
}
207207
}
208208

209-
~common_speculative_state_draft() override {
209+
~common_speculative_impl_draft_simple() override {
210210
llama_batch_free(batch);
211211
}
212212

@@ -340,11 +340,11 @@ struct common_speculative_state_draft : public common_speculative_impl {
340340
}
341341
};
342342

343-
struct common_speculative_state_eagle3 : public common_speculative_impl {
343+
struct common_speculative_impl_draft_eagle3 : public common_speculative_impl {
344344
//common_params_speculative_eagle3 params;
345345

346-
common_speculative_state_eagle3(const common_params_speculative & /*params*/, uint32_t n_seq)
347-
: common_speculative_impl(COMMON_SPECULATIVE_TYPE_EAGLE3, n_seq) {}
346+
common_speculative_impl_draft_eagle3(const common_params_speculative & /*params*/, uint32_t n_seq)
347+
: common_speculative_impl(COMMON_SPECULATIVE_TYPE_DRAFT_EAGLE3, n_seq) {}
348348

349349
void begin(llama_seq_id /*seq_id*/, const llama_tokens & /*prompt*/) override {
350350
// noop
@@ -365,13 +365,13 @@ struct common_speculative_state_eagle3 : public common_speculative_impl {
365365
};
366366

367367
// state of self-speculation (simple implementation, not ngram-map)
368-
struct common_speculative_state_ngram_simple : public common_speculative_impl {
368+
struct common_speculative_impl_ngram_simple : public common_speculative_impl {
369369
common_params_speculative_ngram_map params;
370370

371371
// shared across all sequences
372372
common_ngram_simple_config config;
373373

374-
common_speculative_state_ngram_simple(
374+
common_speculative_impl_ngram_simple(
375375
const common_params_speculative & params, uint32_t n_seq,
376376
common_ngram_simple_config config)
377377
: common_speculative_impl(COMMON_SPECULATIVE_TYPE_NGRAM_SIMPLE, n_seq)
@@ -405,13 +405,13 @@ struct common_speculative_state_ngram_simple : public common_speculative_impl {
405405
}
406406
};
407407

408-
struct common_speculative_state_ngram_map_k : public common_speculative_impl {
408+
struct common_speculative_impl_ngram_map_k : public common_speculative_impl {
409409
common_params_speculative_ngram_map params;
410410

411411
// n_seq configs
412412
std::vector<common_ngram_map> config;
413413

414-
common_speculative_state_ngram_map_k(
414+
common_speculative_impl_ngram_map_k(
415415
const common_params_speculative & params,
416416
const common_ngram_map & config,
417417
uint32_t n_seq)
@@ -453,7 +453,7 @@ struct common_speculative_state_ngram_map_k : public common_speculative_impl {
453453
}
454454
};
455455

456-
struct common_speculative_state_ngram_mod : public common_speculative_impl {
456+
struct common_speculative_impl_ngram_mod : public common_speculative_impl {
457457
common_params_speculative_ngram_mod params;
458458

459459
// shared across all sequences
@@ -475,7 +475,7 @@ struct common_speculative_state_ngram_mod : public common_speculative_impl {
475475

476476
std::vector<seq_info> sinfos;
477477

478-
common_speculative_state_ngram_mod(
478+
common_speculative_impl_ngram_mod(
479479
const common_params_speculative & params,
480480
uint32_t n_seq)
481481
: common_speculative_impl(COMMON_SPECULATIVE_TYPE_NGRAM_MOD, n_seq)
@@ -621,7 +621,7 @@ struct common_speculative_state_ngram_mod : public common_speculative_impl {
621621
}
622622
};
623623

624-
struct common_speculative_state_ngram_cache : public common_speculative_impl {
624+
struct common_speculative_impl_ngram_cache : public common_speculative_impl {
625625
common_params_speculative_ngram_cache params;
626626

627627
uint16_t n_draft;
@@ -639,7 +639,7 @@ struct common_speculative_state_ngram_cache : public common_speculative_impl {
639639

640640
std::vector<seq_info> sinfos;
641641

642-
common_speculative_state_ngram_cache(
642+
common_speculative_impl_ngram_cache(
643643
const common_params_speculative & params,
644644
uint32_t n_seq,
645645
uint16_t n_draft,
@@ -775,7 +775,7 @@ static common_ngram_map get_common_ngram_map(
775775
return common_ngram_map(size_key, size_value, key_only, min_hits);
776776
}
777777

778-
static common_speculative_state_ngram_cache create_state_ngram_cache(
778+
static common_speculative_impl_ngram_cache create_state_ngram_cache(
779779
const common_speculative_config & config,
780780
uint32_t n_seq,
781781
const std::string & path_static,
@@ -786,7 +786,7 @@ static common_speculative_state_ngram_cache create_state_ngram_cache(
786786
bool save_static = false;
787787
bool save_dynamic = false;
788788

789-
common_speculative_state_ngram_cache state(config.params, n_seq, n_draft, path_static, path_dynamic, save_static, save_dynamic);
789+
common_speculative_impl_ngram_cache state(config.params, n_seq, n_draft, path_static, path_dynamic, save_static, save_dynamic);
790790

791791
return state;
792792
}
@@ -818,8 +818,8 @@ const char * common_speculative_all_types_str() {
818818
std::string common_speculative_type_to_str(common_speculative_type type) {
819819
switch (type) {
820820
case COMMON_SPECULATIVE_TYPE_NONE: return "none";
821-
case COMMON_SPECULATIVE_TYPE_DRAFT: return "draft";
822-
case COMMON_SPECULATIVE_TYPE_EAGLE3: return "eagle3";
821+
case COMMON_SPECULATIVE_TYPE_DRAFT_SIMPLE: return "draft-simple";
822+
case COMMON_SPECULATIVE_TYPE_DRAFT_EAGLE3: return "draft-eagle3";
823823
case COMMON_SPECULATIVE_TYPE_NGRAM_SIMPLE: return "ngram-simple";
824824
case COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K: return "ngram-map-k";
825825
case COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K4V: return "ngram-map-k4v";
@@ -872,9 +872,9 @@ common_speculative * common_speculative_init(common_params_speculative & params,
872872
{
873873
uint32_t enabled_configs = common_get_enabled_speculative_configs(params.types);
874874

875-
bool has_draft = (enabled_configs & (1u << COMMON_SPECULATIVE_TYPE_DRAFT));
876-
bool has_draft_model = !params.draft.mparams.path.empty();
875+
bool has_draft_model_path = !params.draft.mparams.path.empty();
877876

877+
bool has_draft_simple = (enabled_configs & (1u << COMMON_SPECULATIVE_TYPE_DRAFT_SIMPLE));
878878
// bool has_mtp = false; // TODO: add MTP here
879879
bool has_draft_eagle3 = false; // TODO PR-18039: if params.speculative.eagle3
880880

@@ -906,22 +906,22 @@ common_speculative * common_speculative_init(common_params_speculative & params,
906906
if (has_ngram_cache) {
907907
configs.push_back(common_speculative_config(COMMON_SPECULATIVE_TYPE_NGRAM_CACHE, params));
908908
}
909-
if (has_draft) {
910-
if (!has_draft_model) {
909+
if (has_draft_simple) {
910+
if (!has_draft_model_path) {
911911
// the user-facing name of this type is 'draft-simple' (see common_speculative_type_to_str)
LOG_WRN("%s: draft model is not specified - cannot use 'draft-simple' type\n", __func__);
912-
has_draft = false;
912+
has_draft_simple = false;
913913
}
914-
} else if (has_draft_model) {
914+
} else if (has_draft_model_path) {
915915
// a draft model path implies the 'draft-simple' type even when it was not listed explicitly
LOG_WRN("%s: draft model is specified but 'draft-simple' speculative type is not explicitly enabled - enabling it\n", __func__);
916-
has_draft = true;
916+
has_draft_simple = true;
917917
}
918918

919-
if (has_draft) {
920-
configs.push_back(common_speculative_config(COMMON_SPECULATIVE_TYPE_DRAFT, params));
919+
if (has_draft_simple) {
920+
configs.push_back(common_speculative_config(COMMON_SPECULATIVE_TYPE_DRAFT_SIMPLE, params));
921921
}
922922
// TODO: add MTP here
923923
if (has_draft_eagle3) {
924-
configs.push_back(common_speculative_config(COMMON_SPECULATIVE_TYPE_EAGLE3, params));
924+
configs.push_back(common_speculative_config(COMMON_SPECULATIVE_TYPE_DRAFT_EAGLE3, params));
925925
}
926926
}
927927

@@ -932,12 +932,12 @@ common_speculative * common_speculative_init(common_params_speculative & params,
932932
switch (config.type) {
933933
case COMMON_SPECULATIVE_TYPE_NONE:
934934
break;
935-
case COMMON_SPECULATIVE_TYPE_DRAFT: {
936-
impls.push_back(std::make_unique<common_speculative_state_draft>(config.params, n_seq));
935+
case COMMON_SPECULATIVE_TYPE_DRAFT_SIMPLE: {
936+
impls.push_back(std::make_unique<common_speculative_impl_draft_simple>(config.params, n_seq));
937937
break;
938938
}
939-
case COMMON_SPECULATIVE_TYPE_EAGLE3: {
940-
impls.push_back(std::make_unique<common_speculative_state_eagle3>(config.params, n_seq));
939+
case COMMON_SPECULATIVE_TYPE_DRAFT_EAGLE3: {
940+
impls.push_back(std::make_unique<common_speculative_impl_draft_eagle3>(config.params, n_seq));
941941
break;
942942
}
943943
case COMMON_SPECULATIVE_TYPE_NGRAM_SIMPLE: {
@@ -950,7 +950,7 @@ common_speculative * common_speculative_init(common_params_speculative & params,
950950
/* .size_ngram = */ ngram_size_key,
951951
/* .size_mgram = */ mgram_size_value
952952
};
953-
auto state = std::make_unique<common_speculative_state_ngram_simple>(
953+
auto state = std::make_unique<common_speculative_impl_ngram_simple>(
954954
/* .params = */ config.params,
955955
/* .n_seq = */ n_seq,
956956
/* .state = */ config_simple
@@ -961,21 +961,21 @@ common_speculative * common_speculative_init(common_params_speculative & params,
961961
case COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K:
962962
case COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K4V: {
963963
impls.push_back(
964-
std::make_unique<common_speculative_state_ngram_map_k>(
964+
std::make_unique<common_speculative_impl_ngram_map_k>(
965965
config.params, get_common_ngram_map(config.type, config.params.ngram_map_k), n_seq));
966966
break;
967967
}
968968
case COMMON_SPECULATIVE_TYPE_NGRAM_MOD: {
969969
impls.push_back(
970-
std::make_unique<common_speculative_state_ngram_mod>(config.params, n_seq));
970+
std::make_unique<common_speculative_impl_ngram_mod>(config.params, n_seq));
971971
break;
972972
}
973973
case COMMON_SPECULATIVE_TYPE_NGRAM_CACHE: {
974974
auto state = create_state_ngram_cache(
975975
config, n_seq,
976976
params.ngram_cache.lookup_cache_static,
977977
params.ngram_cache.lookup_cache_dynamic);
978-
impls.push_back(std::make_unique<common_speculative_state_ngram_cache>(state));
978+
impls.push_back(std::make_unique<common_speculative_impl_ngram_cache>(state));
979979
break;
980980
}
981981
default:

0 commit comments

Comments
 (0)