Skip to content

Commit bc33838

Browse files
committed
common : rename speculative.draftless_type -> speculative.type
1 parent 351e798 commit bc33838

4 files changed

Lines changed: 18 additions & 17 deletions

File tree

common/arg.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3400,18 +3400,18 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
34003400
add_opt(common_arg(
34013401
{"--spec-draftless"}, "[none|ngram-cache|ngram-simple|ngram-map-k|ngram-map-k4v]",
34023402
string_format("type of speculative decoding to use when no draft model is provided (default: %s)\n",
3403-
common_speculative_type_to_str(params.speculative.draftless_type).c_str()),
3403+
common_speculative_type_to_str(params.speculative.type).c_str()),
34043404
[](common_params & params, const std::string & value) {
34053405
if (value == "none") {
3406-
params.speculative.draftless_type = COMMON_SPECULATIVE_TYPE_NONE;
3406+
params.speculative.type = COMMON_SPECULATIVE_TYPE_NONE;
34073407
} else if (value == "ngram-cache") {
3408-
params.speculative.draftless_type = COMMON_SPECULATIVE_TYPE_NGRAM_CACHE;
3408+
params.speculative.type = COMMON_SPECULATIVE_TYPE_NGRAM_CACHE;
34093409
} else if (value == "ngram-simple") {
3410-
params.speculative.draftless_type = COMMON_SPECULATIVE_TYPE_NGRAM_SIMPLE;
3410+
params.speculative.type = COMMON_SPECULATIVE_TYPE_NGRAM_SIMPLE;
34113411
} else if (value == "ngram-map-k") {
3412-
params.speculative.draftless_type = COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K;
3412+
params.speculative.type = COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K;
34133413
} else if (value == "ngram-map-k4v") {
3414-
params.speculative.draftless_type = COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K4V;
3414+
params.speculative.type = COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K4V;
34153415
} else {
34163416
throw std::invalid_argument("unknown speculative decoding type without draft model");
34173417
}

common/common.h

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -273,9 +273,7 @@ struct common_params_speculative {
273273

274274
struct common_params_model model;
275275

276-
// draftless:
277-
278-
common_speculative_type draftless_type = COMMON_SPECULATIVE_TYPE_NONE; // type of speculative decoding without a draft model
276+
common_speculative_type type = COMMON_SPECULATIVE_TYPE_NONE; // type of speculative decoding
279277

280278
uint16_t ngram_size_n = 12; // ngram size for lookup
281279
uint16_t ngram_size_m = 48; // mgram size for speculative tokens

common/speculative.cpp

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -355,12 +355,14 @@ struct common_speculative * common_speculative_init(
355355
// Compute the implementations to use based on the config and their order of preference
356356
std::vector<common_speculative_config> configs = {}; // list of speculative configs to try
357357
{
358-
bool has_draft =!params.model.path.empty();
358+
bool has_draft = !params.model.path.empty();
359359
bool has_draft_eagle3 = false; // TODO PR-18039: if params.speculative.eagle3
360-
bool has_ngram_cache = (params.draftless_type == COMMON_SPECULATIVE_TYPE_NGRAM_CACHE);
361-
bool has_ngram_simple = (params.draftless_type == COMMON_SPECULATIVE_TYPE_NGRAM_SIMPLE);
362-
bool has_ngram_map_k = (params.draftless_type == COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K);
363-
bool has_ngram_map_k4v = (params.draftless_type == COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K4V);
360+
361+
bool has_ngram_cache = (params.type == COMMON_SPECULATIVE_TYPE_NGRAM_CACHE);
362+
bool has_ngram_simple = (params.type == COMMON_SPECULATIVE_TYPE_NGRAM_SIMPLE);
363+
bool has_ngram_map_k = (params.type == COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K);
364+
bool has_ngram_map_k4v = (params.type == COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K4V);
365+
364366
// In a more complex implementation we could use the same implementation but with different parameters.
365367
// This was initially used in PR-18471 but removed to simplify the code.
366368
if (has_ngram_simple) {

tools/server/server-task.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@ json task_params::to_json(bool only_metrics) const {
7777
{"speculative.n_max", speculative.n_max},
7878
{"speculative.n_min", speculative.n_min},
7979
{"speculative.p_min", speculative.p_min},
80-
{"speculative.draftless_t", common_speculative_type_to_str(speculative.draftless_type)},
80+
{"speculative.type", common_speculative_type_to_str(speculative.type)},
8181
{"speculative.ngram_size_n", speculative.ngram_size_n},
8282
{"speculative.ngram_size_m", speculative.ngram_size_m},
8383
{"speculative.ngram_c_rate", speculative.ngram_check_rate},
@@ -141,7 +141,7 @@ json task_params::to_json(bool only_metrics) const {
141141
{"speculative.n_max", speculative.n_max},
142142
{"speculative.n_min", speculative.n_min},
143143
{"speculative.p_min", speculative.p_min},
144-
{"speculative.draftless_t", common_speculative_type_to_str(speculative.draftless_type)},
144+
{"speculative.type", common_speculative_type_to_str(speculative.type)},
145145
{"speculative.ngram_size_n", speculative.ngram_size_n},
146146
{"speculative.ngram_size_m", speculative.ngram_size_m},
147147
{"speculative.ngram_c_rate", speculative.ngram_check_rate},
@@ -253,7 +253,8 @@ task_params server_task::params_from_json_cmpl(
253253
params.speculative.n_min = std::max(params.speculative.n_min, 0);
254254
params.speculative.n_max = std::max(params.speculative.n_max, 0);
255255

256-
params.speculative.draftless_type = common_speculative_type_from_name(json_value(data, "speculative.draftless_t", common_speculative_type_to_str(defaults.speculative.draftless_type)));
256+
params.speculative.type = common_speculative_type_from_name(json_value(data, "speculative.type", common_speculative_type_to_str(defaults.speculative.type)));
257+
257258
params.speculative.ngram_size_n = json_value(data, "speculative.ngram_size_n", defaults.speculative.ngram_size_n);
258259
params.speculative.ngram_size_m = json_value(data, "speculative.ngram_size_m", defaults.speculative.ngram_size_m);
259260
params.speculative.ngram_check_rate = json_value(data, "speculative.ngram_c_rate", defaults.speculative.ngram_check_rate);

0 commit comments

Comments
 (0)