Skip to content

Commit a1584ac

Browse files
committed
server: cleanup (remove slot.batch_spec, rename)
1 parent 1e29af4 commit a1584ac

11 files changed

Lines changed: 41 additions & 51 deletions

File tree

common/arg.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -629,7 +629,8 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
629629
{
630630
bool has_draft =!params.speculative.model.path.empty();
631631
bool has_draft_eagle3 = false; // TODO PR-18039: if params.speculative.eagle3
632-
bool has_lookup_caches = !params.lookup_cache_static.empty() && !params.lookup_cache_dynamic.empty();
632+
bool has_lookup_caches = !params.speculative.lookup_cache_static.empty()
633+
&& !params.speculative.lookup_cache_dynamic.empty();
633634
bool has_simple = (params.speculative.draftless_type == COMMON_SPECULATIVE_TYPE_NGRAM_SIMPLE);
634635
bool found_config_draft = false;
635636
bool found_config_eagle3 = false;
@@ -1253,14 +1254,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
12531254
{"-lcs", "--lookup-cache-static"}, "FNAME",
12541255
"path to static lookup cache to use for lookup decoding (not updated by generation)",
12551256
[](common_params & params, const std::string & value) {
1256-
params.lookup_cache_static = value;
1257+
params.speculative.lookup_cache_static = value;
12571258
}
12581259
).set_examples({LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}));
12591260
add_opt(common_arg(
12601261
{"-lcd", "--lookup-cache-dynamic"}, "FNAME",
12611262
"path to dynamic lookup cache to use for lookup decoding (updated by generation)",
12621263
[](common_params & params, const std::string & value) {
1263-
params.lookup_cache_dynamic = value;
1264+
params.speculative.lookup_cache_dynamic = value;
12641265
}
12651266
).set_examples({LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}));
12661267
add_opt(common_arg(

common/common.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -281,11 +281,16 @@ struct common_params_speculative {
281281

282282
struct common_params_model model;
283283

284+
// draftless:
285+
284286
common_speculative_type draftless_type = COMMON_SPECULATIVE_TYPE_NONE; // type of speculative decoding without a draft model
285-
uint16_t spec_ngram_size_n = 12;
286-
uint16_t spec_ngram_size_m = 48;
287+
uint16_t spec_ngram_size_n = 12; // ngram size for lookup
288+
uint16_t spec_ngram_size_m = 48; // mgram size for speculative tokens
287289

288290
std::vector<common_speculative_config> configs = {}; // list of speculative configs to try
291+
292+
std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding // NOLINT
293+
std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding // NOLINT
289294
};
290295

291296
struct common_params_vocoder {
@@ -403,8 +408,6 @@ struct common_params {
403408
std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state // NOLINT
404409
std::string input_prefix = ""; // string to prefix user inputs with // NOLINT
405410
std::string input_suffix = ""; // string to suffix user inputs with // NOLINT
406-
std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding // NOLINT
407-
std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding // NOLINT
408411
std::string logits_file = ""; // file for saving *all* logits // NOLINT
409412

410413
// llama-debug specific options

common/ngram-map.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ void common_ngram_map_draft(common_ngram_map & map,
237237
map.last_draft_value_idx = slot_max; // value used for draft generation.
238238
}
239239

240-
void common_ngram_map_send_accepted(common_ngram_map & map, uint16_t n_accepted) {
240+
void common_ngram_map_accept(common_ngram_map & map, uint16_t n_accepted) {
241241
if (!map.last_draft_created) {
242242
return;
243243
}

common/ngram-map.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,5 @@ void common_ngram_map_draft(
6262
const llama_tokens & inp, llama_token sampled,
6363
llama_tokens & draft);
6464

65-
// Update the statistics of a value after a draft was accepted.
66-
void common_ngram_map_send_accepted(common_ngram_map & map, uint16_t n_accepted);
65+
// Update the statistics of a value after a draft was processed.
66+
void common_ngram_map_accept(common_ngram_map & map, uint16_t n_accepted);

common/speculative.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -251,12 +251,12 @@ enum common_speculative_type common_speculative_type_from_name(const std::string
251251

252252

253253
struct common_speculative * common_speculative_init(
254-
struct common_params & params,
254+
struct common_params_speculative & params,
255255
struct llama_context * ctx_tgt,
256256
struct llama_context * ctx_dft
257257
) {
258258
std::vector<std::unique_ptr<common_speculative_state>> implementations = {};
259-
for (const common_speculative_config & config : params.speculative.configs) {
259+
for (const common_speculative_config & config : params.configs) {
260260
LOG_INF("common_speculative_init: adding implementation %s\n", common_speculative_type_to_str(config.type).c_str());
261261
switch (config.type) {
262262
case COMMON_SPECULATIVE_TYPE_NONE:
@@ -271,7 +271,7 @@ struct common_speculative * common_speculative_init(
271271
}
272272
case COMMON_SPECULATIVE_TYPE_NGRAM_SIMPLE: {
273273
common_ngram_map ngram_map = get_common_ngram_map(config,
274-
params.speculative.spec_ngram_size_n, params.speculative.spec_ngram_size_m);
274+
params.spec_ngram_size_n, params.spec_ngram_size_m);
275275
uint16_t ngram_size_key = ngram_map.size_key;
276276
uint16_t mgram_size_value = ngram_map.size_value;
277277
uint16_t check_rate = ngram_map.check_rate;
@@ -287,14 +287,14 @@ struct common_speculative * common_speculative_init(
287287
case COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K: {
288288
implementations.push_back(std::make_unique<common_speculative_state_ngram_map_k>(
289289
(config.type), get_common_ngram_map(config,
290-
params.speculative.spec_ngram_size_n, params.speculative.spec_ngram_size_m)
290+
params.spec_ngram_size_n, params.spec_ngram_size_m)
291291
));
292292
break;
293293
}
294294
case COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K4V: {
295295
implementations.push_back(std::make_unique<common_speculative_state_ngram_map_k4v>(
296296
(config.type), get_common_ngram_map(config,
297-
params.speculative.spec_ngram_size_n, params.speculative.spec_ngram_size_m)));
297+
params.spec_ngram_size_n, params.spec_ngram_size_m)));
298298
break;
299299
}
300300
case COMMON_SPECULATIVE_TYPE_NGRAM_CACHE: {
@@ -746,7 +746,7 @@ llama_tokens common_speculative_use_draft_model(
746746
return result;
747747
}
748748

749-
void common_speculative_send_accepted(struct common_speculative * spec, const uint16_t n_accepted) {
749+
void common_speculative_accept(struct common_speculative * spec, const uint16_t n_accepted) {
750750
common_speculative_state * impl = spec->curr_impl;
751751
if (impl != nullptr) {
752752
if (n_accepted > 0) {
@@ -756,7 +756,7 @@ void common_speculative_send_accepted(struct common_speculative * spec, const ui
756756
if (impl->type == COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K ||
757757
impl->type == COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K4V) {
758758
auto state = static_cast<struct common_speculative_state_ngram_map_k *>(impl);
759-
common_ngram_map_send_accepted(state->map, n_accepted);
759+
common_ngram_map_accept(state->map, n_accepted);
760760
}
761761
}
762762
}

common/speculative.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ enum common_speculative_type common_speculative_type_from_name(const std::string
2222
std::string common_speculative_type_to_str(enum common_speculative_type type);
2323

2424
struct common_speculative * common_speculative_init(
25-
struct common_params & params,
25+
struct common_params_speculative & params,
2626
struct llama_context * ctx_tgt,
2727
struct llama_context * ctx_dft
2828
);
@@ -45,7 +45,7 @@ llama_tokens common_speculative_gen_draft(
4545
llama_token id_last);
4646

4747
// informs the speculative decoder that n_accepted tokens were accepted by the target model
48-
void common_speculative_send_accepted(
48+
void common_speculative_accept(
4949
struct common_speculative * spec,
5050
const uint16_t n_accepted);
5151

examples/lookup/lookup-create.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ int main(int argc, char ** argv){
3232

3333
common_ngram_cache ngram_cache;
3434
common_ngram_cache_update(ngram_cache, LLAMA_NGRAM_STATIC, LLAMA_NGRAM_STATIC, inp, inp.size(), true);
35-
fprintf(stderr, "%s: hashing done, writing file to %s\n", __func__, params.lookup_cache_static.c_str());
35+
fprintf(stderr, "%s: hashing done, writing file to %s\n", __func__, params.speculative.lookup_cache_static.c_str());
3636

37-
common_ngram_cache_save(ngram_cache, params.lookup_cache_static);
37+
common_ngram_cache_save(ngram_cache, params.speculative.lookup_cache_static);
3838

3939
return 0;
4040
}

examples/lookup/lookup-stats.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,18 +46,18 @@ int main(int argc, char ** argv){
4646
{
4747
const int64_t t_start_draft_us = ggml_time_us();
4848

49-
if (!params.lookup_cache_static.empty()) {
49+
if (!params.speculative.lookup_cache_static.empty()) {
5050
try {
51-
ngram_cache_static = common_ngram_cache_load(params.lookup_cache_static);
51+
ngram_cache_static = common_ngram_cache_load(params.speculative.lookup_cache_static);
5252
} catch (std::ifstream::failure const &) {
53-
LOG_ERR("failed to open static lookup cache: %s", params.lookup_cache_static.c_str());
53+
LOG_ERR("failed to open static lookup cache: %s", params.speculative.lookup_cache_static.c_str());
5454
exit(1);
5555
}
5656
}
5757

58-
if (!params.lookup_cache_dynamic.empty()) {
58+
if (!params.speculative.lookup_cache_dynamic.empty()) {
5959
try {
60-
ngram_cache_dynamic = common_ngram_cache_load(params.lookup_cache_dynamic);
60+
ngram_cache_dynamic = common_ngram_cache_load(params.speculative.lookup_cache_dynamic);
6161
} catch (std::ifstream::failure const &) {} // if the file does not exist it will simply be created at the end of the program
6262
}
6363

examples/lookup/lookup.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,18 +51,18 @@ int main(int argc, char ** argv){
5151
const int64_t t_start_draft_us = ggml_time_us();
5252
common_ngram_cache_update(ngram_cache_context, LLAMA_NGRAM_MIN, LLAMA_NGRAM_MAX, inp, inp.size(), false);
5353

54-
if (!params.lookup_cache_static.empty()) {
54+
if (!params.speculative.lookup_cache_static.empty()) {
5555
try {
56-
ngram_cache_static = common_ngram_cache_load(params.lookup_cache_static);
56+
ngram_cache_static = common_ngram_cache_load(params.speculative.lookup_cache_static);
5757
} catch (std::ifstream::failure const &) {
58-
LOG_ERR("failed to open static lookup cache: %s", params.lookup_cache_static.c_str());
58+
LOG_ERR("failed to open static lookup cache: %s", params.speculative.lookup_cache_static.c_str());
5959
exit(1);
6060
}
6161
}
6262

63-
if (!params.lookup_cache_dynamic.empty()) {
63+
if (!params.speculative.lookup_cache_dynamic.empty()) {
6464
try {
65-
ngram_cache_dynamic = common_ngram_cache_load(params.lookup_cache_dynamic);
65+
ngram_cache_dynamic = common_ngram_cache_load(params.speculative.lookup_cache_dynamic);
6666
} catch (std::ifstream::failure const &) {} // if the file does not exist it will simply be created at the end of the program
6767
}
6868

@@ -210,7 +210,7 @@ int main(int argc, char ** argv){
210210

211211
// Update dynamic ngram cache with context ngram cache and save it to disk:
212212
common_ngram_cache_merge(ngram_cache_dynamic, ngram_cache_context);
213-
common_ngram_cache_save(ngram_cache_dynamic, params.lookup_cache_dynamic);
213+
common_ngram_cache_save(ngram_cache_dynamic, params.speculative.lookup_cache_dynamic);
214214

215215
LOG("\n\n");
216216

examples/speculative-simple/speculative-simple.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ int main(int argc, char ** argv) {
132132
params_spec.n_reuse = llama_n_ctx(ctx_dft) - n_draft;
133133
params_spec.p_min = p_min;
134134

135-
struct common_speculative * spec = common_speculative_init(params, ctx_tgt, ctx_dft);
135+
struct common_speculative * spec = common_speculative_init(params.speculative, ctx_tgt, ctx_dft);
136136
for (auto &pair : params.speculative.replacements) {
137137
common_speculative_add_replacement_tgt_dft(spec, pair.first.c_str(), pair.second.c_str());
138138
}

0 commit comments

Comments
 (0)