Skip to content

Commit 2e1c956

Browse files
author
Petros Sideris
committed
spec: save the dynamic/static ngram cache file
* fix todo on providing n_draft, save_static and save_dynamic from common/common.h * implement the functionality by saving the cache at the common_speculative_state_ngram_cache instantiation
1 parent 45cac7c commit 2e1c956

2 files changed

Lines changed: 20 additions & 16 deletions

File tree

common/common.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,7 @@ struct common_params_speculative {
309309

310310
// ngram-based speculative decoding
311311

312+
uint16_t ngram_n_draft = 8; // ngram n tokens to draft
312313
uint16_t ngram_size_n = 12; // ngram size for lookup
313314
uint16_t ngram_size_m = 48; // mgram size for speculative tokens
314315
uint16_t ngram_min_hits = 1; // minimum hits at ngram/mgram lookup for mgram to be proposed
@@ -318,6 +319,9 @@ struct common_params_speculative {
318319
std::string lookup_cache_static; // path of static ngram cache file for lookup decoding // NOLINT
319320
std::string lookup_cache_dynamic; // path of dynamic ngram cache file for lookup decoding // NOLINT
320321

322+
bool save_lookup_cache_static = false; // whether or not we should save the static ngram cache file // NOLINT
323+
bool save_lookup_cache_dynamic = false; // whether or not we should save the dynamic ngram cache file // NOLINT
324+
321325
// draft-model speculative decoding
322326

323327
struct common_params_model mparams_dft;

common/speculative.cpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -646,8 +646,6 @@ struct common_speculative_state_ngram_mod : public common_speculative_state {
646646

647647
struct common_speculative_state_ngram_cache : public common_speculative_state {
648648
uint16_t n_draft;
649-
bool save_dynamic;
650-
bool save_static;
651649

652650
common_ngram_cache ngram_cache_context;
653651
common_ngram_cache ngram_cache_dynamic;
@@ -657,19 +655,20 @@ struct common_speculative_state_ngram_cache : public common_speculative_state {
657655

658656
common_speculative_state_ngram_cache(
659657
const enum common_speculative_type type,
658+
uint16_t n_draft,
660659
const std::string & path_static,
661660
const std::string & path_dynamic,
662-
uint16_t n_draft,
663661
bool save_dynamic,
664662
bool save_static)
665663
: common_speculative_state(type)
666664
, n_draft(n_draft)
667-
, save_dynamic(save_dynamic)
668-
, save_static(save_static)
669665
{
670666
if (!path_static.empty()) {
671667
try {
672668
ngram_cache_static = common_ngram_cache_load(path_static);
669+
if (save_static) {
670+
common_ngram_cache_save(ngram_cache_static, path_static);
671+
}
673672
} catch (...) {
674673
LOG_ERR("failed to open static lookup cache: %s", path_static.c_str());
675674
GGML_ABORT("Couldn't read static lookup cache");
@@ -679,6 +678,9 @@ struct common_speculative_state_ngram_cache : public common_speculative_state {
679678
if (!path_dynamic.empty()) {
680679
try {
681680
ngram_cache_dynamic = common_ngram_cache_load(path_dynamic);
681+
if (save_dynamic) {
682+
common_ngram_cache_save(ngram_cache_dynamic, path_dynamic);
683+
}
682684
} catch (...) {
683685
LOG_ERR("failed to open dynamic lookup cache: %s", path_dynamic.c_str());
684686
GGML_ABORT("Couldn't read dynamic lookup cache");
@@ -751,16 +753,15 @@ static common_ngram_map get_common_ngram_map(const common_speculative_config & c
751753
return common_ngram_map(size_key, size_value, key_only, min_hits);
752754
}
753755

754-
static common_speculative_state_ngram_cache create_state_ngram_cache(
755-
const std::string & path_static, const std::string & path_dynamic,
756-
const common_speculative_config & config) {
757-
uint16_t n_draft = 8; // TODO get from config?
758-
759-
// TODO bool param in common/common.h to set save_static/save_dynamic?
760-
bool save_static = false;
761-
bool save_dynamic = false;
756+
// Builds the ngram-cache speculative state from the values carried in config.params.
// NOTE: the constructor's trailing flags are declared as (save_dynamic, save_static),
// so the dynamic flag must be passed before the static one.
static common_speculative_state_ngram_cache create_state_ngram_cache(const common_speculative_config & config) {
762757

763-
common_speculative_state_ngram_cache state(config.type, path_static, path_dynamic, n_draft, save_static, save_dynamic);
758+
common_speculative_state_ngram_cache state(
759+
config.type,
760+
config.params.ngram_n_draft,
761+
config.params.lookup_cache_static,
762+
config.params.lookup_cache_dynamic,
763+
config.params.save_lookup_cache_dynamic, // -> ctor parameter save_dynamic
764+
config.params.save_lookup_cache_static); // -> ctor parameter save_static
764765

765766
return state;
766767
}
@@ -950,8 +951,7 @@ common_speculative * common_speculative_init(
950951
break;
951952
}
952953
case COMMON_SPECULATIVE_TYPE_NGRAM_CACHE: {
953-
auto state = create_state_ngram_cache(
954-
params.lookup_cache_static, params.lookup_cache_dynamic, config);
954+
auto state = create_state_ngram_cache(config);
955955
impls.push_back(std::make_unique<common_speculative_state_ngram_cache>(state));
956956
break;
957957
}

0 commit comments

Comments
 (0)