Skip to content

Commit cf7a308

Browse files
author
Petros Sideris
committed
spec: save the dynamic/static ngram cache file
* resolve the TODO by reading n_draft, save_static and save_dynamic from the speculative parameters in common/common.h * implement the save functionality by writing the cache files when common_speculative_state_ngram_cache is instantiated
1 parent de71b5f commit cf7a308

2 files changed

Lines changed: 20 additions & 16 deletions

File tree

common/common.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ struct common_params_speculative {
308308

309309
// ngram-based speculative decoding
310310

311+
uint16_t ngram_n_draft = 8; // ngram n tokens to draft
311312
uint16_t ngram_size_n = 12; // ngram size for lookup
312313
uint16_t ngram_size_m = 48; // mgram size for speculative tokens
313314
uint16_t ngram_min_hits = 1; // minimum hits at ngram/mgram lookup for mgram to be proposed
@@ -317,6 +318,9 @@ struct common_params_speculative {
317318
std::string lookup_cache_static; // path of static ngram cache file for lookup decoding // NOLINT
318319
std::string lookup_cache_dynamic; // path of dynamic ngram cache file for lookup decoding // NOLINT
319320

321+
bool save_lookup_cache_static = false; // whether or not we should save the static ngram cache file // NOLINT
322+
bool save_lookup_cache_dynamic = false; // whether or not we should save the dynamic ngram cache file // NOLINT
323+
320324
// draft-model speculative decoding
321325

322326
struct common_params_model mparams_dft;

common/speculative.cpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -759,8 +759,6 @@ struct common_speculative_state_ngram_mod : public common_speculative_state {
759759

760760
struct common_speculative_state_ngram_cache : public common_speculative_state {
761761
uint16_t n_draft;
762-
bool save_dynamic;
763-
bool save_static;
764762

765763
common_ngram_cache ngram_cache_context;
766764
common_ngram_cache ngram_cache_dynamic;
@@ -770,19 +768,20 @@ struct common_speculative_state_ngram_cache : public common_speculative_state {
770768

771769
common_speculative_state_ngram_cache(
772770
const enum common_speculative_type type,
771+
uint16_t n_draft,
773772
const std::string & path_static,
774773
const std::string & path_dynamic,
775-
uint16_t n_draft,
776774
bool save_dynamic,
777775
bool save_static)
778776
: common_speculative_state(type)
779777
, n_draft(n_draft)
780-
, save_dynamic(save_dynamic)
781-
, save_static(save_static)
782778
{
783779
if (!path_static.empty()) {
784780
try {
785781
ngram_cache_static = common_ngram_cache_load(path_static);
782+
if (save_static) {
783+
common_ngram_cache_save(ngram_cache_static, path_static);
784+
}
786785
} catch (...) {
787786
LOG_ERR("failed to open static lookup cache: %s", path_static.c_str());
788787
GGML_ABORT("Couldn't read static lookup cache");
@@ -792,6 +791,9 @@ struct common_speculative_state_ngram_cache : public common_speculative_state {
792791
if (!path_dynamic.empty()) {
793792
try {
794793
ngram_cache_dynamic = common_ngram_cache_load(path_dynamic);
794+
if (save_dynamic) {
795+
common_ngram_cache_save(ngram_cache_dynamic, path_dynamic);
796+
}
795797
} catch (...) {
796798
LOG_ERR("failed to open dynamic lookup cache: %s", path_dynamic.c_str());
797799
GGML_ABORT("Couldn't read dynamic lookup cache");
@@ -865,16 +867,15 @@ static common_ngram_map get_common_ngram_map(const common_speculative_config & c
865867
return common_ngram_map(size_key, size_value, key_only, min_hits);
866868
}
867869

868-
static common_speculative_state_ngram_cache create_state_ngram_cache(
869-
const std::string & path_static, const std::string & path_dynamic,
870-
const common_speculative_config & config) {
871-
uint16_t n_draft = 8; // TODO get from config?
872-
873-
// TODO bool param in common/common.h to set save_static/save_dynamic?
874-
bool save_static = false;
875-
bool save_dynamic = false;
870+
// Build the ngram-cache speculative state from the speculative config.
//
// The draft length, the static/dynamic cache paths and the two save flags are
// all read from config.params; the state type comes from config.type.
static common_speculative_state_ngram_cache create_state_ngram_cache(const common_speculative_config & config) {
    // The constructor declares its trailing flags in the order
    // (save_dynamic, save_static), so the dynamic flag is passed first.
    // Passing them static-first would make each flag control the other cache.
    common_speculative_state_ngram_cache state(
        config.type,
        config.params.ngram_n_draft,
        config.params.lookup_cache_static,
        config.params.lookup_cache_dynamic,
        config.params.save_lookup_cache_dynamic,
        config.params.save_lookup_cache_static);

    return state;
}
@@ -1031,8 +1032,7 @@ common_speculative * common_speculative_init(
10311032
break;
10321033
}
10331034
case COMMON_SPECULATIVE_TYPE_NGRAM_CACHE: {
1034-
auto state = create_state_ngram_cache(
1035-
params.lookup_cache_static, params.lookup_cache_dynamic, config);
1035+
auto state = create_state_ngram_cache(config);
10361036
impls.push_back(std::make_unique<common_speculative_state_ngram_cache>(state));
10371037
break;
10381038
}

0 commit comments

Comments
 (0)