Skip to content

Commit 4d256ae

Browse files
author
Petros Sideris
committed
spec : save the dynamic/static ngram cache file
* resolve the TODO about n_draft, save_static and save_dynamic by reading them from the parameters declared in common/common.h * implement saving by writing the cache files when common_speculative_state_ngram_cache is destroyed
1 parent 80afa33 commit 4d256ae

2 files changed

Lines changed: 28 additions & 12 deletions

File tree

common/common.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,9 +342,12 @@ struct common_params_speculative_ngram_map {
342342
uint16_t min_hits = 1; // minimum hits at ngram/mgram lookup for mgram to be proposed
343343
};
344344

345-
struct common_params_speculative_ngram_cache {
345+
struct common_params_speculative_ngram_cache : common_params_speculative_ngram_map {
346346
std::string lookup_cache_static; // path of static ngram cache file for lookup decoding
347347
std::string lookup_cache_dynamic; // path of dynamic ngram cache file for lookup decoding
348+
349+
bool save_lookup_cache_static = false; // whether or not we should save the static ngram cache file // NOLINT
350+
bool save_lookup_cache_dynamic = false; // whether or not we should save the dynamic ngram cache file // NOLINT
348351
};
349352

350353
struct common_params_speculative {

common/speculative.cpp

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -809,6 +809,9 @@ struct common_speculative_state_ngram_cache : public common_speculative_state {
809809
bool save_dynamic;
810810
bool save_static;
811811

812+
const std::string path_static;
813+
const std::string path_dynamic;
814+
812815
common_ngram_cache ngram_cache_context;
813816
common_ngram_cache ngram_cache_dynamic;
814817
common_ngram_cache ngram_cache_static;
@@ -817,15 +820,17 @@ struct common_speculative_state_ngram_cache : public common_speculative_state {
817820

818821
common_speculative_state_ngram_cache(
819822
const enum common_speculative_type type,
823+
uint16_t n_draft,
820824
const std::string & path_static,
821825
const std::string & path_dynamic,
822-
uint16_t n_draft,
823826
bool save_dynamic,
824827
bool save_static)
825828
: common_speculative_state(type)
826829
, n_draft(n_draft)
827830
, save_dynamic(save_dynamic)
828831
, save_static(save_static)
832+
, path_static(path_static)
833+
, path_dynamic(path_dynamic)
829834
{
830835
if (!path_static.empty()) {
831836
try {
@@ -846,6 +851,15 @@ struct common_speculative_state_ngram_cache : public common_speculative_state {
846851
}
847852
}
848853

854+
// Persist the ngram caches when this state is destroyed.
//
// A cache is written only when its save flag is set AND a file path was
// configured. The constructor also checks path_static for emptiness before
// using it, so an unset path is treated as "no cache file" on the save side too
// instead of being handed to common_ngram_cache_save.
//
// NOTE(review): this runs for every destroyed instance, copies included —
// confirm that no short-lived copy of the state is destroyed while the caches
// are still incomplete.
~common_speculative_state_ngram_cache() override {
    if (save_static && !path_static.empty()) {
        common_ngram_cache_save(ngram_cache_static, path_static);
    }
    if (save_dynamic && !path_dynamic.empty()) {
        common_ngram_cache_save(ngram_cache_dynamic, path_dynamic);
    }
}
862+
849863
void begin(const llama_tokens & prompt) override {
850864
GGML_UNUSED(prompt);
851865
}
@@ -922,16 +936,15 @@ static common_ngram_map get_common_ngram_map(
922936
return common_ngram_map(size_key, size_value, key_only, min_hits);
923937
}
924938

925-
static common_speculative_state_ngram_cache create_state_ngram_cache(
926-
const std::string & path_static, const std::string & path_dynamic,
927-
const common_speculative_config & config) {
928-
uint16_t n_draft = 8; // TODO get from config?
929-
930-
// TODO bool param in common/common.h to set save_static/save_dynamic?
931-
bool save_static = false;
932-
bool save_dynamic = false;
939+
// Build the ngram-cache speculative state from the speculative config.
//
// Every input is read from config.params.ngram_cache. The constructor's
// parameter order is (type, n_draft, path_static, path_dynamic, save_dynamic,
// save_static), so the dynamic save flag must be passed before the static one;
// passing them the other way round silently swaps which cache gets saved.
//
// NOTE(review): size_n is forwarded as n_draft — confirm this is the intended field.
static common_speculative_state_ngram_cache create_state_ngram_cache(const common_speculative_config & config) {
    const auto & cache_params = config.params.ngram_cache;

    // Returned as a prvalue: the state's destructor may write the cache files,
    // so avoid a named local that could be copied and destroyed here.
    return common_speculative_state_ngram_cache(
        config.type,
        cache_params.size_n,
        cache_params.lookup_cache_static,
        cache_params.lookup_cache_dynamic,
        cache_params.save_lookup_cache_dynamic,
        cache_params.save_lookup_cache_static);
}
@@ -1089,7 +1102,7 @@ common_speculative * common_speculative_init(
10891102
break;
10901103
}
10911104
case COMMON_SPECULATIVE_TYPE_NGRAM_CACHE: {
1092-
auto state = create_state_ngram_cache(params.ngram_cache.lookup_cache_static, params.ngram_cache.lookup_cache_dynamic, config);
1105+
auto state = create_state_ngram_cache(config);
10931106
impls.push_back(std::make_unique<common_speculative_state_ngram_cache>(state));
10941107
break;
10951108
}

0 commit comments

Comments
 (0)