Skip to content

Commit 5c5bea4

Browse files
author
Petros Sideris
committed
spec : save the dynamic/static ngram cache file
* resolve the TODO by taking n_draft, save_static and save_dynamic from the parameters declared in common/common.h
* implement saving by writing the caches to disk when common_speculative_state_ngram_cache is destroyed
1 parent 1cbc846 commit 5c5bea4

2 files changed

Lines changed: 28 additions & 12 deletions

File tree

common/common.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,9 +342,12 @@ struct common_params_speculative_ngram_map {
342342
uint16_t min_hits = 1; // minimum hits at ngram/mgram lookup for mgram to be proposed
343343
};
344344

345-
struct common_params_speculative_ngram_cache {
345+
// Parameters for ngram-cache lookup decoding; extends the ngram-map parameters.
struct common_params_speculative_ngram_cache : public common_params_speculative_ngram_map {
    // paths of the ngram cache files used for lookup decoding
    std::string lookup_cache_static;  // static ngram cache file
    std::string lookup_cache_dynamic; // dynamic ngram cache file

    // whether or not the corresponding ngram cache file should be saved
    bool save_lookup_cache_static  = false; // static ngram cache file  // NOLINT
    bool save_lookup_cache_dynamic = false; // dynamic ngram cache file // NOLINT
};
349352

350353
struct common_params_speculative {

common/speculative.cpp

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -820,6 +820,9 @@ struct common_speculative_state_ngram_cache : public common_speculative_state {
820820
bool save_dynamic;
821821
bool save_static;
822822

823+
const std::string path_static;
824+
const std::string path_dynamic;
825+
823826
common_ngram_cache ngram_cache_context;
824827
common_ngram_cache ngram_cache_dynamic;
825828
common_ngram_cache ngram_cache_static;
@@ -828,15 +831,17 @@ struct common_speculative_state_ngram_cache : public common_speculative_state {
828831

829832
common_speculative_state_ngram_cache(
830833
const enum common_speculative_type type,
834+
uint16_t n_draft,
831835
const std::string & path_static,
832836
const std::string & path_dynamic,
833-
uint16_t n_draft,
834837
bool save_dynamic,
835838
bool save_static)
836839
: common_speculative_state(type)
837840
, n_draft(n_draft)
838841
, save_dynamic(save_dynamic)
839842
, save_static(save_static)
843+
, path_static(path_static)
844+
, path_dynamic(path_dynamic)
840845
{
841846
if (!path_static.empty()) {
842847
try {
@@ -857,6 +862,15 @@ struct common_speculative_state_ngram_cache : public common_speculative_state {
857862
}
858863
}
859864

865+
// Persist the ngram caches when the state is destroyed.
//
// A cache is written only when its save flag is set AND a path was supplied,
// mirroring the `!path_static.empty()` check the constructor performs before
// loading; without the path check a set flag would trigger a save to an empty
// file name.
//
// NOTE(review): common_speculative_init builds a local state object and
// copy-constructs it into the unique_ptr, so the local's destructor runs this
// save as well - confirm that the extra save is intended.
~common_speculative_state_ngram_cache() override {
    if (save_static && !path_static.empty()) {
        common_ngram_cache_save(ngram_cache_static, path_static);
    }
    if (save_dynamic && !path_dynamic.empty()) {
        common_ngram_cache_save(ngram_cache_dynamic, path_dynamic);
    }
}
873+
860874
void begin(const llama_tokens & prompt) override {
861875
GGML_UNUSED(prompt);
862876
}
@@ -933,16 +947,15 @@ static common_ngram_map get_common_ngram_map(
933947
return common_ngram_map(size_key, size_value, key_only, min_hits);
934948
}
935949

936-
static common_speculative_state_ngram_cache create_state_ngram_cache(
937-
const std::string & path_static, const std::string & path_dynamic,
938-
const common_speculative_config & config) {
939-
uint16_t n_draft = 8; // TODO get from config?
940-
941-
// TODO bool param in common/common.h to set save_static/save_dynamic?
942-
bool save_static = false;
943-
bool save_dynamic = false;
950+
// Build the ngram-cache speculative state from the speculative config.
//
// The constructor's trailing flags are declared as (save_dynamic, save_static),
// in that order, so the dynamic flag must be passed first; passing the static
// flag first would make each flag control the other cache.
//
// The state is returned as a prvalue so that no named temporary (whose
// destructor saves the caches) is created inside this function.
//
// NOTE(review): size_n is forwarded as the constructor's n_draft argument -
// confirm that this is the intended source for the draft length.
static common_speculative_state_ngram_cache create_state_ngram_cache(const common_speculative_config & config) {
    const auto & cache_params = config.params.ngram_cache;

    return common_speculative_state_ngram_cache(
        config.type,
        cache_params.size_n,
        cache_params.lookup_cache_static,
        cache_params.lookup_cache_dynamic,
        /* save_dynamic = */ cache_params.save_lookup_cache_dynamic,
        /* save_static  = */ cache_params.save_lookup_cache_static);
}
@@ -1100,7 +1113,7 @@ common_speculative * common_speculative_init(
11001113
break;
11011114
}
11021115
case COMMON_SPECULATIVE_TYPE_NGRAM_CACHE: {
1103-
auto state = create_state_ngram_cache(params.ngram_cache.lookup_cache_static, params.ngram_cache.lookup_cache_dynamic, config);
1116+
auto state = create_state_ngram_cache(config);
11041117
impls.push_back(std::make_unique<common_speculative_state_ngram_cache>(state));
11051118
break;
11061119
}

0 commit comments

Comments
 (0)