Skip to content

Commit b867b67

Browse files
committed
Added a mechanism to perform a full KV-cache clear when fast-forward is not used; this should help recover from bad states.
1 parent 3550265 commit b867b67

File tree

3 files changed

+19
-5
lines changed

3 files changed

+19
-5
lines changed

gpttype_adapter.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3782,7 +3782,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
37823782
{
37833783
if(kcpp_data->use_fastforward)
37843784
{
3785-
ContextFastForward(current_context_tokens, embd_inp, n_past, last_n_tokens, nctx, smartcontext, false, true);
3785+
ContextFastForward(current_context_tokens, embd_inp, n_past, last_n_tokens, nctx, smartcontext, false, true, 0);
37863786
}
37873787
}
37883788
if(is_recurrent)
@@ -3830,12 +3830,19 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
38303830
}
38313831
if(kcpp_data->use_fastforward)
38323832
{
3833-
ContextFastForward(current_context_tokens, embd_inp, n_past, last_n_tokens, nctx, smartcontext, triggersc, false);
3833+
ContextFastForward(current_context_tokens, embd_inp, n_past, last_n_tokens, nctx, smartcontext, triggersc, false, 4);
38343834
}
38353835
}
38363836
if(file_format == FileFormat::GGUF_GENERIC)
38373837
{
3838-
llama_memory_seq_rm(llama_get_memory(llama_ctx_v4), 0, n_past, -1);
3838+
if(n_past==0) //force full clear
3839+
{
3840+
llama_memory_clear(llama_get_memory(llama_ctx_v4),true);
3841+
}
3842+
else
3843+
{
3844+
llama_memory_seq_rm(llama_get_memory(llama_ctx_v4), 0, n_past, -1);
3845+
}
38393846
if(draft_ctx)
38403847
{
38413848
llama_memory_seq_rm(llama_get_memory(draft_ctx), 0, n_past, -1);

model_adapter.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -513,7 +513,7 @@ std::string gguf_get_model_arch(const std::string & gguf_filename)
513513

514514
void ContextFastForward(std::vector<int> &current_context_tokens, std::vector<int> &embd_inp,
515515
int &n_past, std::vector<int> &last_n_tokens, const int nctx, std::vector<int> &smartcontext,
516-
bool useSmartContext, const bool requireFullSubset)
516+
bool useSmartContext, const bool requireFullSubset, const int minimum_to_proceed)
517517
{
518518
const int SCCtxLenThreshold = nctx * 0.8; //how much context length must be reach to trigger smartcontext
519519
const int SCInpLenThreshold = nctx * 0.6; //how big must the input array be to trigger smartcontext
@@ -568,6 +568,13 @@ std::string gguf_get_model_arch(const std::string & gguf_filename)
568568
}
569569
}
570570

571+
if(n_past < minimum_to_proceed) //too few tokens to fast forward, so lets start fresh
572+
{
573+
last_n_tokens.erase(last_n_tokens.end() - n_past, last_n_tokens.end());
574+
n_past = 0;
575+
fastforwardok = false;
576+
}
577+
571578
if(fastforwardok)
572579
{
573580
last_n_tokens.erase(last_n_tokens.begin(), last_n_tokens.begin() + n_past);

model_adapter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ int ArrFindIndexOf(const std::vector<int> targetArray, const std::vector<int> se
129129
FileFormat check_file_format(const std::string & fname, FileFormatExtraMeta * fileformatmeta);
130130
void ContextFastForward(std::vector<int> &current_context_tokens, std::vector<int> &embd_inp,
131131
int &n_past, std::vector<int> &last_n_tokens, const int nctx, std::vector<int> &smartcontext,
132-
const bool useSmartContext, const bool requireFullSubset);
132+
const bool useSmartContext, const bool requireFullSubset, const int minimum_to_proceed);
133133
bool gguf_tensor_exists(const std::string & filename, std::string tensor_name, bool exactmatch);
134134
std::string gguf_get_model_arch(const std::string & filename);
135135

0 commit comments

Comments (0)