@@ -3782,7 +3782,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
37823782 {
37833783 if (kcpp_data->use_fastforward )
37843784 {
3785- ContextFastForward (current_context_tokens, embd_inp, n_past, last_n_tokens, nctx, smartcontext, false , true );
3785+ ContextFastForward (current_context_tokens, embd_inp, n_past, last_n_tokens, nctx, smartcontext, false , true , 0 );
37863786 }
37873787 }
37883788 if (is_recurrent)
@@ -3830,12 +3830,19 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
38303830 }
38313831 if (kcpp_data->use_fastforward )
38323832 {
3833- ContextFastForward (current_context_tokens, embd_inp, n_past, last_n_tokens, nctx, smartcontext, triggersc, false );
3833+ ContextFastForward (current_context_tokens, embd_inp, n_past, last_n_tokens, nctx, smartcontext, triggersc, false , 4 );
38343834 }
38353835 }
38363836 if (file_format == FileFormat::GGUF_GENERIC)
38373837 {
3838- llama_memory_seq_rm (llama_get_memory (llama_ctx_v4), 0 , n_past, -1 );
3838+ if (n_past==0 ) // force full clear
3839+ {
3840+ llama_memory_clear (llama_get_memory (llama_ctx_v4),true );
3841+ }
3842+ else
3843+ {
3844+ llama_memory_seq_rm (llama_get_memory (llama_ctx_v4), 0 , n_past, -1 );
3845+ }
38393846 if (draft_ctx)
38403847 {
38413848 llama_memory_seq_rm (llama_get_memory (draft_ctx), 0 , n_past, -1 );
0 commit comments