@@ -2512,7 +2512,7 @@ struct server_context_impl {
25122512 llama_memory_seq_pos_max (llama_get_memory (ctx_tgt), slot.id ));
25132513
25142514 if (use_ckpt_dft) {
2515- slot.spec_ckpt .update_dft (ctx_dft.get (), slot.id , LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY | LLAMA_STATE_SEQ_FLAGS_ON_DEVICE );
2515+ slot.spec_ckpt .update_dft (ctx_dft.get (), slot.id , LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY );
25162516 }
25172517
25182518 slot.spec_prompt = slot.prompt .tokens .get_text_tokens ();
@@ -2551,7 +2551,7 @@ struct server_context_impl {
25512551
25522552 if (ctx_dft) {
25532553 if (use_ckpt_dft) {
2554- ckpt.load_dft (ctx_dft.get (), slot.id , LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY | LLAMA_STATE_SEQ_FLAGS_ON_DEVICE );
2554+ ckpt.load_dft (ctx_dft.get (), slot.id , LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY );
25552555 }
25562556
25572557 common_context_seq_rm (ctx_dft.get (), slot.id , ckpt.pos_max + 1 , -1 );
@@ -2568,7 +2568,7 @@ struct server_context_impl {
25682568 if (use_ckpt_tgt) {
25692569 // const int64_t t_start = ggml_time_us();
25702570
2571- ckpt.update_tgt (ctx_tgt, slot.id , LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY | LLAMA_STATE_SEQ_FLAGS_ON_DEVICE );
2571+ ckpt.update_tgt (ctx_tgt, slot.id , LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY );
25722572
25732573 // const int64_t t_total = ggml_time_us() - t_start;
25742574 // printf("checkpoint total: %f ms\n", t_total / 1000.0);
@@ -2580,7 +2580,7 @@ struct server_context_impl {
25802580 }
25812581
25822582 if (use_ckpt_dft) {
2583- ckpt.update_dft (ctx_dft.get (), slot.id , LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY | LLAMA_STATE_SEQ_FLAGS_ON_DEVICE );
2583+ ckpt.update_dft (ctx_dft.get (), slot.id , LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY );
25842584 }
25852585 }
25862586 }
@@ -3447,13 +3447,13 @@ struct server_context_impl {
34473447 SLT_DBG (slot, " restoring speculative checkpoint (pos_min = %d, pos_max = %d, size = %zu)\n " , ckpt.pos_min , ckpt.pos_max , ckpt.size ());
34483448
34493449 {
3450- ckpt.load_tgt (slot.ctx_tgt , slot.id , LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY | LLAMA_STATE_SEQ_FLAGS_ON_DEVICE );
3450+ ckpt.load_tgt (slot.ctx_tgt , slot.id , LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY );
34513451
34523452 common_context_seq_rm (slot.ctx_tgt , slot.id , ckpt.pos_max + 1 , -1 );
34533453 }
34543454
34553455 if (slot.ctx_dft ) {
3456- ckpt.load_dft (slot.ctx_dft , slot.id , LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY | LLAMA_STATE_SEQ_FLAGS_ON_DEVICE );
3456+ ckpt.load_dft (slot.ctx_dft , slot.id , LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY );
34573457
34583458 common_context_seq_rm (slot.ctx_dft , slot.id , ckpt.pos_max + 1 , -1 );
34593459 }
0 commit comments