Skip to content

Commit 516e8d7

Browse files
authored
server: use pos_next instead of n_tokens for m-rope (#22439)
1 parent 434b2a1 commit 516e8d7

1 file changed

Lines changed: 1 addition & 1 deletion

File tree

tools/server/server-context.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3031,7 +3031,7 @@ struct server_context_impl {
3031 3031
slot.sampled = ids.back(); // last accepted token
3032 3032
SLT_DBG(slot, "add accepted tokens: sampled=%d, ids.size=%zu, n_draft=%zu\n", slot.sampled, ids.size(), n_draft);
3033 3033

3034-
llama_memory_seq_rm(llama_get_memory(slot.ctx), slot.id, slot.prompt.n_tokens(), -1);
3034+
llama_memory_seq_rm(llama_get_memory(slot.ctx), slot.id, slot.prompt.tokens.pos_next(), -1);
3035 3035

3036 3036
for (size_t i = 0; i < ids.size(); ++i) {
3037 3037
completion_token_output result;

0 commit comments

Comments
 (0)