diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 08ff1e362..567ddd4f9 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -3031,7 +3031,7 @@ private: slot.sampled = ids.back(); // last accepted token SLT_DBG(slot, "add accepted tokens: sampled=%d, ids.size=%zu, n_draft=%zu\n", slot.sampled, ids.size(), n_draft); - llama_memory_seq_rm(llama_get_memory(slot.ctx), slot.id, slot.prompt.n_tokens(), -1); + llama_memory_seq_rm(llama_get_memory(slot.ctx), slot.id, slot.prompt.tokens.pos_next(), -1); for (size_t i = 0; i < ids.size(); ++i) { completion_token_output result;