server : improve context checkpoint logic (#19408)

This commit is contained in:
Georgi Gerganov
2026-02-08 09:40:04 +02:00
committed by GitHub
parent 5999b50eb0
commit eb449cdfa4
+2 -1
View File
@@ -2507,7 +2507,8 @@ private:
slot.n_prompt_tokens_processed++;
// process the last few tokens of the prompt separately in order to allow for a checkpoint to be created.
-if (do_checkpoint && slot.task->n_tokens() - slot.prompt.n_tokens() == 64) {
+const int n_last = std::min(n_batch, 512);
+if (do_checkpoint && slot.task->n_tokens() == slot.prompt.n_tokens() + n_last) {
break;
}
}