server: move msg diffs tracking to HTTP thread (#17740)

* server: move msg diffs tracking to HTTP thread

* wip

* tool call tests ok

* minor : style

* cont : fix

* move states to server_response_reader

* add safe-guard

* fix

* fix 2

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Xuan-Son Nguyen
2025-12-04 15:46:08 +01:00
committed by GitHub
parent 817d743cc1
commit c4c10bfb86
5 changed files with 167 additions and 94 deletions
+9
View File
@@ -7,6 +7,8 @@
#include <mutex>
#include <unordered_set>
// struct for managing server tasks
// in most cases, use server_response_reader to post new tasks and retrieve results
struct server_queue {
private:
int id = 0;
@@ -67,6 +69,8 @@ private:
void cleanup_pending_task(int id_target);
};
// struct for managing server responses
// in most cases, use server_response_reader to retrieve results
struct server_response {
private:
bool running = true;
@@ -120,6 +124,10 @@ struct server_response_reader {
bool cancelled = false;
int polling_interval_seconds;
// tracking generation state and partial tool calls
// only used by streaming completions
std::vector<task_result_state> states;
// Construct a reader on top of a (task queue, result queue) pair.
//   server_queues.first      -> queue_tasks
//   server_queues.second     -> queue_results
//   polling_interval_seconds -> stored as-is in the member of the same name
// note: the should_stop function will be called each polling_interval_seconds
server_response_reader(
        std::pair<server_queue &, server_response &> server_queues,
        int polling_interval_seconds)
    : queue_tasks(server_queues.first)
    , queue_results(server_queues.second)
    , polling_interval_seconds(polling_interval_seconds)
{}
@@ -127,6 +135,7 @@ struct server_response_reader {
stop();
}
void set_states(std::vector<task_result_state> && states);
void post_tasks(std::vector<server_task> && tasks);
bool has_next() const;