server: prevent data race from HTTP threads (#18263)

* server: prevent data race from HTTP threads

* fix params

* fix default_generation_settings

* nits: make handle_completions_impl looks less strange

* stricter const

* fix GGML_ASSERT(idx < states.size())

* move index to be managed by server_response_reader

* http: make sure req & res lifecycle are tied together

* fix compile

* fix index handling buggy

* fix data race for lora endpoint

* nits: fix shadow variable

* nits: revert redundant changes

* nits: correct naming for json_webui_settings
This commit is contained in:
Xuan-Son Nguyen
2025-12-22 14:23:34 +01:00
committed by GitHub
parent 3997c78e33
commit 6ce863c803
11 changed files with 459 additions and 366 deletions
+5 -2
View File
@@ -5,6 +5,7 @@
#include <condition_variable>
#include <deque>
#include <mutex>
#include <vector>
#include <unordered_set>
// struct for managing server tasks
@@ -173,8 +174,10 @@ struct server_response_reader {
int get_new_id() {
return queue_tasks.get_new_id();
}
void post_task(server_task && task);
void post_tasks(std::vector<server_task> && tasks);
// if front = true, the task will be posted to the front of the queue (high priority)
void post_task(server_task && task, bool front = false);
void post_tasks(std::vector<server_task> && tasks, bool front = false);
bool has_next() const;
// return nullptr if should_stop() is true before receiving a result