server: (router) add stop-timeout option (#18350)

* server: (router) add stop-timeout option

* also allow stopping a model while it is still loading

* add docs

* unload_lru: also wait for unload to complete
This commit is contained in:
Xuan-Son Nguyen
2025-12-24 23:47:49 +01:00
committed by GitHub
parent 4cbafad4f0
commit f5acfb2ffa
5 changed files with 133 additions and 64 deletions
+7 -1
View File
@@ -9,6 +9,7 @@
#include <condition_variable>
#include <functional>
#include <memory>
#include <set>
/**
* state diagram:
@@ -56,6 +57,7 @@ struct server_model_meta {
int64_t last_used = 0; // for LRU unloading
std::vector<std::string> args; // args passed to the model instance, will be populated by render_args()
int exit_code = 0; // exit code of the model instance process (only valid if status == FAILED)
int stop_timeout = 0; // seconds to wait before force-killing the model instance during shutdown
bool is_active() const {
return status == SERVER_MODEL_STATUS_LOADED || status == SERVER_MODEL_STATUS_LOADING;
@@ -83,6 +85,10 @@ private:
std::condition_variable cv;
std::map<std::string, instance_t> mapping;
// for stopping models
std::condition_variable cv_stop;
std::set<std::string> stopping_models;
common_preset_context ctx_preset;
common_params base_params;
@@ -119,7 +125,7 @@ public:
void unload_all();
// update the status of a model instance (thread-safe)
void update_status(const std::string & name, server_model_status status);
void update_status(const std::string & name, server_model_status status, int exit_code);
// wait until the model instance is fully loaded (thread-safe)
// return when the model is loaded or failed to load