llama-server: fix model params not propagated (#21509)

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
This commit is contained in:
Aaron Teo
2026-04-07 21:39:41 +08:00
committed by GitHub
parent 0d049d6a92
commit 69c28f1547
2 changed files with 6 additions and 3 deletions
+5 -2
View File
@@ -632,7 +632,7 @@ private:
     // load the model and initialize llama_context
     // this may also be called to resume from sleeping state
-    bool load_model(const common_params & params) {
+    bool load_model(common_params & params) {
         bool is_resume = sleeping;
         SRV_INF("loading model '%s'\n", params.model.path.c_str());
@@ -641,6 +641,9 @@ private:
         llama_init = common_init_from_params(params_base);
+        // propagate model-metadata sampling defaults back to caller
+        params.sampling = params_base.sampling;
         model = llama_init->model();
         ctx = llama_init->context();
@@ -2978,7 +2981,7 @@ private:
 server_context::server_context() : impl(new server_context_impl()) {}
 server_context::~server_context() = default;
-bool server_context::load_model(const common_params & params) {
+bool server_context::load_model(common_params & params) {
     return impl->load_model(params);
 }
+1 -1
View File
@@ -56,7 +56,7 @@ struct server_context {
     // load the model and initialize llama_context
     // returns true on success
-    bool load_model(const common_params & params);
+    bool load_model(common_params & params);
     // this function will block main thread until termination
     void start_loop();