server: (webui) add --webui-config (#18028)

* server/webui: add server-side WebUI config support

Add the CLI arguments --webui-config (inline JSON) and --webui-config-file
(a file path) to configure the WebUI's default settings from the server side.
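(Sketch, not part of the commit: the flag plumbing is outside the excerpted
diff. Assuming a helper along these lines, --webui-config-file reduces to the
inline case by reading the file into the same string the server parses later;
only the params_base.webui_config_json field in the diff below is from the
actual change.)

#include <fstream>
#include <sstream>
#include <stdexcept>
#include <string>

// Illustrative helper, not the commit's code: load the --webui-config-file
// argument into the same string that --webui-config fills directly.
static std::string read_webui_config_file(const std::string & path) {
    std::ifstream f(path);
    if (!f) {
        throw std::runtime_error("cannot open webui config file: " + path);
    }
    std::ostringstream ss;
    ss << f.rdbuf();  // slurp the whole file
    return ss.str();  // the JSON is validated later, once, in load_model()
}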

Backend changes:
- Parse the JSON once in server_context::load_model() for performance
- Cache the parsed config in the webui_settings member, so /props serves it without re-parsing
- Add proper error handling in router mode with try/catch (see the sketch after this list)
- Expose webui_settings in /props endpoint for both router and child modes
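(Sketch: the router-mode handling referenced above is not in the excerpted
hunks. A minimal standalone version of the try/catch pattern, assuming
nlohmann::json as used elsewhere in the server; the helper name is
hypothetical.)

#include <cstdio>
#include <string>
#include <nlohmann/json.hpp>

// Hypothetical standalone version of the validation done at startup; the
// child/single-process mode does the same inline in load_model().
static bool validate_webui_config(const std::string & config_json, nlohmann::json & out) {
    out = nlohmann::json::object();  // no config means an empty object, not an error
    if (config_json.empty()) {
        return true;
    }
    try {
        out = nlohmann::json::parse(config_json);  // parse once, cache the result
        return true;
    } catch (const std::exception & e) {
        fprintf(stderr, "failed to parse webui config: %s\n", e.what());
        return false;  // invalid config: refuse to start
    }
}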

Frontend changes:
- Add 14 configurable WebUI settings via parameter sync (payload shape sketched after this list)
- Add tests for webui settings extraction
- Fix subpath support by including the base path in API calls
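(Sketch: the frontend is TypeScript, but for consistency with the diff below,
here is the /props payload shape in C++; the two settings keys are made-up
placeholders, not the actual 14 settings wired by the commit.)

#include <cstdio>
#include <nlohmann/json.hpp>

int main() {
    // Trimmed, hypothetical /props payload; only webui_settings matters here.
    nlohmann::json props = {
        { "total_slots", 1 },
        { "webui_settings", {
            { "theme", "dark" },              // placeholder key
            { "showTokensPerSecond", true },  // placeholder key
        } },
    };

    // Extraction pattern: fall back to an empty object when the server sets nothing.
    nlohmann::json settings = props.value("webui_settings", nlohmann::json::object());
    printf("%s\n", settings.dump(2).c_str());
    return 0;
}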

Addresses feedback from @ngxson and @ggerganov

* server: address review feedback from ngxson

* server: regenerate README with llama-gen-docs
Pascal authored on 2025-12-17 21:45:45 +01:00, committed by GitHub
parent e85e9d7637, commit 6ce3d85796
15 changed files with 163 additions and 27 deletions
+13 -1
@@ -544,6 +544,8 @@ struct server_context_impl {
     server_metrics metrics;

+    json webui_settings = json::object();

     // Necessary similarity of prompt for slot selection
     float slot_prompt_similarity = 0.0f;
@@ -575,6 +577,16 @@ struct server_context_impl {
        params_base = params;

+       webui_settings = json::object();
+       if (!params_base.webui_config_json.empty()) {
+           try {
+               webui_settings = json::parse(params_base.webui_config_json);
+           } catch (const std::exception & e) {
+               SRV_ERR("%s: failed to parse webui config: %s\n", __func__, e.what());
+               return false;
+           }
+       }
+
        llama_init = common_init_from_params(params_base);
        model = llama_init->model();
@@ -3103,7 +3115,6 @@ void server_routes::init_routes() {
        };
    }

    // this endpoint is publicly available, please only return what is safe to be exposed
    json data = {
        { "default_generation_settings", default_generation_settings_for_props },
        { "total_slots", ctx_server.params_base.n_parallel },
@@ -3117,6 +3128,7 @@
        { "endpoint_props", params.endpoint_props },
        { "endpoint_metrics", params.endpoint_metrics },
        { "webui", params.webui },
+       { "webui_settings", ctx_server.webui_settings },
        { "chat_template", common_chat_templates_source(ctx_server.chat_templates.get()) },
        { "bos_token", common_token_to_piece(ctx_server.ctx, llama_vocab_bos(ctx_server.vocab), /* special= */ true)},
        { "eos_token", common_token_to_piece(ctx_server.ctx, llama_vocab_eos(ctx_server.vocab), /* special= */ true)},