server : support multiple model aliases via comma-separated --alias (#19926)

* server : support multiple model aliases via comma-separated --alias

* server : update --alias description and regenerate docs

* server : multiple model aliases and tags

- address review feedback from ngxson
- --alias accepts comma-separated values (std::set, no duplicates)
- --tags for informational metadata (not used for routing)
- aliases resolve transparently in router via get_meta/has_model
- /v1/models exposes aliases and tags fields

* regenerate docs

* nits

* server : use first alias as model_name for backward compat

address review feedback from ngxson

* server : add single-model test for aliases and tags
This commit is contained in:
Pascal
2026-02-27 07:05:23 +01:00
committed by GitHub
parent a8b192b6ec
commit 2e7e638523
12 changed files with 173 additions and 32 deletions
+91 -8
View File
@@ -184,6 +184,51 @@ void server_models::add_model(server_model_meta && meta) {
if (mapping.find(meta.name) != mapping.end()) {
throw std::runtime_error(string_format("model '%s' appears multiple times", meta.name.c_str()));
}
// check model name does not conflict with existing aliases
for (const auto & [key, inst] : mapping) {
if (inst.meta.aliases.count(meta.name)) {
throw std::runtime_error(string_format("model name '%s' conflicts with alias of model '%s'",
meta.name.c_str(), key.c_str()));
}
}
// parse aliases from preset's --alias option (comma-separated)
std::string alias_str;
if (meta.preset.get_option("LLAMA_ARG_ALIAS", alias_str) && !alias_str.empty()) {
for (auto & alias : string_split<std::string>(alias_str, ',')) {
alias = string_strip(alias);
if (!alias.empty()) {
meta.aliases.insert(alias);
}
}
}
// parse tags from preset's --tags option (comma-separated)
std::string tags_str;
if (meta.preset.get_option("LLAMA_ARG_TAGS", tags_str) && !tags_str.empty()) {
for (auto & tag : string_split<std::string>(tags_str, ',')) {
tag = string_strip(tag);
if (!tag.empty()) {
meta.tags.insert(tag);
}
}
}
// validate aliases do not conflict with existing names or aliases
for (const auto & alias : meta.aliases) {
if (mapping.find(alias) != mapping.end()) {
throw std::runtime_error(string_format("alias '%s' for model '%s' conflicts with existing model name",
alias.c_str(), meta.name.c_str()));
}
for (const auto & [key, inst] : mapping) {
if (inst.meta.aliases.count(alias)) {
throw std::runtime_error(string_format("alias '%s' for model '%s' conflicts with alias of model '%s'",
alias.c_str(), meta.name.c_str(), key.c_str()));
}
}
}
meta.update_args(ctx_preset, bin_path); // render args
std::string name = meta.name;
mapping[name] = instance_t{
@@ -249,6 +294,8 @@ void server_models::load_models() {
server_model_meta meta{
/* preset */ preset.second,
/* name */ preset.first,
/* aliases */ {},
/* tags */ {},
/* port */ 0,
/* status */ SERVER_MODEL_STATUS_UNLOADED,
/* last_used */ 0,
@@ -265,10 +312,28 @@ void server_models::load_models() {
for (const auto & [name, preset] : custom_presets) {
custom_names.insert(name);
}
// Concatenates the elements of a string set, in the set's iteration order,
// into one string with ", " between entries.
auto join_set = [](const std::set<std::string> & s) {
    std::string joined;
    for (const std::string & item : s) {
        // a separator is emitted only once something has already been accumulated
        const bool need_sep = !joined.empty();
        if (need_sep) {
            joined.append(", ");
        }
        joined.append(item);
    }
    return joined;
};
SRV_INF("Available models (%zu) (*: custom preset)\n", mapping.size());
for (const auto & [name, inst] : mapping) {
bool has_custom = custom_names.find(name) != custom_names.end();
SRV_INF(" %c %s\n", has_custom ? '*' : ' ', name.c_str());
std::string info;
if (!inst.meta.aliases.empty()) {
info += " (aliases: " + join_set(inst.meta.aliases) + ")";
}
if (!inst.meta.tags.empty()) {
info += " [tags: " + join_set(inst.meta.tags) + "]";
}
SRV_INF(" %c %s%s\n", has_custom ? '*' : ' ', name.c_str(), info.c_str());
}
}
@@ -320,7 +385,15 @@ void server_models::update_meta(const std::string & name, const server_model_met
// Reports whether `name` identifies a known model.
//
// A model is considered known when `name` is either a key of `mapping`
// (the canonical model name) or a member of some entry's `meta.aliases` set.
// The lookup runs while holding `mutex`.
//
// @param name  model name or alias to look up
// @return      true if a matching name or alias exists, false otherwise
bool server_models::has_model(const std::string & name) {
    std::lock_guard<std::mutex> lk(mutex);
    // fast path: exact match on the canonical model name
    if (mapping.find(name) != mapping.end()) {
        return true;
    }
    // otherwise fall back to scanning every model's alias set
    for (const auto & [key, inst] : mapping) {
        if (inst.meta.aliases.count(name)) {
            return true;
        }
    }
    return false;
}
std::optional<server_model_meta> server_models::get_meta(const std::string & name) {
@@ -329,6 +402,11 @@ std::optional<server_model_meta> server_models::get_meta(const std::string & nam
if (it != mapping.end()) {
return it->second.meta;
}
for (const auto & [key, inst] : mapping) {
if (inst.meta.aliases.count(name)) {
return inst.meta;
}
}
return std::nullopt;
}
@@ -766,7 +844,7 @@ static void res_err(std::unique_ptr<server_http_res> & res, const json & error_d
res->data = safe_json_to_str({{ "error", error_data }});
}
static bool router_validate_model(const std::string & name, server_models & models, bool models_autoload, std::unique_ptr<server_http_res> & res) {
static bool router_validate_model(std::string & name, server_models & models, bool models_autoload, std::unique_ptr<server_http_res> & res) {
if (name.empty()) {
res_err(res, format_error_response("model name is missing from the request", ERROR_TYPE_INVALID_REQUEST));
return false;
@@ -776,6 +854,8 @@ static bool router_validate_model(const std::string & name, server_models & mode
res_err(res, format_error_response(string_format("model '%s' not found", name.c_str()), ERROR_TYPE_INVALID_REQUEST));
return false;
}
// resolve alias to canonical model name
name = meta->name;
if (models_autoload) {
models.ensure_model_loaded(name);
} else {
@@ -847,16 +927,16 @@ void server_models_routes::init_routes() {
auto res = std::make_unique<server_http_res>();
json body = json::parse(req.body);
std::string name = json_value(body, "model", std::string());
auto model = models.get_meta(name);
if (!model.has_value()) {
auto meta = models.get_meta(name);
if (!meta.has_value()) {
res_err(res, format_error_response("model is not found", ERROR_TYPE_NOT_FOUND));
return res;
}
if (model->status == SERVER_MODEL_STATUS_LOADED) {
if (meta->status == SERVER_MODEL_STATUS_LOADED) {
res_err(res, format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST));
return res;
}
models.load(name);
models.load(meta->name);
res_ok(res, {{"success", true}});
return res;
};
@@ -877,6 +957,7 @@ void server_models_routes::init_routes() {
preset_copy.unset_option("LLAMA_ARG_HOST");
preset_copy.unset_option("LLAMA_ARG_PORT");
preset_copy.unset_option("LLAMA_ARG_ALIAS");
preset_copy.unset_option("LLAMA_ARG_TAGS");
status["preset"] = preset_copy.to_ini();
}
if (meta.is_failed()) {
@@ -885,6 +966,8 @@ void server_models_routes::init_routes() {
}
models_json.push_back(json {
{"id", meta.name},
{"aliases", meta.aliases},
{"tags", meta.tags},
{"object", "model"}, // for OAI-compat
{"owned_by", "llamacpp"}, // for OAI-compat
{"created", t}, // for OAI-compat
@@ -912,7 +995,7 @@ void server_models_routes::init_routes() {
res_err(res, format_error_response("model is not loaded", ERROR_TYPE_INVALID_REQUEST));
return res;
}
models.unload(name);
models.unload(model->name);
res_ok(res, {{"success", true}});
return res;
};