server: support load model on startup, support preset-only options (#18206)
* server: support autoload model, support preset-only options * add docs * load-on-startup * fix * Update common/arg.cpp Co-authored-by: Pascal <admin@serveurperso.com> --------- Co-authored-by: Pascal <admin@serveurperso.com>
This commit is contained in:
@@ -96,6 +96,11 @@ common_arg & common_arg::set_sparam() {
|
||||
return *this;
|
||||
}
|
||||
|
||||
common_arg & common_arg::set_preset_only() {
|
||||
is_preset_only = true;
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool common_arg::in_example(enum llama_example ex) {
|
||||
return examples.find(ex) != examples.end();
|
||||
}
|
||||
@@ -3494,3 +3499,24 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
|
||||
return ctx_arg;
|
||||
}
|
||||
|
||||
void common_params_add_preset_options(std::vector<common_arg> & args) {
|
||||
// arguments below won't be treated as CLI args, only preset options
|
||||
args.push_back(common_arg(
|
||||
{"load-on-startup"}, "NAME",
|
||||
"in server router mode, autoload this model on startup",
|
||||
[](common_params &, const std::string &) { /* unused */ }
|
||||
).set_env(COMMON_ARG_PRESET_LOAD_ON_STARTUP).set_preset_only());
|
||||
|
||||
// args.push_back(common_arg(
|
||||
// {"pin"},
|
||||
// "in server router mode, do not unload this model if models_max is exceeded",
|
||||
// [](common_params &) { /* unused */ }
|
||||
// ).set_preset_only());
|
||||
|
||||
// args.push_back(common_arg(
|
||||
// {"unload-idle-seconds"}, "SECONDS",
|
||||
// "in server router mode, unload models idle for more than this many seconds",
|
||||
// [](common_params &, int) { /* unused */ }
|
||||
// ).set_preset_only());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user