context : reserve new scheduler when graph topology changes (#18547)
* context : reserve new scheduler when graph topology changes * cont : fix * cont : fix reserve * cont : reserve only when changes occur + timing * context : add comments * llama : reserve on sampler changes * common : allow null common_sampler * server : task declares needs (embd, logits, sampling) * server : do not init sampler if not needed * llama : fix need_reserve when unsetting a sampler * server : consolidate slot reset/clear logic
This commit is contained in:
@@ -1172,7 +1172,6 @@ common_init_result::common_init_result(common_params & params) :
|
||||
pimpl->samplers_seq_config[i] = { i, common_sampler_get(pimpl->samplers[i].get()) };
|
||||
}
|
||||
|
||||
// TODO: temporarily gated behind a flag
|
||||
if (params.sampling.backend_sampling) {
|
||||
cparams.samplers = pimpl->samplers_seq_config.data();
|
||||
cparams.n_samplers = pimpl->samplers_seq_config.size();
|
||||
|
||||
+19
-5
@@ -334,15 +334,21 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
|
||||
}
|
||||
|
||||
void common_sampler_free(struct common_sampler * gsmpl) {
|
||||
if (gsmpl) {
|
||||
llama_sampler_free(gsmpl->grmr);
|
||||
llama_sampler_free(gsmpl->chain);
|
||||
|
||||
delete gsmpl;
|
||||
if (!gsmpl) {
|
||||
return;
|
||||
}
|
||||
|
||||
llama_sampler_free(gsmpl->grmr);
|
||||
llama_sampler_free(gsmpl->chain);
|
||||
|
||||
delete gsmpl;
|
||||
}
|
||||
|
||||
void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar) {
|
||||
if (!gsmpl) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto tm = gsmpl->tm();
|
||||
|
||||
if (gsmpl->grmr && accept_grammar) {
|
||||
@@ -355,6 +361,10 @@ void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, boo
|
||||
}
|
||||
|
||||
void common_sampler_reset(struct common_sampler * gsmpl) {
|
||||
if (!gsmpl) {
|
||||
return;
|
||||
}
|
||||
|
||||
gsmpl->reset();
|
||||
}
|
||||
|
||||
@@ -415,6 +425,10 @@ void common_perf_print(const struct llama_context * ctx, const struct common_sam
|
||||
}
|
||||
|
||||
struct llama_sampler * common_sampler_get(const struct common_sampler * gsmpl) {
|
||||
if (!gsmpl) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return gsmpl->chain;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user