cli: Remove redundant local sampling variables (#20429) (#22264)

This change implements the third requested change in issue 20429.
Because defaults.sampling contains the reasoning budget token count and
the reasoning budget message, it's not necessary to assign them to
struct variables.
This commit is contained in:
Ethan Turner
2026-04-23 15:53:23 -07:00
committed by GitHub
parent 5d2b52d80d
commit fa0b8a70a8
+2 -6
View File
@@ -59,8 +59,6 @@ struct cli_context {
     std::vector<raw_buffer> input_files;
     task_params defaults;
     bool verbose_prompt;
-    int reasoning_budget = -1;
-    std::string reasoning_budget_message;
 
     // thread for showing "loading" animation
     std::atomic<bool> loading_show;
@@ -77,8 +75,6 @@ struct cli_context {
         // defaults.return_progress = true; // TODO: show progress
         verbose_prompt = params.verbose_prompt;
-        reasoning_budget = params.sampling.reasoning_budget_tokens;
-        reasoning_budget_message = params.sampling.reasoning_budget_message;
     }
 
     std::string generate_completion(result_timings & out_timings) {
@@ -106,7 +102,7 @@ struct cli_context {
         const llama_vocab * vocab = llama_model_get_vocab(
             llama_get_model(ctx_server.get_llama_context()));
-        task.params.sampling.reasoning_budget_tokens = reasoning_budget;
+        task.params.sampling.reasoning_budget_tokens = defaults.sampling.reasoning_budget_tokens;
         task.params.sampling.generation_prompt = chat_params.generation_prompt;
 
         if (!chat_params.thinking_start_tag.empty()) {
@@ -116,7 +112,7 @@ struct cli_context {
             task.params.sampling.reasoning_budget_end =
                 common_tokenize(vocab, chat_params.thinking_end_tag, false, true);
             task.params.sampling.reasoning_budget_forced =
-                common_tokenize(vocab, reasoning_budget_message + chat_params.thinking_end_tag, false, true);
+                common_tokenize(vocab, defaults.sampling.reasoning_budget_message + chat_params.thinking_end_tag, false, true);
         }
 
         rd.post_task({std::move(task)});