common/parser: add proper reasoning tag prefill reading (#20424)

* Implement proper prefill extraction

* Refactor cli parameters, update docs, move reasoning budget sampler part to common/reasoning-budget.cpp

* Update tools/server/server-task.cpp

* refactor: move grammars to variant, remove grammar_external, handle exception internally

* Make code less C++y

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Piotr Wilkin (ilintar)
2026-03-19 16:58:21 +01:00
committed by GitHub
parent c1258830b2
commit 5e54d51b19
33 changed files with 651 additions and 454 deletions
+9 -9
View File
@@ -1081,20 +1081,21 @@ json oaicompat_chat_params_parse(
}
}
llama_params["chat_format"] = static_cast<int>(chat_params.format);
llama_params["prompt"] = chat_params.prompt;
llama_params["chat_format"] = static_cast<int>(chat_params.format);
llama_params["prompt"] = chat_params.prompt;
if (!chat_params.grammar.empty()) {
llama_params["grammar"] = chat_params.grammar;
llama_params["grammar"] = chat_params.grammar;
llama_params["grammar_type"] = std::string("tool_calls");
}
llama_params["grammar_lazy"] = chat_params.grammar_lazy;
auto grammar_triggers = json::array();
llama_params["grammar_lazy"] = chat_params.grammar_lazy;
auto grammar_triggers = json::array();
for (const auto & trigger : chat_params.grammar_triggers) {
server_grammar_trigger ct(trigger);
grammar_triggers.push_back(ct.to_json());
}
llama_params["grammar_triggers"] = grammar_triggers;
llama_params["preserved_tokens"] = chat_params.preserved_tokens;
llama_params["thinking_forced_open"] = chat_params.thinking_forced_open;
llama_params["grammar_triggers"] = grammar_triggers;
llama_params["preserved_tokens"] = chat_params.preserved_tokens;
llama_params["generation_prompt"] = chat_params.generation_prompt;
for (const auto & stop : chat_params.additional_stops) {
llama_params["stop"].push_back(stop);
}
@@ -1114,7 +1115,6 @@ json oaicompat_chat_params_parse(
llama_params["reasoning_budget_start_tag"] = chat_params.thinking_start_tag;
llama_params["reasoning_budget_end_tag"] = chat_params.thinking_end_tag;
llama_params["reasoning_budget_message"] = opt.reasoning_budget_message;
llama_params["reasoning_budget_activate_immediately"] = chat_params.thinking_forced_open;
}
}