common/parser: add proper reasoning tag prefill reading (#20424)

* Implement proper prefill extraction

* Refactor cli parameters, update docs, move reasoning budget sampler part to common/reasoning-budget.cpp

* Update tools/server/server-task.cpp

* refactor: move grammars to variant, remove grammar_external, handle exception internally

* Make code less C++y

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Piotr Wilkin (ilintar)
2026-03-19 16:58:21 +01:00
committed by GitHub
parent c1258830b2
commit 5e54d51b19
33 changed files with 651 additions and 454 deletions
@@ -51,7 +51,7 @@ describe('ParameterSyncService', () => {
chat_format: '',
reasoning_format: '',
reasoning_in_content: false,
thinking_forced_open: false,
generation_prompt: '',
'speculative.n_max': 0,
'speculative.n_min': 0,
'speculative.p_min': 0.0,
@@ -116,7 +116,7 @@ describe('ParameterSyncService', () => {
chat_format: '',
reasoning_format: '',
reasoning_in_content: false,
thinking_forced_open: false,
generation_prompt: '',
'speculative.n_max': 0,
'speculative.n_min': 0,
'speculative.p_min': 0.0,
+2 -2
View File
@@ -164,7 +164,7 @@ export interface ApiLlamaCppServerProps {
chat_format: string;
reasoning_format: string;
reasoning_in_content: boolean;
thinking_forced_open: boolean;
generation_prompt: string;
samplers: string[];
backend_sampling: boolean;
'speculative.n_max': number;
@@ -332,7 +332,7 @@ export interface ApiSlotData {
chat_format: string;
reasoning_format: string;
reasoning_in_content: boolean;
thinking_forced_open: boolean;
generation_prompt: string;
samplers: string[];
backend_sampling: boolean;
'speculative.n_max': number;