common/parser: add proper reasoning tag prefill reading (#20424)
* Implement proper prefill extraction
* Refactor CLI parameters, update docs, move reasoning budget sampler part to common/reasoning-budget.cpp
* Update tools/server/server-task.cpp
* refactor: move grammars to variant, remove grammar_external, handle exception internally
* Make code less C++y

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
committed by
GitHub
parent
c1258830b2
commit
5e54d51b19
@@ -51,7 +51,7 @@ describe('ParameterSyncService', () => {
|
||||
chat_format: '',
|
||||
reasoning_format: '',
|
||||
reasoning_in_content: false,
|
||||
-thinking_forced_open: false,
|
||||
+generation_prompt: '',
|
||||
'speculative.n_max': 0,
|
||||
'speculative.n_min': 0,
|
||||
'speculative.p_min': 0.0,
|
||||
@@ -116,7 +116,7 @@ describe('ParameterSyncService', () => {
|
||||
chat_format: '',
|
||||
reasoning_format: '',
|
||||
reasoning_in_content: false,
|
||||
-thinking_forced_open: false,
|
||||
+generation_prompt: '',
|
||||
'speculative.n_max': 0,
|
||||
'speculative.n_min': 0,
|
||||
'speculative.p_min': 0.0,
|
||||
|
||||
+2
-2
@@ -164,7 +164,7 @@ export interface ApiLlamaCppServerProps {
|
||||
chat_format: string;
|
||||
reasoning_format: string;
|
||||
reasoning_in_content: boolean;
|
||||
-thinking_forced_open: boolean;
|
||||
+generation_prompt: string;
|
||||
samplers: string[];
|
||||
backend_sampling: boolean;
|
||||
'speculative.n_max': number;
|
||||
@@ -332,7 +332,7 @@ export interface ApiSlotData {
|
||||
chat_format: string;
|
||||
reasoning_format: string;
|
||||
reasoning_in_content: boolean;
|
||||
-thinking_forced_open: boolean;
|
||||
+generation_prompt: string;
|
||||
samplers: string[];
|
||||
backend_sampling: boolean;
|
||||
'speculative.n_max': number;
|
||||
|
||||
Reference in New Issue
Block a user