Send reasoning content back to the model across turns via the reasoning_content API field (#21036)

* webui: send reasoning_content back to model in context

Preserve assistant reasoning across turns by extracting it from
internal tags and sending it as a separate reasoning_content field
in the API payload. The server and Jinja templates handle native
formatting (e.g. <think> tags for Qwen, GLM, DeepSeek...).

Adds "Exclude reasoning from context" toggle in Settings > Developer
(off by default, so reasoning is preserved). Includes unit tests.

* webui: add syncable parameter for excludeReasoningFromContext

* chore: update webui build output
This commit is contained in:
Pascal
2026-03-27 08:17:35 +01:00
committed by GitHub
parent 9bcb4eff4d
commit d0fa2c9fbb
11 changed files with 281 additions and 9 deletions
@@ -296,6 +296,11 @@
label: 'Disable reasoning content parsing',
type: SettingsFieldType.CHECKBOX
},
{
key: SETTINGS_KEYS.EXCLUDE_REASONING_FROM_CONTEXT,
label: 'Exclude reasoning from context',
type: SettingsFieldType.CHECKBOX
},
{
key: SETTINGS_KEYS.SHOW_RAW_OUTPUT_SWITCH,
label: 'Enable raw output toggle',
@@ -50,6 +50,8 @@ export const AGENTIC_REGEX = {
PARTIAL_MARKER: /<<<[A-Za-z_]*$/,
// Matches reasoning content blocks (including tags)
REASONING_BLOCK: /<<<reasoning_content_start>>>[\s\S]*?<<<reasoning_content_end>>>/g,
// Captures the reasoning text between start/end tags
REASONING_EXTRACT: /<<<reasoning_content_start>>>([\s\S]*?)<<<reasoning_content_end>>>/,
// Matches an opening reasoning tag and any remaining content (unterminated)
REASONING_OPEN: /<<<reasoning_content_start>>>[\s\S]*$/,
// Matches a complete agentic tool call display block (start to end marker)
@@ -10,6 +10,7 @@ export const SETTING_CONFIG_DEFAULT: Record<string, string | number | boolean |
theme: ColorMode.SYSTEM,
showThoughtInProgress: false,
disableReasoningParsing: false,
excludeReasoningFromContext: false,
showRawOutputSwitch: false,
keepStatsVisible: false,
showMessageStats: true,
@@ -106,6 +107,8 @@ export const SETTING_CONFIG_INFO: Record<string, string> = {
showThoughtInProgress: 'Expand thought process by default when generating messages.',
disableReasoningParsing:
'Send reasoning_format=none to prevent server-side extraction of reasoning tokens into separate field',
excludeReasoningFromContext:
'Strip reasoning content from previous messages before sending to the model. When unchecked, reasoning is sent back via the reasoning_content field so the model can see its own chain-of-thought across turns.',
showRawOutputSwitch:
'Show toggle button to display messages as plain text instead of Markdown-formatted content',
keepStatsVisible: 'Keep processing statistics visible after generation finishes.',
@@ -54,6 +54,7 @@ export const SETTINGS_KEYS = {
SHOW_TOOL_CALL_IN_PROGRESS: 'showToolCallInProgress',
// Developer
DISABLE_REASONING_PARSING: 'disableReasoningParsing',
EXCLUDE_REASONING_FROM_CONTEXT: 'excludeReasoningFromContext',
SHOW_RAW_OUTPUT_SWITCH: 'showRawOutputSwitch',
CUSTOM: 'custom'
} as const;
@@ -57,6 +57,46 @@ export class ChatService {
*
*/
/**
 * Extracts reasoning text from content that contains internal reasoning tags.
 *
 * Every complete block matched by AGENTIC_REGEX.REASONING_EXTRACT (a start tag,
 * the reasoning text, and an end tag) is collected in order of appearance, and the
 * captured inner texts are concatenated with no separator. A start tag without a
 * matching end tag does not match the pattern and is therefore ignored.
 *
 * For multi-part content, only parts whose type is ContentPartType.TEXT and whose
 * text is non-empty are scanned; all other parts are skipped.
 *
 * @param content - Message content: a plain string, an array of content parts,
 *   or null/undefined.
 * @returns The concatenated reasoning text, or undefined when the content is
 *   empty, is neither a string nor an array, or yields no non-empty reasoning text.
 */
private static extractReasoningFromContent(
	content: ApiChatMessageData['content'] | null | undefined
): string | undefined {
	if (!content) return undefined;

	const extractFromString = (text: string): string => {
		// Private global regex: scanning advances through its own lastIndex, so the
		// shared AGENTIC_REGEX constant is never mutated and the input string is
		// walked once instead of being re-sliced after every match.
		const pattern = new RegExp(AGENTIC_REGEX.REASONING_EXTRACT.source, 'g');
		const captured: string[] = [];

		for (let match = pattern.exec(text); match !== null; match = pattern.exec(text)) {
			// Group 1 is the text between the start and end tags
			captured.push(match[1]);
		}

		return captured.join('');
	};

	if (typeof content === 'string') {
		// An empty extraction result is reported as "no reasoning"
		return extractFromString(content) || undefined;
	}

	if (!Array.isArray(content)) return undefined;

	const collected: string[] = [];

	for (const part of content) {
		if (part.type === ContentPartType.TEXT && part.text) {
			const reasoning = extractFromString(part.text);
			if (reasoning) collected.push(reasoning);
		}
	}

	return collected.length > 0 ? collected.join('') : undefined;
}
/**
* Sends a chat completion request to the llama.cpp server.
* Supports both streaming and non-streaming responses with comprehensive parameter configuration.
@@ -111,7 +151,8 @@ export class ChatService {
custom,
timings_per_token,
// Config options
disableReasoningParsing
disableReasoningParsing,
excludeReasoningFromContext
} = options;
const normalizedMessages: ApiChatMessageData[] = messages
@@ -159,14 +200,24 @@ export class ChatService {
}
const requestBody: ApiChatCompletionRequest = {
messages: normalizedMessages.map((msg: ApiChatMessageData) => ({
role: msg.role,
// Strip reasoning tags/content from the prompt to avoid polluting KV cache.
// TODO: investigate backend expectations for reasoning tags and add a toggle if needed.
content: ChatService.stripReasoningContent(msg.content),
tool_calls: msg.tool_calls,
tool_call_id: msg.tool_call_id
})),
messages: normalizedMessages.map((msg: ApiChatMessageData) => {
// Always strip internal reasoning/agentic tags from content
const cleanedContent = ChatService.stripReasoningContent(msg.content);
const mapped: ApiChatCompletionRequest['messages'][0] = {
role: msg.role,
content: cleanedContent,
tool_calls: msg.tool_calls,
tool_call_id: msg.tool_call_id
};
// When preserving reasoning, extract it from raw content and send as separate field
if (!excludeReasoningFromContext) {
const reasoning = ChatService.extractReasoningFromContent(msg.content);
if (reasoning) {
mapped.reasoning_content = reasoning;
}
}
return mapped;
}),
stream,
return_progress: stream ? true : undefined,
tools: tools && tools.length > 0 ? tools : undefined
@@ -227,6 +227,12 @@ export const SYNCABLE_PARAMETERS: SyncableParameter[] = [
serverKey: 'alwaysShowAgenticTurns',
type: SyncableParameterType.BOOLEAN,
canSync: true
},
{
key: 'excludeReasoningFromContext',
serverKey: 'excludeReasoningFromContext',
type: SyncableParameterType.BOOLEAN,
canSync: true
}
];
@@ -1479,6 +1479,8 @@ class ChatStore {
if (currentConfig.disableReasoningParsing) apiOptions.disableReasoningParsing = true;
if (currentConfig.excludeReasoningFromContext) apiOptions.excludeReasoningFromContext = true;
if (hasValue(currentConfig.temperature))
apiOptions.temperature = Number(currentConfig.temperature);
+4
View File
@@ -45,6 +45,7 @@ export interface ApiErrorResponse {
export interface ApiChatMessageData {
role: ChatRole;
content: string | ApiChatMessageContentPart[];
reasoning_content?: string;
tool_calls?: ApiChatCompletionToolCall[];
tool_call_id?: string;
timestamp?: number;
@@ -201,6 +202,9 @@ export interface ApiChatCompletionRequest {
messages: Array<{
role: ChatRole;
content: string | ApiChatMessageContentPart[];
reasoning_content?: string;
tool_calls?: ApiChatCompletionToolCall[];
tool_call_id?: string;
}>;
stream?: boolean;
model?: string;
+2
View File
@@ -24,6 +24,8 @@ export interface SettingsChatServiceOptions {
systemMessage?: string;
// Disable reasoning parsing (use 'none' instead of 'auto')
disableReasoningParsing?: boolean;
// Strip reasoning content from context before sending
excludeReasoningFromContext?: boolean;
tools?: OpenAIToolDefinition[];
// Generation parameters
temperature?: number;