webui: add OAI-Compat Harmony tool-call streaming visualization and persistence in chat UI (#16618)
* webui: add OAI-Compat Harmony tool-call live streaming visualization and persistence in chat UI - Purely visual and diagnostic change, no effect on model context, prompt construction, or inference behavior - Captured assistant tool call payloads during streaming and non-streaming completions, and persisted them in chat state and storage for downstream use - Exposed parsed tool call labels beneath the assistant's model info line with graceful fallback when parsing fails - Added tool call badges beneath assistant responses that expose JSON tooltips and copy their payloads when clicked, matching the existing model badge styling - Added a user-facing setting to toggle tool call visibility to the Developer settings section directly under the model selector option * webui: remove scroll listener causing unnecessary layout updates (model selector) * Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com> * Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com> * chore: npm run format & update webui build output * chore: update webui build output --------- Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
This commit is contained in:
@@ -1,6 +1,25 @@
|
||||
import { config } from '$lib/stores/settings.svelte';
|
||||
import { selectedModelName } from '$lib/stores/models.svelte';
|
||||
import { slotsService } from './slots';
|
||||
import type {
|
||||
ApiChatCompletionRequest,
|
||||
ApiChatCompletionResponse,
|
||||
ApiChatCompletionStreamChunk,
|
||||
ApiChatCompletionToolCall,
|
||||
ApiChatCompletionToolCallDelta,
|
||||
ApiChatMessageData
|
||||
} from '$lib/types/api';
|
||||
import type {
|
||||
DatabaseMessage,
|
||||
DatabaseMessageExtra,
|
||||
DatabaseMessageExtraAudioFile,
|
||||
DatabaseMessageExtraImageFile,
|
||||
DatabaseMessageExtraLegacyContext,
|
||||
DatabaseMessageExtraPdfFile,
|
||||
DatabaseMessageExtraTextFile
|
||||
} from '$lib/types/database';
|
||||
import type { ChatMessagePromptProgress, ChatMessageTimings } from '$lib/types/chat';
|
||||
import type { SettingsChatServiceOptions } from '$lib/types/settings';
|
||||
/**
|
||||
* ChatService - Low-level API communication layer for llama.cpp server interactions
|
||||
*
|
||||
@@ -53,6 +72,7 @@ export class ChatService {
|
||||
onComplete,
|
||||
onError,
|
||||
onReasoningChunk,
|
||||
onToolCallChunk,
|
||||
onModel,
|
||||
onFirstValidChunk,
|
||||
// Generation parameters
|
||||
@@ -201,6 +221,7 @@ export class ChatService {
|
||||
onComplete,
|
||||
onError,
|
||||
onReasoningChunk,
|
||||
onToolCallChunk,
|
||||
onModel,
|
||||
onFirstValidChunk,
|
||||
conversationId,
|
||||
@@ -208,7 +229,13 @@ export class ChatService {
|
||||
);
|
||||
return;
|
||||
} else {
|
||||
return this.handleNonStreamResponse(response, onComplete, onError, onModel);
|
||||
return this.handleNonStreamResponse(
|
||||
response,
|
||||
onComplete,
|
||||
onError,
|
||||
onToolCallChunk,
|
||||
onModel
|
||||
);
|
||||
}
|
||||
} catch (error) {
|
||||
if (error instanceof Error && error.name === 'AbortError') {
|
||||
@@ -264,10 +291,12 @@ export class ChatService {
|
||||
onComplete?: (
|
||||
response: string,
|
||||
reasoningContent?: string,
|
||||
timings?: ChatMessageTimings
|
||||
timings?: ChatMessageTimings,
|
||||
toolCalls?: string
|
||||
) => void,
|
||||
onError?: (error: Error) => void,
|
||||
onReasoningChunk?: (chunk: string) => void,
|
||||
onToolCallChunk?: (chunk: string) => void,
|
||||
onModel?: (model: string) => void,
|
||||
onFirstValidChunk?: () => void,
|
||||
conversationId?: string,
|
||||
@@ -282,11 +311,53 @@ export class ChatService {
|
||||
const decoder = new TextDecoder();
|
||||
let aggregatedContent = '';
|
||||
let fullReasoningContent = '';
|
||||
let aggregatedToolCalls: ApiChatCompletionToolCall[] = [];
|
||||
let hasReceivedData = false;
|
||||
let lastTimings: ChatMessageTimings | undefined;
|
||||
let streamFinished = false;
|
||||
let modelEmitted = false;
|
||||
let firstValidChunkEmitted = false;
|
||||
let toolCallIndexOffset = 0;
|
||||
let hasOpenToolCallBatch = false;
|
||||
|
||||
const finalizeOpenToolCallBatch = () => {
|
||||
if (!hasOpenToolCallBatch) {
|
||||
return;
|
||||
}
|
||||
|
||||
toolCallIndexOffset = aggregatedToolCalls.length;
|
||||
hasOpenToolCallBatch = false;
|
||||
};
|
||||
|
||||
const processToolCallDelta = (toolCalls?: ApiChatCompletionToolCallDelta[]) => {
|
||||
if (!toolCalls || toolCalls.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
aggregatedToolCalls = this.mergeToolCallDeltas(
|
||||
aggregatedToolCalls,
|
||||
toolCalls,
|
||||
toolCallIndexOffset
|
||||
);
|
||||
|
||||
if (aggregatedToolCalls.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
hasOpenToolCallBatch = true;
|
||||
|
||||
const serializedToolCalls = JSON.stringify(aggregatedToolCalls);
|
||||
|
||||
if (!serializedToolCalls) {
|
||||
return;
|
||||
}
|
||||
|
||||
hasReceivedData = true;
|
||||
|
||||
if (!abortSignal?.aborted) {
|
||||
onToolCallChunk?.(serializedToolCalls);
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
let chunk = '';
|
||||
@@ -325,6 +396,7 @@ export class ChatService {
|
||||
|
||||
const content = parsed.choices[0]?.delta?.content;
|
||||
const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
|
||||
const toolCalls = parsed.choices[0]?.delta?.tool_calls;
|
||||
const timings = parsed.timings;
|
||||
const promptProgress = parsed.prompt_progress;
|
||||
|
||||
@@ -342,6 +414,7 @@ export class ChatService {
|
||||
}
|
||||
|
||||
if (content) {
|
||||
finalizeOpenToolCallBatch();
|
||||
hasReceivedData = true;
|
||||
aggregatedContent += content;
|
||||
if (!abortSignal?.aborted) {
|
||||
@@ -350,12 +423,15 @@ export class ChatService {
|
||||
}
|
||||
|
||||
if (reasoningContent) {
|
||||
finalizeOpenToolCallBatch();
|
||||
hasReceivedData = true;
|
||||
fullReasoningContent += reasoningContent;
|
||||
if (!abortSignal?.aborted) {
|
||||
onReasoningChunk?.(reasoningContent);
|
||||
}
|
||||
}
|
||||
|
||||
processToolCallDelta(toolCalls);
|
||||
} catch (e) {
|
||||
console.error('Error parsing JSON chunk:', e);
|
||||
}
|
||||
@@ -368,12 +444,26 @@ export class ChatService {
|
||||
if (abortSignal?.aborted) return;
|
||||
|
||||
if (streamFinished) {
|
||||
if (!hasReceivedData && aggregatedContent.length === 0) {
|
||||
finalizeOpenToolCallBatch();
|
||||
|
||||
if (
|
||||
!hasReceivedData &&
|
||||
aggregatedContent.length === 0 &&
|
||||
aggregatedToolCalls.length === 0
|
||||
) {
|
||||
const noResponseError = new Error('No response received from server. Please try again.');
|
||||
throw noResponseError;
|
||||
}
|
||||
|
||||
onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
|
||||
const finalToolCalls =
|
||||
aggregatedToolCalls.length > 0 ? JSON.stringify(aggregatedToolCalls) : undefined;
|
||||
|
||||
onComplete?.(
|
||||
aggregatedContent,
|
||||
fullReasoningContent || undefined,
|
||||
lastTimings,
|
||||
finalToolCalls
|
||||
);
|
||||
}
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error('Stream error');
|
||||
@@ -386,6 +476,54 @@ export class ChatService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Merges incremental tool-call deltas (as streamed by the server) into an
 * accumulated list of complete tool calls.
 *
 * @param existing - Tool calls accumulated so far; not mutated.
 * @param deltas - Incremental updates from the current stream chunk.
 * @param indexOffset - Shift applied to each delta's index so that deltas from
 *   a new batch land after previously finalized tool calls.
 * @returns A new array with the deltas folded into the existing tool calls.
 */
private mergeToolCallDeltas(
	existing: ApiChatCompletionToolCall[],
	deltas: ApiChatCompletionToolCallDelta[],
	indexOffset = 0
): ApiChatCompletionToolCall[] {
	// Shallow-copy every entry (and its nested function object) so the
	// caller's array and its elements are never mutated in place.
	const merged: ApiChatCompletionToolCall[] = existing.map((entry) => ({
		...entry,
		function: entry.function ? { ...entry.function } : undefined
	}));

	for (const delta of deltas) {
		// A non-negative numeric index addresses a slot (shifted by the batch
		// offset); anything else appends a fresh tool call at the end.
		const slot =
			typeof delta.index === 'number' && delta.index >= 0
				? delta.index + indexOffset
				: merged.length;

		// Pad with empty placeholders until the addressed slot exists.
		while (merged.length <= slot) {
			merged.push({ function: undefined });
		}

		const current = merged[slot]!;

		if (delta.id) {
			current.id = delta.id;
		}

		if (delta.type) {
			current.type = delta.type;
		}

		if (delta.function) {
			// Names overwrite the previous value; argument fragments are
			// concatenated across deltas to rebuild the full JSON string.
			const mergedFn = current.function ? { ...current.function } : {};

			if (delta.function.name) {
				mergedFn.name = delta.function.name;
			}

			if (delta.function.arguments) {
				mergedFn.arguments = (mergedFn.arguments ?? '') + delta.function.arguments;
			}

			current.function = mergedFn;
		}
	}

	return merged;
}
|
||||
|
||||
/**
|
||||
* Handles non-streaming response from the chat completion API.
|
||||
* Parses the JSON response and extracts the generated content.
|
||||
@@ -401,9 +539,11 @@ export class ChatService {
|
||||
onComplete?: (
|
||||
response: string,
|
||||
reasoningContent?: string,
|
||||
timings?: ChatMessageTimings
|
||||
timings?: ChatMessageTimings,
|
||||
toolCalls?: string
|
||||
) => void,
|
||||
onError?: (error: Error) => void,
|
||||
onToolCallChunk?: (chunk: string) => void,
|
||||
onModel?: (model: string) => void
|
||||
): Promise<string> {
|
||||
try {
|
||||
@@ -423,17 +563,31 @@ export class ChatService {
|
||||
|
||||
const content = data.choices[0]?.message?.content || '';
|
||||
const reasoningContent = data.choices[0]?.message?.reasoning_content;
|
||||
const toolCalls = data.choices[0]?.message?.tool_calls;
|
||||
|
||||
if (reasoningContent) {
|
||||
console.log('Full reasoning content:', reasoningContent);
|
||||
}
|
||||
|
||||
if (!content.trim()) {
|
||||
let serializedToolCalls: string | undefined;
|
||||
|
||||
if (toolCalls && toolCalls.length > 0) {
|
||||
const mergedToolCalls = this.mergeToolCallDeltas([], toolCalls);
|
||||
|
||||
if (mergedToolCalls.length > 0) {
|
||||
serializedToolCalls = JSON.stringify(mergedToolCalls);
|
||||
if (serializedToolCalls) {
|
||||
onToolCallChunk?.(serializedToolCalls);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!content.trim() && !serializedToolCalls) {
|
||||
const noResponseError = new Error('No response received from server. Please try again.');
|
||||
throw noResponseError;
|
||||
}
|
||||
|
||||
onComplete?.(content, reasoningContent);
|
||||
onComplete?.(content, reasoningContent, undefined, serializedToolCalls);
|
||||
|
||||
return content;
|
||||
} catch (error) {
|
||||
|
||||
Reference in New Issue
Block a user