webui: add OAI-Compat Harmony tool-call streaming visualization and persistence in chat UI (#16618)

* webui: add OAI-Compat Harmony tool-call live streaming visualization and persistence in chat UI

- Purely visual and diagnostic change, no effect on model context, prompt
  construction, or inference behavior

- Captured assistant tool call payloads during streaming and non-streaming
  completions, and persisted them in chat state and storage for downstream
  use (see the payload sketch after this list)

- Exposed parsed tool call labels beneath the assistant's model info line
  with graceful fallback when parsing fails

- Added tool call badges beneath assistant responses that expose JSON tooltips
  and copy their payloads when clicked, matching the existing model badge styling

- Added a user-facing setting to toggle tool call visibility in the Developer
  settings section, directly under the model selector option
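
For reference, a minimal sketch of the OAI-compat stream chunk shape the UI
captures (field names follow the OpenAI chat-completions schema; the id,
name, and argument values here are invented for illustration):

    // TypeScript sketch: `arguments` arrives as partial JSON fragments
    // that the client concatenates across deltas
    const chunk = {
      choices: [
        {
          delta: {
            tool_calls: [
              {
                index: 0,
                id: 'call_123',
                type: 'function',
                function: { name: 'get_weather', arguments: '{"city":"Par' }
              }
            ]
          }
        }
      ]
    };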

* webui: remove scroll listener causing unnecessary layout updates (model selector)

* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>

* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>

* chore: npm run format & update webui build output

* chore: update webui build output

---------

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
@@ -1,6 +1,25 @@
import { config } from '$lib/stores/settings.svelte';
import { selectedModelName } from '$lib/stores/models.svelte';
import { slotsService } from './slots';
import type {
ApiChatCompletionRequest,
ApiChatCompletionResponse,
ApiChatCompletionStreamChunk,
ApiChatCompletionToolCall,
ApiChatCompletionToolCallDelta,
ApiChatMessageData
} from '$lib/types/api';
import type {
DatabaseMessage,
DatabaseMessageExtra,
DatabaseMessageExtraAudioFile,
DatabaseMessageExtraImageFile,
DatabaseMessageExtraLegacyContext,
DatabaseMessageExtraPdfFile,
DatabaseMessageExtraTextFile
} from '$lib/types/database';
import type { ChatMessagePromptProgress, ChatMessageTimings } from '$lib/types/chat';
import type { SettingsChatServiceOptions } from '$lib/types/settings';
/**
* ChatService - Low-level API communication layer for llama.cpp server interactions
*
@@ -53,6 +72,7 @@ export class ChatService {
onComplete,
onError,
onReasoningChunk,
onToolCallChunk,
onModel,
onFirstValidChunk,
// Generation parameters
@@ -201,6 +221,7 @@ export class ChatService {
onComplete,
onError,
onReasoningChunk,
onToolCallChunk,
onModel,
onFirstValidChunk,
conversationId,
@@ -208,7 +229,13 @@ export class ChatService {
);
return;
} else {
-return this.handleNonStreamResponse(response, onComplete, onError, onModel);
+return this.handleNonStreamResponse(
+  response,
+  onComplete,
+  onError,
+  onToolCallChunk,
+  onModel
+);
}
} catch (error) {
if (error instanceof Error && error.name === 'AbortError') {
@@ -264,10 +291,12 @@ export class ChatService {
onComplete?: (
response: string,
reasoningContent?: string,
-timings?: ChatMessageTimings
+timings?: ChatMessageTimings,
+toolCalls?: string
) => void,
onError?: (error: Error) => void,
onReasoningChunk?: (chunk: string) => void,
onToolCallChunk?: (chunk: string) => void,
onModel?: (model: string) => void,
onFirstValidChunk?: () => void,
conversationId?: string,
@@ -282,11 +311,53 @@ export class ChatService {
const decoder = new TextDecoder();
let aggregatedContent = '';
let fullReasoningContent = '';
let aggregatedToolCalls: ApiChatCompletionToolCall[] = [];
let hasReceivedData = false;
let lastTimings: ChatMessageTimings | undefined;
let streamFinished = false;
let modelEmitted = false;
let firstValidChunkEmitted = false;
let toolCallIndexOffset = 0;
let hasOpenToolCallBatch = false;
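// Subsequent tool-call batches may restart their delta `index` at 0 once
// content or reasoning interleaves, so `toolCallIndexOffset` shifts them to
// append after the calls merged so far rather than overwrite them.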
const finalizeOpenToolCallBatch = () => {
if (!hasOpenToolCallBatch) {
return;
}
toolCallIndexOffset = aggregatedToolCalls.length;
hasOpenToolCallBatch = false;
};
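// Merges each incoming delta batch into the aggregate and streams the
// serialized snapshot to the UI via onToolCallChunk.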
const processToolCallDelta = (toolCalls?: ApiChatCompletionToolCallDelta[]) => {
if (!toolCalls || toolCalls.length === 0) {
return;
}
aggregatedToolCalls = this.mergeToolCallDeltas(
aggregatedToolCalls,
toolCalls,
toolCallIndexOffset
);
if (aggregatedToolCalls.length === 0) {
return;
}
hasOpenToolCallBatch = true;
const serializedToolCalls = JSON.stringify(aggregatedToolCalls);
if (!serializedToolCalls) {
return;
}
hasReceivedData = true;
if (!abortSignal?.aborted) {
onToolCallChunk?.(serializedToolCalls);
}
};
try {
let chunk = '';
@@ -325,6 +396,7 @@ export class ChatService {
const content = parsed.choices[0]?.delta?.content;
const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
const toolCalls = parsed.choices[0]?.delta?.tool_calls;
const timings = parsed.timings;
const promptProgress = parsed.prompt_progress;
@@ -342,6 +414,7 @@ export class ChatService {
}
if (content) {
finalizeOpenToolCallBatch();
hasReceivedData = true;
aggregatedContent += content;
if (!abortSignal?.aborted) {
@@ -350,12 +423,15 @@ export class ChatService {
}
if (reasoningContent) {
finalizeOpenToolCallBatch();
hasReceivedData = true;
fullReasoningContent += reasoningContent;
if (!abortSignal?.aborted) {
onReasoningChunk?.(reasoningContent);
}
}
processToolCallDelta(toolCalls);
} catch (e) {
console.error('Error parsing JSON chunk:', e);
}
@@ -368,12 +444,26 @@ export class ChatService {
if (abortSignal?.aborted) return;
if (streamFinished) {
-if (!hasReceivedData && aggregatedContent.length === 0) {
+finalizeOpenToolCallBatch();
+if (
+  !hasReceivedData &&
+  aggregatedContent.length === 0 &&
+  aggregatedToolCalls.length === 0
+) {
const noResponseError = new Error('No response received from server. Please try again.');
throw noResponseError;
}
-onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
+const finalToolCalls =
+  aggregatedToolCalls.length > 0 ? JSON.stringify(aggregatedToolCalls) : undefined;
+onComplete?.(
+  aggregatedContent,
+  fullReasoningContent || undefined,
+  lastTimings,
+  finalToolCalls
+);
}
} catch (error) {
const err = error instanceof Error ? error : new Error('Stream error');
@@ -386,6 +476,54 @@ export class ChatService {
}
}
private mergeToolCallDeltas(
existing: ApiChatCompletionToolCall[],
deltas: ApiChatCompletionToolCallDelta[],
indexOffset = 0
): ApiChatCompletionToolCall[] {
const result = existing.map((call) => ({
...call,
function: call.function ? { ...call.function } : undefined
}));
for (const delta of deltas) {
const index =
typeof delta.index === 'number' && delta.index >= 0
? delta.index + indexOffset
: result.length;
while (result.length <= index) {
result.push({ function: undefined });
}
const target = result[index]!;
if (delta.id) {
target.id = delta.id;
}
if (delta.type) {
target.type = delta.type;
}
if (delta.function) {
const fn = target.function ? { ...target.function } : {};
if (delta.function.name) {
fn.name = delta.function.name;
}
if (delta.function.arguments) {
fn.arguments = (fn.arguments ?? '') + delta.function.arguments;
}
target.function = fn;
}
}
return result;
}
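// Illustrative merge (values invented): a first delta names the call and a
// later delta appends more argument text; both accumulate into one call:
//   let calls = this.mergeToolCallDeltas([], [
//     { index: 0, id: 'call_123', type: 'function',
//       function: { name: 'get_weather', arguments: '{"city"' } }
//   ]);
//   calls = this.mergeToolCallDeltas(calls, [{ index: 0, function: { arguments: ':"Paris"}' } }]);
//   // -> [{ id: 'call_123', type: 'function',
//   //       function: { name: 'get_weather', arguments: '{"city":"Paris"}' } }]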
/**
* Handles non-streaming response from the chat completion API.
* Parses the JSON response and extracts the generated content.
@@ -401,9 +539,11 @@ export class ChatService {
onComplete?: (
response: string,
reasoningContent?: string,
-timings?: ChatMessageTimings
+timings?: ChatMessageTimings,
+toolCalls?: string
) => void,
onError?: (error: Error) => void,
onToolCallChunk?: (chunk: string) => void,
onModel?: (model: string) => void
): Promise<string> {
try {
@@ -423,17 +563,31 @@ export class ChatService {
const content = data.choices[0]?.message?.content || '';
const reasoningContent = data.choices[0]?.message?.reasoning_content;
const toolCalls = data.choices[0]?.message?.tool_calls;
if (reasoningContent) {
console.log('Full reasoning content:', reasoningContent);
}
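// Non-streaming path: the response carries the complete tool_calls array,
// so merge it in one pass and emit a single serialized chunk to the UI.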
-if (!content.trim()) {
+let serializedToolCalls: string | undefined;
+if (toolCalls && toolCalls.length > 0) {
+  const mergedToolCalls = this.mergeToolCallDeltas([], toolCalls);
+  if (mergedToolCalls.length > 0) {
+    serializedToolCalls = JSON.stringify(mergedToolCalls);
+    if (serializedToolCalls) {
+      onToolCallChunk?.(serializedToolCalls);
+    }
+  }
+}
+if (!content.trim() && !serializedToolCalls) {
const noResponseError = new Error('No response received from server. Please try again.');
throw noResponseError;
}
-onComplete?.(content, reasoningContent);
+onComplete?.(content, reasoningContent, undefined, serializedToolCalls);
return content;
} catch (error) {