// ChatService — client for the llama.cpp server chat-completion API
// (streaming and non-streaming), including attachment conversion,
// slot-idle checks, and KV-cache pre-encoding.
import { getJsonHeaders } from '$lib/utils/api-headers';
|
|
import { formatAttachmentText } from '$lib/utils/formatters';
|
|
import { isAbortError } from '$lib/utils/abort';
|
|
import {
|
|
ATTACHMENT_LABEL_PDF_FILE,
|
|
ATTACHMENT_LABEL_MCP_PROMPT,
|
|
ATTACHMENT_LABEL_MCP_RESOURCE,
|
|
LEGACY_AGENTIC_REGEX
|
|
} from '$lib/constants';
|
|
import {
|
|
AttachmentType,
|
|
ContentPartType,
|
|
MessageRole,
|
|
ReasoningFormat,
|
|
UrlProtocol
|
|
} from '$lib/enums';
|
|
import type { ApiChatMessageContentPart, ApiChatCompletionToolCall } from '$lib/types/api';
|
|
import type { DatabaseMessageExtraMcpPrompt, DatabaseMessageExtraMcpResource } from '$lib/types';
|
|
import { modelsStore } from '$lib/stores/models.svelte';
|
|
|
|
export class ChatService {
|
|
/**
|
|
*
|
|
*
|
|
* Messaging
|
|
*
|
|
*
|
|
*/
|
|
|
|
/**
|
|
* Sends a chat completion request to the llama.cpp server.
|
|
* Supports both streaming and non-streaming responses with comprehensive parameter configuration.
|
|
* Automatically converts database messages with attachments to the appropriate API format.
|
|
*
|
|
* @param messages - Array of chat messages to send to the API (supports both ApiChatMessageData and DatabaseMessage with attachments)
|
|
* @param options - Configuration options for the chat completion request. See `SettingsChatServiceOptions` type for details.
|
|
* @returns {Promise<string | void>} that resolves to the complete response string (non-streaming) or void (streaming)
|
|
* @throws {Error} if the request fails or is aborted
|
|
*/
|
|
static async sendMessage(
|
|
messages: ApiChatMessageData[] | (DatabaseMessage & { extra?: DatabaseMessageExtra[] })[],
|
|
options: SettingsChatServiceOptions = {},
|
|
conversationId?: string,
|
|
signal?: AbortSignal
|
|
): Promise<string | void> {
|
|
const {
|
|
stream,
|
|
onChunk,
|
|
onComplete,
|
|
onError,
|
|
onReasoningChunk,
|
|
onToolCallChunk,
|
|
onModel,
|
|
onTimings,
|
|
// Tools for function calling
|
|
tools,
|
|
// Generation parameters
|
|
temperature,
|
|
max_tokens,
|
|
// Sampling parameters
|
|
dynatemp_range,
|
|
dynatemp_exponent,
|
|
top_k,
|
|
top_p,
|
|
min_p,
|
|
xtc_probability,
|
|
xtc_threshold,
|
|
typ_p,
|
|
// Penalty parameters
|
|
repeat_last_n,
|
|
repeat_penalty,
|
|
presence_penalty,
|
|
frequency_penalty,
|
|
dry_multiplier,
|
|
dry_base,
|
|
dry_allowed_length,
|
|
dry_penalty_last_n,
|
|
// Other parameters
|
|
samplers,
|
|
backend_sampling,
|
|
custom,
|
|
timings_per_token,
|
|
// Config options
|
|
disableReasoningParsing,
|
|
excludeReasoningFromContext
|
|
} = options;
|
|
|
|
const normalizedMessages: ApiChatMessageData[] = messages
|
|
.map((msg) => {
|
|
if ('id' in msg && 'convId' in msg && 'timestamp' in msg) {
|
|
const dbMsg = msg as DatabaseMessage & { extra?: DatabaseMessageExtra[] };
|
|
|
|
return ChatService.convertDbMessageToApiChatMessageData(dbMsg);
|
|
} else {
|
|
return msg as ApiChatMessageData;
|
|
}
|
|
})
|
|
.filter((msg) => {
|
|
// Filter out empty system messages
|
|
if (msg.role === MessageRole.SYSTEM) {
|
|
const content = typeof msg.content === 'string' ? msg.content : '';
|
|
|
|
return content.trim().length > 0;
|
|
}
|
|
|
|
return true;
|
|
});
|
|
|
|
// Filter out image attachments if the model doesn't support vision
|
|
if (options.model && !modelsStore.modelSupportsVision(options.model)) {
|
|
normalizedMessages.forEach((msg) => {
|
|
if (Array.isArray(msg.content)) {
|
|
msg.content = msg.content.filter((part: ApiChatMessageContentPart) => {
|
|
if (part.type === ContentPartType.IMAGE_URL) {
|
|
console.info(
|
|
`[ChatService] Skipping image attachment in message history (model "${options.model}" does not support vision)`
|
|
);
|
|
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
});
|
|
// If only text remains and it's a single part, simplify to string
|
|
if (msg.content.length === 1 && msg.content[0].type === ContentPartType.TEXT) {
|
|
msg.content = msg.content[0].text;
|
|
}
|
|
}
|
|
});
|
|
}
|
|
|
|
const requestBody: ApiChatCompletionRequest = {
|
|
messages: normalizedMessages.map((msg: ApiChatMessageData) => {
|
|
const mapped: ApiChatCompletionRequest['messages'][0] = {
|
|
role: msg.role,
|
|
content: msg.content,
|
|
tool_calls: msg.tool_calls,
|
|
tool_call_id: msg.tool_call_id
|
|
};
|
|
// Include reasoning_content from the dedicated field
|
|
if (!excludeReasoningFromContext && msg.reasoning_content) {
|
|
mapped.reasoning_content = msg.reasoning_content;
|
|
}
|
|
return mapped;
|
|
}),
|
|
stream,
|
|
return_progress: stream ? true : undefined,
|
|
tools: tools && tools.length > 0 ? tools : undefined
|
|
};
|
|
|
|
// Include model in request if provided (required in ROUTER mode)
|
|
if (options.model) {
|
|
requestBody.model = options.model;
|
|
}
|
|
|
|
requestBody.reasoning_format = disableReasoningParsing
|
|
? ReasoningFormat.NONE
|
|
: ReasoningFormat.AUTO;
|
|
|
|
if (temperature !== undefined) requestBody.temperature = temperature;
|
|
if (max_tokens !== undefined) {
|
|
// Set max_tokens to -1 (infinite) when explicitly configured as 0 or null
|
|
requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
|
|
}
|
|
|
|
if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
|
|
if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
|
|
if (top_k !== undefined) requestBody.top_k = top_k;
|
|
if (top_p !== undefined) requestBody.top_p = top_p;
|
|
if (min_p !== undefined) requestBody.min_p = min_p;
|
|
if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
|
|
if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
|
|
if (typ_p !== undefined) requestBody.typ_p = typ_p;
|
|
|
|
if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
|
|
if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
|
|
if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty;
|
|
if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty;
|
|
if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier;
|
|
if (dry_base !== undefined) requestBody.dry_base = dry_base;
|
|
if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length;
|
|
if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n;
|
|
|
|
if (samplers !== undefined) {
|
|
requestBody.samplers =
|
|
typeof samplers === 'string'
|
|
? samplers.split(';').filter((s: string) => s.trim())
|
|
: samplers;
|
|
}
|
|
|
|
if (backend_sampling !== undefined) requestBody.backend_sampling = backend_sampling;
|
|
|
|
if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;
|
|
|
|
if (custom) {
|
|
try {
|
|
const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom;
|
|
Object.assign(requestBody, customParams);
|
|
} catch (error) {
|
|
console.warn('Failed to parse custom parameters:', error);
|
|
}
|
|
}
|
|
|
|
try {
|
|
const response = await fetch(`./v1/chat/completions`, {
|
|
method: 'POST',
|
|
headers: getJsonHeaders(),
|
|
body: JSON.stringify(requestBody),
|
|
signal
|
|
});
|
|
|
|
if (!response.ok) {
|
|
const error = await ChatService.parseErrorResponse(response);
|
|
|
|
if (onError) {
|
|
onError(error);
|
|
}
|
|
|
|
throw error;
|
|
}
|
|
|
|
if (stream) {
|
|
await ChatService.handleStreamResponse(
|
|
response,
|
|
onChunk,
|
|
onComplete,
|
|
onError,
|
|
onReasoningChunk,
|
|
onToolCallChunk,
|
|
onModel,
|
|
onTimings,
|
|
conversationId,
|
|
signal
|
|
);
|
|
|
|
return;
|
|
} else {
|
|
return ChatService.handleNonStreamResponse(
|
|
response,
|
|
onComplete,
|
|
onError,
|
|
onToolCallChunk,
|
|
onModel
|
|
);
|
|
}
|
|
} catch (error) {
|
|
if (isAbortError(error)) {
|
|
console.log('Chat completion request was aborted');
|
|
return;
|
|
}
|
|
|
|
let userFriendlyError: Error;
|
|
|
|
if (error instanceof Error) {
|
|
if (error.name === 'TypeError' && error.message.includes('fetch')) {
|
|
userFriendlyError = new Error(
|
|
'Unable to connect to server - please check if the server is running'
|
|
);
|
|
userFriendlyError.name = 'NetworkError';
|
|
} else if (error.message.includes('ECONNREFUSED')) {
|
|
userFriendlyError = new Error('Connection refused - server may be offline');
|
|
userFriendlyError.name = 'NetworkError';
|
|
} else if (error.message.includes('ETIMEDOUT')) {
|
|
userFriendlyError = new Error('Request timed out - the server took too long to respond');
|
|
userFriendlyError.name = 'TimeoutError';
|
|
} else {
|
|
userFriendlyError = error;
|
|
}
|
|
} else {
|
|
userFriendlyError = new Error('Unknown error occurred while sending message');
|
|
}
|
|
|
|
console.error('Error in sendMessage:', error);
|
|
|
|
if (onError) {
|
|
onError(userFriendlyError);
|
|
}
|
|
|
|
throw userFriendlyError;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Checks whether all server slots are currently idle (not processing any requests).
|
|
* Queries the /slots endpoint (requires --slots flag on the server).
|
|
* Returns true if all slots are idle, false if any is processing.
|
|
* If the endpoint is unavailable or errors out, returns true (best-effort fallback).
|
|
*
|
|
* @param signal - Optional AbortSignal to cancel the request if needed
|
|
* @param model - Optional model name to check slots for (required in ROUTER mode)
|
|
* @returns {Promise<boolean>} Promise that resolves to true if all slots are idle, false if any is processing
|
|
*/
|
|
static async areAllSlotsIdle(model?: string | null, signal?: AbortSignal): Promise<boolean> {
|
|
try {
|
|
const url = model ? `./slots?model=${encodeURIComponent(model)}` : './slots';
|
|
const res = await fetch(url, { signal });
|
|
if (!res.ok) return true;
|
|
|
|
const slots: { is_processing: boolean }[] = await res.json();
|
|
return slots.every((s) => !s.is_processing);
|
|
} catch {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Sends a fire-and-forget request to pre-encode the conversation in the server's KV cache.
|
|
* After a response completes, this re-submits the full conversation
|
|
* using n_predict=0 and stream=false so the server processes the prompt without generating tokens.
|
|
* This warms the cache for the next turn, making it faster.
|
|
*
|
|
* When excludeReasoningFromContext is true, reasoning content is stripped from the messages
|
|
* to match what sendMessage would send on the next turn (avoiding cache misses).
|
|
* When false, reasoning_content is preserved so the cached prompt matches the next request.
|
|
*
|
|
* @param messages - The full conversation including the latest assistant response
|
|
* @param model - Optional model name (required in ROUTER mode)
|
|
* @param excludeReasoning - Whether to strip reasoning content (should match excludeReasoningFromContext setting)
|
|
* @param signal - Optional AbortSignal to cancel the pre-encode request
|
|
*/
|
|
static async preEncode(
|
|
messages: ApiChatMessageData[] | (DatabaseMessage & { extra?: DatabaseMessageExtra[] })[],
|
|
model?: string | null,
|
|
excludeReasoning?: boolean,
|
|
signal?: AbortSignal
|
|
): Promise<void> {
|
|
const normalizedMessages: ApiChatMessageData[] = messages
|
|
.map((msg) => {
|
|
if ('id' in msg && 'convId' in msg && 'timestamp' in msg) {
|
|
return ChatService.convertDbMessageToApiChatMessageData(
|
|
msg as DatabaseMessage & { extra?: DatabaseMessageExtra[] }
|
|
);
|
|
}
|
|
|
|
return msg as ApiChatMessageData;
|
|
})
|
|
.filter((msg) => {
|
|
if (msg.role === MessageRole.SYSTEM) {
|
|
const content = typeof msg.content === 'string' ? msg.content : '';
|
|
|
|
return content.trim().length > 0;
|
|
}
|
|
|
|
return true;
|
|
});
|
|
|
|
const requestBody: Record<string, unknown> = {
|
|
messages: normalizedMessages.map((msg: ApiChatMessageData) => {
|
|
const mapped: Record<string, unknown> = {
|
|
role: msg.role,
|
|
content: excludeReasoning ? ChatService.stripReasoningContent(msg.content) : msg.content,
|
|
tool_calls: msg.tool_calls,
|
|
tool_call_id: msg.tool_call_id
|
|
};
|
|
|
|
if (!excludeReasoning && msg.reasoning_content) {
|
|
mapped.reasoning_content = msg.reasoning_content;
|
|
}
|
|
|
|
return mapped;
|
|
}),
|
|
stream: false,
|
|
n_predict: 0
|
|
};
|
|
|
|
if (model) {
|
|
requestBody.model = model;
|
|
}
|
|
|
|
try {
|
|
await fetch(`./v1/chat/completions`, {
|
|
method: 'POST',
|
|
headers: getJsonHeaders(),
|
|
body: JSON.stringify(requestBody),
|
|
signal
|
|
});
|
|
} catch (error) {
|
|
if (!isAbortError(error)) {
|
|
console.warn('[ChatService] Pre-encode request failed:', error);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* Streaming
|
|
*
|
|
*
|
|
*/
|
|
|
|
/**
 * Handles streaming (SSE) response from the chat completion API.
 *
 * Reads the response body incrementally, splits it into newline-delimited
 * `data: ...` events, and aggregates three parallel streams of deltas:
 * regular content, reasoning content, and tool-call fragments. Tool calls
 * arriving between content chunks are grouped into "batches"; when content
 * resumes, the open batch is finalized so later tool-call deltas (which
 * restart their `index` at 0) are appended after the finished ones.
 *
 * @param response - The Response object from the fetch request
 * @param onChunk - Optional callback invoked for each content chunk received
 * @param onComplete - Optional callback invoked when the stream is complete with full response
 * @param onError - Optional callback invoked if an error occurs during streaming
 * @param onReasoningChunk - Optional callback invoked for each reasoning content chunk
 * @param onToolCallChunk - Optional callback invoked with the JSON-serialized aggregated tool calls
 * @param onModel - Optional callback invoked once with the model name from the first chunk that carries one
 * @param onTimings - Optional callback invoked with timing / prompt-progress updates
 * @param conversationId - Optional conversation ID for per-conversation state tracking
 * @param abortSignal - Optional AbortSignal; checked between reads/lines to stop promptly
 * @returns {Promise<void>} Promise that resolves when streaming is complete
 * @throws {Error} if the stream cannot be read or parsed
 */
private static async handleStreamResponse(
  response: Response,
  onChunk?: (chunk: string) => void,
  onComplete?: (
    response: string,
    reasoningContent?: string,
    timings?: ChatMessageTimings,
    toolCalls?: string
  ) => void,
  onError?: (error: Error) => void,
  onReasoningChunk?: (chunk: string) => void,
  onToolCallChunk?: (chunk: string) => void,
  onModel?: (model: string) => void,
  onTimings?: (timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void,
  conversationId?: string,
  abortSignal?: AbortSignal
): Promise<void> {
  const reader = response.body?.getReader();

  if (!reader) {
    throw new Error('No response body');
  }

  const decoder = new TextDecoder();
  // Accumulators for the three delta streams plus stream bookkeeping.
  let aggregatedContent = '';
  let fullReasoningContent = '';
  let aggregatedToolCalls: ApiChatCompletionToolCall[] = [];
  let lastTimings: ChatMessageTimings | undefined;
  let streamFinished = false; // set only when the server sends [DONE]
  let modelEmitted = false; // onModel fires at most once
  let toolCallIndexOffset = 0; // where the next tool-call batch starts in the aggregate
  let hasOpenToolCallBatch = false;

  // Close the current tool-call batch: subsequent deltas (whose `index`
  // restarts at 0) will be placed after the already-aggregated calls.
  const finalizeOpenToolCallBatch = () => {
    if (!hasOpenToolCallBatch) {
      return;
    }

    toolCallIndexOffset = aggregatedToolCalls.length;
    hasOpenToolCallBatch = false;
  };

  // Merge one chunk's tool-call deltas into the aggregate and notify the
  // caller with the full serialized aggregate (not just the delta).
  const processToolCallDelta = (toolCalls?: ApiChatCompletionToolCallDelta[]) => {
    if (!toolCalls || toolCalls.length === 0) {
      return;
    }

    aggregatedToolCalls = ChatService.mergeToolCallDeltas(
      aggregatedToolCalls,
      toolCalls,
      toolCallIndexOffset
    );

    if (aggregatedToolCalls.length === 0) {
      return;
    }

    hasOpenToolCallBatch = true;

    const serializedToolCalls = JSON.stringify(aggregatedToolCalls);

    if (import.meta.env.DEV) {
      console.log('[ChatService] Aggregated tool calls:', serializedToolCalls);
    }

    if (!serializedToolCalls) {
      return;
    }

    if (!abortSignal?.aborted) {
      onToolCallChunk?.(serializedToolCalls);
    }
  };

  try {
    // `chunk` holds any partial line left over from the previous read.
    let chunk = '';
    while (true) {
      if (abortSignal?.aborted) break;

      const { done, value } = await reader.read();
      if (done) break;

      if (abortSignal?.aborted) break;

      // Decode with stream:true so multi-byte characters split across
      // reads are handled correctly; keep the trailing partial line.
      chunk += decoder.decode(value, { stream: true });
      const lines = chunk.split('\n');
      chunk = lines.pop() || '';

      for (const line of lines) {
        if (abortSignal?.aborted) break;

        if (line.startsWith(UrlProtocol.DATA)) {
          // Strip the 'data: ' SSE prefix (6 characters).
          const data = line.slice(6);
          if (data === '[DONE]') {
            streamFinished = true;

            continue;
          }

          try {
            const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
            const content = parsed.choices[0]?.delta?.content;
            const reasoningContent = parsed.choices[0]?.delta?.reasoning_content;
            const toolCalls = parsed.choices[0]?.delta?.tool_calls;
            const timings = parsed.timings;
            const promptProgress = parsed.prompt_progress;

            // Emit the model name only once, from the first chunk carrying it.
            const chunkModel = ChatService.extractModelName(parsed);
            if (chunkModel && !modelEmitted) {
              modelEmitted = true;
              onModel?.(chunkModel);
            }

            if (promptProgress) {
              ChatService.notifyTimings(undefined, promptProgress, onTimings);
            }

            if (timings) {
              ChatService.notifyTimings(timings, promptProgress, onTimings);
              lastTimings = timings;
            }

            if (content) {
              // Content after tool calls closes the current tool-call batch.
              finalizeOpenToolCallBatch();
              aggregatedContent += content;
              if (!abortSignal?.aborted) {
                onChunk?.(content);
              }
            }

            if (reasoningContent) {
              finalizeOpenToolCallBatch();
              fullReasoningContent += reasoningContent;
              if (!abortSignal?.aborted) {
                onReasoningChunk?.(reasoningContent);
              }
            }

            processToolCallDelta(toolCalls);
          } catch (e) {
            // A malformed JSON line is logged and skipped; the stream continues.
            console.error('Error parsing JSON chunk:', e);
          }
        }
      }

      if (abortSignal?.aborted) break;
    }

    // On abort, exit without invoking onComplete.
    if (abortSignal?.aborted) return;

    // Only report completion if the server actually finished ([DONE] seen).
    if (streamFinished) {
      finalizeOpenToolCallBatch();

      const finalToolCalls =
        aggregatedToolCalls.length > 0 ? JSON.stringify(aggregatedToolCalls) : undefined;

      onComplete?.(
        aggregatedContent,
        fullReasoningContent || undefined,
        lastTimings,
        finalToolCalls
      );
    }
  } catch (error) {
    const err = error instanceof Error ? error : new Error('Stream error');

    onError?.(err);

    throw err;
  } finally {
    // Always release the reader lock so the body can be garbage-collected.
    reader.releaseLock();
  }
}
|
|
|
|
/**
|
|
* Handles non-streaming response from the chat completion API.
|
|
* Parses the JSON response and extracts the generated content.
|
|
*
|
|
* @param response - The fetch Response object containing the JSON data
|
|
* @param onComplete - Optional callback invoked when response is successfully parsed
|
|
* @param onError - Optional callback invoked if an error occurs during parsing
|
|
* @returns {Promise<string>} Promise that resolves to the generated content string
|
|
* @throws {Error} if the response cannot be parsed or is malformed
|
|
*/
|
|
private static async handleNonStreamResponse(
|
|
response: Response,
|
|
onComplete?: (
|
|
response: string,
|
|
reasoningContent?: string,
|
|
timings?: ChatMessageTimings,
|
|
toolCalls?: string
|
|
) => void,
|
|
onError?: (error: Error) => void,
|
|
onToolCallChunk?: (chunk: string) => void,
|
|
onModel?: (model: string) => void
|
|
): Promise<string> {
|
|
try {
|
|
const responseText = await response.text();
|
|
|
|
if (!responseText.trim()) {
|
|
const noResponseError = new Error('No response received from server. Please try again.');
|
|
|
|
throw noResponseError;
|
|
}
|
|
|
|
const data: ApiChatCompletionResponse = JSON.parse(responseText);
|
|
|
|
const responseModel = ChatService.extractModelName(data);
|
|
if (responseModel) {
|
|
onModel?.(responseModel);
|
|
}
|
|
|
|
const content = data.choices[0]?.message?.content || '';
|
|
const reasoningContent = data.choices[0]?.message?.reasoning_content;
|
|
const toolCalls = data.choices[0]?.message?.tool_calls;
|
|
|
|
let serializedToolCalls: string | undefined;
|
|
|
|
if (toolCalls && toolCalls.length > 0) {
|
|
const mergedToolCalls = ChatService.mergeToolCallDeltas([], toolCalls);
|
|
|
|
if (mergedToolCalls.length > 0) {
|
|
serializedToolCalls = JSON.stringify(mergedToolCalls);
|
|
if (serializedToolCalls) {
|
|
onToolCallChunk?.(serializedToolCalls);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!content.trim() && !serializedToolCalls) {
|
|
const noResponseError = new Error('No response received from server. Please try again.');
|
|
|
|
throw noResponseError;
|
|
}
|
|
|
|
onComplete?.(content, reasoningContent, undefined, serializedToolCalls);
|
|
|
|
return content;
|
|
} catch (error) {
|
|
const err = error instanceof Error ? error : new Error('Parse error');
|
|
|
|
onError?.(err);
|
|
|
|
throw err;
|
|
}
|
|
}
|
|
|
|
/**
 * Merges tool call deltas into an existing array of tool calls.
 * Handles both existing and new tool calls, updating existing ones and adding new ones.
 * The input array is never mutated; a fresh array (with copied entries) is returned.
 *
 * @param existing - The existing array of tool calls to merge into
 * @param deltas - The array of tool call deltas to merge
 * @param indexOffset - Optional offset to apply to the index of new tool calls
 * @returns {ApiChatCompletionToolCall[]} The merged array of tool calls
 */
private static mergeToolCallDeltas(
  existing: ApiChatCompletionToolCall[],
  deltas: ApiChatCompletionToolCallDelta[],
  indexOffset = 0
): ApiChatCompletionToolCall[] {
  // Shallow-copy each entry (and its function object) so callers' data stays intact.
  const merged: ApiChatCompletionToolCall[] = existing.map((call) => ({
    ...call,
    function: call.function ? { ...call.function } : undefined
  }));

  for (const delta of deltas) {
    // A delta without a usable index is appended at the end.
    const slot =
      typeof delta.index === 'number' && delta.index >= 0
        ? delta.index + indexOffset
        : merged.length;

    // Pad with empty placeholders so the target slot exists.
    while (merged.length <= slot) {
      merged.push({ function: undefined });
    }

    const entry = merged[slot]!;

    if (delta.id) {
      entry.id = delta.id;
    }

    if (delta.type) {
      entry.type = delta.type;
    }

    if (delta.function) {
      const fn = entry.function ? { ...entry.function } : {};

      if (delta.function.name) {
        fn.name = delta.function.name;
      }

      // Argument fragments are concatenated across deltas.
      if (delta.function.arguments) {
        fn.arguments = (fn.arguments ?? '') + delta.function.arguments;
      }

      entry.function = fn;
    }
  }

  return merged;
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* Conversion
|
|
*
|
|
*
|
|
*/
|
|
|
|
/**
 * Converts a database message with attachments to API chat message format.
 * Processes various attachment types (images, text files, PDFs) and formats them
 * as content parts suitable for the chat completion API.
 *
 * @param message - Database message object with optional extra attachments
 * @param message.content - The text content of the message
 * @param message.role - The role of the message sender (user, assistant, system)
 * @param message.extra - Optional array of message attachments (images, files, etc.)
 * @returns {ApiChatMessageData} object formatted for the chat completion API
 * @static
 */
static convertDbMessageToApiChatMessageData(
  message: DatabaseMessage & { extra?: DatabaseMessageExtra[] }
): ApiChatMessageData {
  // Tool result messages (role: 'tool') pass their content through unchanged.
  if (message.role === MessageRole.TOOL && message.toolCallId) {
    return {
      role: MessageRole.TOOL,
      content: message.content,
      tool_call_id: message.toolCallId
    };
  }

  // Tool calls are persisted as a JSON string on assistant messages.
  let parsedToolCalls: ApiChatCompletionToolCall[] | undefined;

  if (message.toolCalls) {
    try {
      parsedToolCalls = JSON.parse(message.toolCalls);
    } catch {
      // Malformed tool-call JSON is deliberately ignored.
    }
  }

  // Copies reasoning content and tool calls onto the outgoing message when present.
  const attachOptionalFields = (target: ApiChatMessageData): ApiChatMessageData => {
    if (message.reasoningContent) {
      target.reasoning_content = message.reasoningContent;
    }

    if (parsedToolCalls && parsedToolCalls.length > 0) {
      target.tool_calls = parsedToolCalls;
    }

    return target;
  };

  const extras = message.extra;

  // No attachments: plain string content.
  if (!extras || extras.length === 0) {
    return attachOptionalFields({
      role: message.role as MessageRole,
      content: message.content
    });
  }

  const parts: ApiChatMessageContentPart[] = [];

  if (message.content) {
    parts.push({
      type: ContentPartType.TEXT,
      text: message.content
    });
  }

  // Narrowing filter over the attachment list by discriminant type.
  const ofType = <T extends DatabaseMessageExtra>(kind: AttachmentType): T[] =>
    extras.filter((extra: DatabaseMessageExtra): extra is T => extra.type === kind);

  // Include images from all messages
  for (const image of ofType<DatabaseMessageExtraImageFile>(AttachmentType.IMAGE)) {
    parts.push({
      type: ContentPartType.IMAGE_URL,
      image_url: { url: image.base64Url }
    });
  }

  for (const file of ofType<DatabaseMessageExtraTextFile>(AttachmentType.TEXT)) {
    parts.push({
      type: ContentPartType.TEXT,
      text: formatAttachmentText('File', file.name, file.content)
    });
  }

  // Handle legacy 'context' type from old webui (pasted content)
  for (const legacy of ofType<DatabaseMessageExtraLegacyContext>(AttachmentType.LEGACY_CONTEXT)) {
    parts.push({
      type: ContentPartType.TEXT,
      text: formatAttachmentText('File', legacy.name, legacy.content)
    });
  }

  for (const audio of ofType<DatabaseMessageExtraAudioFile>(AttachmentType.AUDIO)) {
    parts.push({
      type: ContentPartType.INPUT_AUDIO,
      input_audio: {
        data: audio.base64Data,
        format: audio.mimeType.includes('wav') ? 'wav' : 'mp3'
      }
    });
  }

  for (const pdf of ofType<DatabaseMessageExtraPdfFile>(AttachmentType.PDF)) {
    // PDFs pre-rendered to page images become image parts; otherwise extracted text.
    if (pdf.processedAsImages && pdf.images) {
      for (const pageUrl of pdf.images) {
        parts.push({
          type: ContentPartType.IMAGE_URL,
          image_url: { url: pageUrl }
        });
      }
    } else {
      parts.push({
        type: ContentPartType.TEXT,
        text: formatAttachmentText(ATTACHMENT_LABEL_PDF_FILE, pdf.name, pdf.content)
      });
    }
  }

  for (const prompt of ofType<DatabaseMessageExtraMcpPrompt>(AttachmentType.MCP_PROMPT)) {
    parts.push({
      type: ContentPartType.TEXT,
      text: formatAttachmentText(
        ATTACHMENT_LABEL_MCP_PROMPT,
        prompt.name,
        prompt.content,
        prompt.serverName
      )
    });
  }

  for (const resource of ofType<DatabaseMessageExtraMcpResource>(AttachmentType.MCP_RESOURCE)) {
    parts.push({
      type: ContentPartType.TEXT,
      text: formatAttachmentText(
        ATTACHMENT_LABEL_MCP_RESOURCE,
        resource.name,
        resource.content,
        resource.serverName
      )
    });
  }

  return attachOptionalFields({
    role: message.role as MessageRole,
    content: parts
  });
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* Utilities
|
|
*
|
|
*
|
|
*/
|
|
|
|
/**
|
|
* Strips legacy inline reasoning content tags from message content.
|
|
* Handles both plain string content and multipart content arrays.
|
|
*/
|
|
private static stripReasoningContent(
|
|
content: string | ApiChatMessageContentPart[]
|
|
): string | ApiChatMessageContentPart[] {
|
|
const stripFromString = (text: string): string =>
|
|
text.replace(LEGACY_AGENTIC_REGEX.REASONING_BLOCK, '').trim();
|
|
|
|
if (typeof content === 'string') {
|
|
return stripFromString(content);
|
|
}
|
|
|
|
return content.map((part) => {
|
|
if (part.type === ContentPartType.TEXT && part.text) {
|
|
return { ...part, text: stripFromString(part.text) };
|
|
}
|
|
return part;
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Parses error response and creates appropriate error with context information
|
|
* @param response - HTTP response object
|
|
* @returns Promise<Error> - Parsed error with context info if available
|
|
*/
|
|
private static async parseErrorResponse(
|
|
response: Response
|
|
): Promise<Error & { contextInfo?: { n_prompt_tokens: number; n_ctx: number } }> {
|
|
try {
|
|
const errorText = await response.text();
|
|
const errorData: ApiErrorResponse = JSON.parse(errorText);
|
|
|
|
const message = errorData.error?.message || 'Unknown server error';
|
|
const error = new Error(message) as Error & {
|
|
contextInfo?: { n_prompt_tokens: number; n_ctx: number };
|
|
};
|
|
error.name = response.status === 400 ? 'ServerError' : 'HttpError';
|
|
|
|
if (errorData.error && 'n_prompt_tokens' in errorData.error && 'n_ctx' in errorData.error) {
|
|
error.contextInfo = {
|
|
n_prompt_tokens: errorData.error.n_prompt_tokens,
|
|
n_ctx: errorData.error.n_ctx
|
|
};
|
|
}
|
|
|
|
return error;
|
|
} catch {
|
|
const fallback = new Error(
|
|
`Server error (${response.status}): ${response.statusText}`
|
|
) as Error & {
|
|
contextInfo?: { n_prompt_tokens: number; n_ctx: number };
|
|
};
|
|
fallback.name = 'HttpError';
|
|
|
|
return fallback;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Extracts model name from Chat Completions API response data.
|
|
* Handles various response formats including streaming chunks and final responses.
|
|
*
|
|
* WORKAROUND: In single model mode, llama-server returns a default/incorrect model name
|
|
* in the response. We override it with the actual model name from serverStore.
|
|
*
|
|
* @param data - Raw response data from the Chat Completions API
|
|
* @returns Model name string if found, undefined otherwise
|
|
* @private
|
|
*/
|
|
private static extractModelName(data: unknown): string | undefined {
|
|
const asRecord = (value: unknown): Record<string, unknown> | undefined => {
|
|
return typeof value === 'object' && value !== null
|
|
? (value as Record<string, unknown>)
|
|
: undefined;
|
|
};
|
|
|
|
const getTrimmedString = (value: unknown): string | undefined => {
|
|
return typeof value === 'string' && value.trim() ? value.trim() : undefined;
|
|
};
|
|
|
|
const root = asRecord(data);
|
|
if (!root) return undefined;
|
|
|
|
// 1) root (some implementations provide `model` at the top level)
|
|
const rootModel = getTrimmedString(root.model);
|
|
if (rootModel) {
|
|
return rootModel;
|
|
}
|
|
|
|
// 2) streaming choice (delta) or final response (message)
|
|
const firstChoice = Array.isArray(root.choices) ? asRecord(root.choices[0]) : undefined;
|
|
if (!firstChoice) {
|
|
return undefined;
|
|
}
|
|
|
|
// priority: delta.model (first chunk) else message.model (final response)
|
|
const deltaModel = getTrimmedString(asRecord(firstChoice.delta)?.model);
|
|
if (deltaModel) {
|
|
return deltaModel;
|
|
}
|
|
|
|
const messageModel = getTrimmedString(asRecord(firstChoice.message)?.model);
|
|
if (messageModel) {
|
|
return messageModel;
|
|
}
|
|
|
|
// avoid guessing from non-standard locations (metadata, etc.)
|
|
return undefined;
|
|
}
|
|
|
|
/**
|
|
* Calls the onTimings callback with timing data from streaming response.
|
|
*
|
|
* @param timings - Timing information from the Chat Completions API response
|
|
* @param promptProgress - Prompt processing progress data
|
|
* @param onTimingsCallback - Callback function to invoke with timing data
|
|
* @private
|
|
*/
|
|
private static notifyTimings(
|
|
timings: ChatMessageTimings | undefined,
|
|
promptProgress: ChatMessagePromptProgress | undefined,
|
|
onTimingsCallback:
|
|
| ((timings?: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void)
|
|
| undefined
|
|
): void {
|
|
if (!onTimingsCallback || (!timings && !promptProgress)) return;
|
|
|
|
onTimingsCallback(timings, promptProgress);
|
|
}
|
|
}
|