Pre-MCP UI and architecture cleanup (#19689)

This commit is contained in:
Aleksander Grygier
2026-02-18 12:02:02 +01:00
committed by GitHub
parent d0061be838
commit ea003229d3
52 changed files with 5553 additions and 4325 deletions
@@ -1,42 +1,52 @@
import { getJsonHeaders } from '$lib/utils';
import { AttachmentType } from '$lib/enums';
import { getJsonHeaders, formatAttachmentText, isAbortError } from '$lib/utils';
import { ATTACHMENT_LABEL_PDF_FILE } from '$lib/constants/attachment-labels';
import {
AttachmentType,
ContentPartType,
MessageRole,
ReasoningFormat,
UrlPrefix
} from '$lib/enums';
import type { ApiChatMessageContentPart, ApiChatCompletionToolCall } from '$lib/types/api';
import { modelsStore } from '$lib/stores/models.svelte';
import { AGENTIC_REGEX } from '$lib/constants/agentic';
/**
* ChatService - Low-level API communication layer for Chat Completions
*
* **Terminology - Chat vs Conversation:**
* - **Chat**: The active interaction space with the Chat Completions API. This service
* handles the real-time communication with the AI backend - sending messages, receiving
* streaming responses, and managing request lifecycles. "Chat" is ephemeral and runtime-focused.
* - **Conversation**: The persistent database entity storing all messages and metadata.
* Managed by ConversationsService/Store, conversations persist across sessions.
*
* This service handles direct communication with the llama-server's Chat Completions API.
* It provides the network layer abstraction for AI model interactions while remaining
* stateless and focused purely on API communication.
*
* **Architecture & Relationships:**
* - **ChatService** (this class): Stateless API communication layer
* - Handles HTTP requests/responses with the llama-server
* - Manages streaming and non-streaming response parsing
* - Provides per-conversation request abortion capabilities
* - Converts database messages to API format
* - Handles error translation for server responses
*
* - **chatStore**: Uses ChatService for all AI model communication
* - **conversationsStore**: Provides message context for API requests
*
* **Key Responsibilities:**
 * - Message format conversion (DatabaseMessage → API format)
* - Streaming response handling with real-time callbacks
* - Reasoning content extraction and processing
* - File attachment processing (images, PDFs, audio, text)
* - Request lifecycle management (abort via AbortSignal)
*/
export class ChatService {
// ─────────────────────────────────────────────────────────────────────────────
// Messaging
// ─────────────────────────────────────────────────────────────────────────────
/**
 * Scrubs reasoning tags/content out of message content before it is sent
 * back to the server, keeping prior reasoning output from polluting the
 * prompt (and thereby the KV cache).
 *
 * Supports plain-string content and multi-part content arrays; within an
 * array only TEXT parts are scrubbed, every other part type passes through
 * untouched. Nullish or otherwise non-string/non-array content is returned
 * exactly as received.
 */
private static stripReasoningContent(
  content: ApiChatMessageData['content'] | null | undefined
): ApiChatMessageData['content'] | null | undefined {
  // Nullish/empty content needs no processing — hand it straight back.
  if (!content) {
    return content;
  }

  // Shared scrubber: drop closed reasoning blocks first, then any dangling opener.
  const scrub = (text: string): string =>
    text.replace(AGENTIC_REGEX.REASONING_BLOCK, '').replace(AGENTIC_REGEX.REASONING_OPEN, '');

  if (typeof content === 'string') {
    return scrub(content);
  }

  // Anything that is neither a string nor an array is passed through unchanged.
  if (!Array.isArray(content)) {
    return content;
  }

  return content.map((contentPart: ApiChatMessageContentPart) => {
    // Only TEXT parts with actual text get scrubbed; leave the rest alone.
    if (contentPart.type !== ContentPartType.TEXT || !contentPart.text) {
      return contentPart;
    }
    return { ...contentPart, text: scrub(contentPart.text) };
  });
}
/**
*
*
* Messaging
*
*
*/
/**
* Sends a chat completion request to the llama.cpp server.
@@ -63,6 +73,8 @@ export class ChatService {
onToolCallChunk,
onModel,
onTimings,
// Tools for function calling
tools,
// Generation parameters
temperature,
max_tokens,
@@ -97,6 +109,7 @@ export class ChatService {
.map((msg) => {
if ('id' in msg && 'convId' in msg && 'timestamp' in msg) {
const dbMsg = msg as DatabaseMessage & { extra?: DatabaseMessageExtra[] };
return ChatService.convertDbMessageToApiChatMessageData(dbMsg);
} else {
return msg as ApiChatMessageData;
@@ -104,7 +117,7 @@ export class ChatService {
})
.filter((msg) => {
// Filter out empty system messages
if (msg.role === 'system') {
if (msg.role === MessageRole.SYSTEM) {
const content = typeof msg.content === 'string' ? msg.content : '';
return content.trim().length > 0;
@@ -113,13 +126,41 @@ export class ChatService {
return true;
});
// Filter out image attachments if the model doesn't support vision
if (options.model && !modelsStore.modelSupportsVision(options.model)) {
normalizedMessages.forEach((msg) => {
if (Array.isArray(msg.content)) {
msg.content = msg.content.filter((part: ApiChatMessageContentPart) => {
if (part.type === ContentPartType.IMAGE_URL) {
console.info(
`[ChatService] Skipping image attachment in message history (model "${options.model}" does not support vision)`
);
return false;
}
return true;
});
// If only text remains and it's a single part, simplify to string
if (msg.content.length === 1 && msg.content[0].type === ContentPartType.TEXT) {
msg.content = msg.content[0].text;
}
}
});
}
const requestBody: ApiChatCompletionRequest = {
messages: normalizedMessages.map((msg: ApiChatMessageData) => ({
role: msg.role,
content: msg.content
// Strip reasoning tags/content from the prompt to avoid polluting KV cache.
// TODO: investigate backend expectations for reasoning tags and add a toggle if needed.
content: ChatService.stripReasoningContent(msg.content),
tool_calls: msg.tool_calls,
tool_call_id: msg.tool_call_id
})),
stream,
return_progress: stream ? true : undefined
return_progress: stream ? true : undefined,
tools: tools && tools.length > 0 ? tools : undefined
};
// Include model in request if provided (required in ROUTER mode)
@@ -127,7 +168,9 @@ export class ChatService {
requestBody.model = options.model;
}
requestBody.reasoning_format = disableReasoningParsing ? 'none' : 'auto';
requestBody.reasoning_format = disableReasoningParsing
? ReasoningFormat.NONE
: ReasoningFormat.AUTO;
if (temperature !== undefined) requestBody.temperature = temperature;
if (max_tokens !== undefined) {
@@ -183,9 +226,11 @@ export class ChatService {
if (!response.ok) {
const error = await ChatService.parseErrorResponse(response);
if (onError) {
onError(error);
}
throw error;
}
@@ -202,6 +247,7 @@ export class ChatService {
conversationId,
signal
);
return;
} else {
return ChatService.handleNonStreamResponse(
@@ -213,7 +259,7 @@ export class ChatService {
);
}
} catch (error) {
if (error instanceof Error && error.name === 'AbortError') {
if (isAbortError(error)) {
console.log('Chat completion request was aborted');
return;
}
@@ -240,16 +286,22 @@ export class ChatService {
}
console.error('Error in sendMessage:', error);
if (onError) {
onError(userFriendlyError);
}
throw userFriendlyError;
}
}
// ─────────────────────────────────────────────────────────────────────────────
// Streaming
// ─────────────────────────────────────────────────────────────────────────────
/**
*
*
* Streaming
*
*
*/
/**
* Handles streaming response from the chat completion API
@@ -323,6 +375,10 @@ export class ChatService {
const serializedToolCalls = JSON.stringify(aggregatedToolCalls);
if (import.meta.env.DEV) {
console.log('[ChatService] Aggregated tool calls:', serializedToolCalls);
}
if (!serializedToolCalls) {
return;
}
@@ -349,10 +405,11 @@ export class ChatService {
for (const line of lines) {
if (abortSignal?.aborted) break;
if (line.startsWith('data: ')) {
if (line.startsWith(UrlPrefix.DATA)) {
const data = line.slice(6);
if (data === '[DONE]') {
streamFinished = true;
continue;
}
@@ -458,6 +515,7 @@ export class ChatService {
if (!responseText.trim()) {
const noResponseError = new Error('No response received from server. Please try again.');
throw noResponseError;
}
@@ -472,10 +530,6 @@ export class ChatService {
const reasoningContent = data.choices[0]?.message?.reasoning_content;
const toolCalls = data.choices[0]?.message?.tool_calls;
if (reasoningContent) {
console.log('Full reasoning content:', reasoningContent);
}
let serializedToolCalls: string | undefined;
if (toolCalls && toolCalls.length > 0) {
@@ -491,6 +545,7 @@ export class ChatService {
if (!content.trim() && !serializedToolCalls) {
const noResponseError = new Error('No response received from server. Please try again.');
throw noResponseError;
}
@@ -563,9 +618,13 @@ export class ChatService {
return result;
}
// ─────────────────────────────────────────────────────────────────────────────
// Conversion
// ─────────────────────────────────────────────────────────────────────────────
/**
*
*
* Conversion
*
*
*/
/**
* Converts a database message with attachments to API chat message format.
@@ -582,22 +641,48 @@ export class ChatService {
static convertDbMessageToApiChatMessageData(
message: DatabaseMessage & { extra?: DatabaseMessageExtra[] }
): ApiChatMessageData {
if (!message.extra || message.extra.length === 0) {
// Handle tool result messages (role: 'tool')
if (message.role === MessageRole.TOOL && message.toolCallId) {
return {
role: message.role as 'user' | 'assistant' | 'system',
role: MessageRole.TOOL,
content: message.content,
tool_call_id: message.toolCallId
};
}
// Parse tool calls for assistant messages
let toolCalls: ApiChatCompletionToolCall[] | undefined;
if (message.toolCalls) {
try {
toolCalls = JSON.parse(message.toolCalls);
} catch {
// Ignore parse errors for malformed tool calls
}
}
if (!message.extra || message.extra.length === 0) {
const result: ApiChatMessageData = {
role: message.role as MessageRole,
content: message.content
};
if (toolCalls && toolCalls.length > 0) {
result.tool_calls = toolCalls;
}
return result;
}
const contentParts: ApiChatMessageContentPart[] = [];
if (message.content) {
contentParts.push({
type: 'text',
type: ContentPartType.TEXT,
text: message.content
});
}
// Include images from all messages
const imageFiles = message.extra.filter(
(extra: DatabaseMessageExtra): extra is DatabaseMessageExtraImageFile =>
extra.type === AttachmentType.IMAGE
@@ -605,7 +690,7 @@ export class ChatService {
for (const image of imageFiles) {
contentParts.push({
type: 'image_url',
type: ContentPartType.IMAGE_URL,
image_url: { url: image.base64Url }
});
}
@@ -617,8 +702,8 @@ export class ChatService {
for (const textFile of textFiles) {
contentParts.push({
type: 'text',
text: `\n\n--- File: ${textFile.name} ---\n${textFile.content}`
type: ContentPartType.TEXT,
text: formatAttachmentText('File', textFile.name, textFile.content)
});
}
@@ -630,8 +715,8 @@ export class ChatService {
for (const legacyContextFile of legacyContextFiles) {
contentParts.push({
type: 'text',
text: `\n\n--- File: ${legacyContextFile.name} ---\n${legacyContextFile.content}`
type: ContentPartType.TEXT,
text: formatAttachmentText('File', legacyContextFile.name, legacyContextFile.content)
});
}
@@ -642,7 +727,7 @@ export class ChatService {
for (const audio of audioFiles) {
contentParts.push({
type: 'input_audio',
type: ContentPartType.INPUT_AUDIO,
input_audio: {
data: audio.base64Data,
format: audio.mimeType.includes('wav') ? 'wav' : 'mp3'
@@ -659,27 +744,33 @@ export class ChatService {
if (pdfFile.processedAsImages && pdfFile.images) {
for (let i = 0; i < pdfFile.images.length; i++) {
contentParts.push({
type: 'image_url',
type: ContentPartType.IMAGE_URL,
image_url: { url: pdfFile.images[i] }
});
}
} else {
contentParts.push({
type: 'text',
text: `\n\n--- PDF File: ${pdfFile.name} ---\n${pdfFile.content}`
type: ContentPartType.TEXT,
text: formatAttachmentText(ATTACHMENT_LABEL_PDF_FILE, pdfFile.name, pdfFile.content)
});
}
}
return {
role: message.role as 'user' | 'assistant' | 'system',
const result: ApiChatMessageData = {
role: message.role as MessageRole,
content: contentParts
};
return result;
}
// ─────────────────────────────────────────────────────────────────────────────
// Utilities
// ─────────────────────────────────────────────────────────────────────────────
/**
*
*
* Utilities
*
*
*/
/**
* Parses error response and creates appropriate error with context information
@@ -714,6 +805,7 @@ export class ChatService {
contextInfo?: { n_prompt_tokens: number; n_ctx: number };
};
fallback.name = 'HttpError';
return fallback;
}
}
@@ -745,18 +837,26 @@ export class ChatService {
// 1) root (some implementations provide `model` at the top level)
const rootModel = getTrimmedString(root.model);
if (rootModel) return rootModel;
if (rootModel) {
return rootModel;
}
// 2) streaming choice (delta) or final response (message)
const firstChoice = Array.isArray(root.choices) ? asRecord(root.choices[0]) : undefined;
if (!firstChoice) return undefined;
if (!firstChoice) {
return undefined;
}
// priority: delta.model (first chunk) else message.model (final response)
const deltaModel = getTrimmedString(asRecord(firstChoice.delta)?.model);
if (deltaModel) return deltaModel;
if (deltaModel) {
return deltaModel;
}
const messageModel = getTrimmedString(asRecord(firstChoice.message)?.model);
if (messageModel) return messageModel;
if (messageModel) {
return messageModel;
}
// avoid guessing from non-standard locations (metadata, etc.)
return undefined;
+211 -2
View File
@@ -1,5 +1,214 @@
export { ChatService } from './chat';
/**
*
* SERVICES
*
* Stateless service layer for API communication and data operations.
* Services handle protocol-level concerns (HTTP, WebSocket, MCP, IndexedDB)
* without managing reactive state — that responsibility belongs to stores.
*
* **Design Principles:**
* - All methods are static — no instance state
* - Pure I/O operations (network requests, database queries)
* - No Svelte runes or reactive primitives
* - Error handling at the protocol level; business-level error handling in stores
*
* **Architecture (bottom to top):**
* - **Services** (this layer): Stateless protocol communication
* - **Stores**: Reactive state management consuming services
* - **Components**: UI consuming stores
*
*/
/**
* **ChatService** - Chat Completions API communication layer
*
* Handles direct communication with the llama-server's `/v1/chat/completions` endpoint.
* Provides streaming and non-streaming response parsing, message format conversion
* (DatabaseMessage → API format), and request lifecycle management.
*
* **Terminology - Chat vs Conversation:**
* - **Chat**: The active interaction space with the Chat Completions API. Ephemeral and
* runtime-focused — sending messages, receiving streaming responses, managing request lifecycles.
* - **Conversation**: The persistent database entity storing all messages and metadata.
* Managed by conversationsStore, conversations persist across sessions.
*
* **Architecture & Relationships:**
* - **ChatService** (this class): Stateless API communication layer
* - Handles HTTP requests/responses with the llama-server
* - Manages streaming and non-streaming response parsing
* - Converts database messages to API format (multimodal, tool calls)
* - Handles error translation with user-friendly messages
*
* - **chatStore**: Primary consumer — uses ChatService for all AI model communication
* - **agenticStore**: Uses ChatService for multi-turn agentic loop streaming
* - **conversationsStore**: Provides message context for API requests
*
* **Key Responsibilities:**
* - Streaming response handling with real-time content/reasoning/tool-call callbacks
* - Non-streaming response parsing with complete response extraction
* - Database message to API format conversion (attachments, tool calls, multimodal)
* - Tool call delta merging for incremental streaming aggregation
* - Request parameter assembly (sampling, penalties, custom params)
* - File attachment processing (images, PDFs, audio, text, MCP prompts/resources)
* - Reasoning content stripping from prompt history to avoid KV cache pollution
* - Error translation (network, timeout, server errors → user-friendly messages)
*
* @see chatStore in stores/chat.svelte.ts — primary consumer for chat state management
* @see agenticStore in stores/agentic.svelte.ts — uses ChatService for agentic loop streaming
* @see conversationsStore in stores/conversations.svelte.ts — provides message context
*/
export { ChatService } from './chat.service';
/**
* **DatabaseService** - IndexedDB persistence layer via Dexie ORM
*
* Provides stateless data access for conversations and messages using IndexedDB.
* Handles all low-level storage operations including branching tree structures,
* cascade deletions, and transaction safety for multi-table operations.
*
* **Architecture & Relationships (bottom to top):**
* - **DatabaseService** (this class): Stateless IndexedDB operations
* - Lowest layer — direct Dexie/IndexedDB communication
* - Pure CRUD operations without business logic
* - Handles branching tree structure (parent-child relationships)
* - Provides transaction safety for multi-table operations
*
* - **conversationsStore**: Reactive state management layer
* - Uses DatabaseService for all persistence operations
* - Manages conversation list, active conversation, and messages in memory
*
* - **chatStore**: Active AI interaction management
* - Uses conversationsStore for conversation context
* - Directly uses DatabaseService for message CRUD during streaming
*
* **Key Responsibilities:**
* - Conversation CRUD (create, read, update, delete)
* - Message CRUD with branching support (parent-child relationships)
* - Root message and system prompt creation
* - Cascade deletion of message branches (descendants)
* - Transaction-safe multi-table operations
* - Conversation import with duplicate detection
*
* **Database Schema:**
* - `conversations`: id, lastModified, currNode, name
* - `messages`: id, convId, type, role, timestamp, parent, children
*
* **Branching Model:**
* Messages form a tree structure where each message can have multiple children,
* enabling conversation branching and alternative response paths. The conversation's
* `currNode` tracks the currently active branch endpoint.
*
* @see conversationsStore in stores/conversations.svelte.ts — reactive layer on top of DatabaseService
* @see chatStore in stores/chat.svelte.ts — uses DatabaseService directly for message CRUD during streaming
*/
export { DatabaseService } from './database.service';
/**
* **ModelsService** - Model management API communication
*
* Handles communication with model-related endpoints for both MODEL (single model)
* and ROUTER (multi-model) server modes. Provides model listing, loading/unloading,
* and status checking without managing any model state.
*
* **Architecture & Relationships:**
* - **ModelsService** (this class): Stateless HTTP communication
* - Sends requests to model endpoints
* - Parses and returns typed API responses
* - Provides model status utility methods
*
* - **modelsStore**: Primary consumer — manages reactive model state
* - Calls ModelsService for all model API operations
* - Handles polling, caching, and state updates
*
* **Key Responsibilities:**
* - List available models via OpenAI-compatible `/v1/models` endpoint
* - Load/unload models via `/models/load` and `/models/unload` (ROUTER mode)
* - Model status queries (loaded, loading)
*
* **Server Mode Behavior:**
* - **MODEL mode**: Only `list()` is relevant — single model always loaded
* - **ROUTER mode**: Full lifecycle — `list()`, `listRouter()`, `load()`, `unload()`
*
* **Endpoints:**
* - `GET /v1/models` — OpenAI-compatible model list (both modes)
* - `POST /models/load` — Load a model (ROUTER mode only)
* - `POST /models/unload` — Unload a model (ROUTER mode only)
*
* @see modelsStore in stores/models.svelte.ts — primary consumer for reactive model state
*/
export { ModelsService } from './models.service';
/**
* **PropsService** - Server properties and capabilities retrieval
*
* Fetches server configuration, model information, and capabilities from the `/props`
* endpoint. Supports both global server props and per-model props (ROUTER mode).
*
* **Architecture & Relationships:**
* - **PropsService** (this class): Stateless HTTP communication
* - Fetches server properties from `/props` endpoint
* - Handles authentication and request parameters
* - Returns typed `ApiLlamaCppServerProps` responses
*
* - **serverStore**: Consumes global server properties (role detection, connection state)
* - **modelsStore**: Consumes per-model properties (modalities, context size)
* - **settingsStore**: Syncs default generation parameters from props response
*
* **Key Responsibilities:**
* - Fetch global server properties (default generation settings, modalities)
* - Fetch per-model properties in ROUTER mode via `?model=<id>` parameter
* - Handle autoload control to prevent unintended model loading
*
* **API Behavior:**
* - `GET /props` → Global server props (MODEL mode: includes modalities)
* - `GET /props?model=<id>` → Per-model props (ROUTER mode: model-specific modalities)
* - `&autoload=false` → Prevents model auto-loading when querying props
*
* @see serverStore in stores/server.svelte.ts — consumes global server props
* @see modelsStore in stores/models.svelte.ts — consumes per-model props for modalities
* @see settingsStore in stores/settings.svelte.ts — syncs default generation params from props
*/
export { PropsService } from './props.service';
export { ParameterSyncService, SYNCABLE_PARAMETERS } from './parameter-sync.service';
/**
* **ParameterSyncService** - Server defaults and user settings synchronization
*
* Manages the complex logic of merging server-provided default parameters with
* user-configured overrides. Ensures the UI reflects the actual server state
* while preserving user customizations. Tracks parameter sources (server default
* vs user override) for display in the settings UI.
*
* **Architecture & Relationships:**
* - **ParameterSyncService** (this class): Stateless sync logic
* - Pure functions for parameter extraction, merging, and diffing
* - No side effects — receives data in, returns data out
* - Handles floating-point precision normalization
*
* - **settingsStore**: Primary consumer — calls sync methods during:
* - Initial load (`syncWithServerDefaults`)
* - Settings reset (`forceSyncWithServerDefaults`)
* - Parameter info queries (`getParameterInfo`)
*
* - **PropsService**: Provides raw server props that feed into extraction
*
* **Key Responsibilities:**
* - Extract syncable parameters from server `/props` response
* - Merge server defaults with user overrides (user wins)
* - Track parameter source (Custom vs Default) for UI badges
* - Validate server parameter values by type (number, string, boolean)
* - Create diffs between current settings and server defaults
* - Floating-point precision normalization for consistent comparisons
*
* **Parameter Source Priority:**
* 1. **User Override** (Custom badge) — explicitly set by user in settings
* 2. **Server Default** (Default badge) — from `/props` endpoint
* 3. **App Default** — hardcoded fallback when server props unavailable
*
* **Exports:**
* - `ParameterSyncService` class — static methods for sync logic
* - `SYNCABLE_PARAMETERS` — mapping of webui setting keys to server parameter keys
*
* @see settingsStore in stores/settings.svelte.ts — primary consumer for settings sync
* @see ChatSettingsParameterSourceIndicator — displays parameter source badges in UI
*/
export { ParameterSyncService } from './parameter-sync.service';