webui: remove client-side context pre-check and rely on backend for limits (#16506)
* fix: make SSE client robust to premature [DONE] in agentic proxy chains
* webui: remove client-side context pre-check and rely on backend for limits
  - Removed the client-side context window pre-check; messages are now simply sent, and the dialog imports are limited to core components, eliminating the maximum context alert path.
  - Simplified streaming and non-streaming chat error handling to surface a generic 'No response received from server' error whenever the backend returns no content.
  - Removed the obsolete maxContextError plumbing from the chat store, so state management now focuses on the core message flow without special context-limit cases.
* webui: cosmetic rename of error messages
* Update tools/server/webui/src/lib/stores/chat.svelte.ts Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/stores/chat.svelte.ts Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* Update tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreen.svelte Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
* chore: update webui build output
--------- Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
This commit is contained in:
@@ -13,7 +13,7 @@ import { slotsService } from './slots';
|
||||
* - Manages streaming and non-streaming response parsing
|
||||
* - Provides request abortion capabilities
|
||||
* - Converts database messages to API format
|
||||
* - Handles error translation and context detection
|
||||
* - Handles error translation for server responses
|
||||
*
|
||||
* - **ChatStore**: Stateful orchestration and UI state management
|
||||
* - Uses ChatService for all AI model communication
|
||||
@@ -26,7 +26,6 @@ import { slotsService } from './slots';
|
||||
* - Streaming response handling with real-time callbacks
|
||||
* - Reasoning content extraction and processing
|
||||
* - File attachment processing (images, PDFs, audio, text)
|
||||
* - Context error detection and reporting
|
||||
* - Request lifecycle management (abort, cleanup)
|
||||
*/
|
||||
export class ChatService {
|
||||
@@ -209,10 +208,13 @@ export class ChatService {
|
||||
userFriendlyError = new Error(
|
||||
'Unable to connect to server - please check if the server is running'
|
||||
);
|
||||
userFriendlyError.name = 'NetworkError';
|
||||
} else if (error.message.includes('ECONNREFUSED')) {
|
||||
userFriendlyError = new Error('Connection refused - server may be offline');
|
||||
userFriendlyError.name = 'NetworkError';
|
||||
} else if (error.message.includes('ETIMEDOUT')) {
|
||||
userFriendlyError = new Error('Request timeout - server may be overloaded');
|
||||
userFriendlyError = new Error('Request timed out - the server took too long to respond');
|
||||
userFriendlyError.name = 'TimeoutError';
|
||||
} else {
|
||||
userFriendlyError = error;
|
||||
}
|
||||
@@ -262,6 +264,7 @@ export class ChatService {
|
||||
let fullReasoningContent = '';
|
||||
let hasReceivedData = false;
|
||||
let lastTimings: ChatMessageTimings | undefined;
|
||||
let streamFinished = false;
|
||||
|
||||
try {
|
||||
let chunk = '';
|
||||
@@ -277,18 +280,8 @@ export class ChatService {
|
||||
if (line.startsWith('data: ')) {
|
||||
const data = line.slice(6);
|
||||
if (data === '[DONE]') {
|
||||
if (!hasReceivedData && aggregatedContent.length === 0) {
|
||||
const contextError = new Error(
|
||||
'The request exceeds the available context size. Try increasing the context size or enable context shift.'
|
||||
);
|
||||
contextError.name = 'ContextError';
|
||||
onError?.(contextError);
|
||||
return;
|
||||
}
|
||||
|
||||
onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
|
||||
|
||||
return;
|
||||
streamFinished = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
@@ -326,13 +319,13 @@ export class ChatService {
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasReceivedData && aggregatedContent.length === 0) {
|
||||
const contextError = new Error(
|
||||
'The request exceeds the available context size. Try increasing the context size or enable context shift.'
|
||||
);
|
||||
contextError.name = 'ContextError';
|
||||
onError?.(contextError);
|
||||
return;
|
||||
if (streamFinished) {
|
||||
if (!hasReceivedData && aggregatedContent.length === 0) {
|
||||
const noResponseError = new Error('No response received from server. Please try again.');
|
||||
throw noResponseError;
|
||||
}
|
||||
|
||||
onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings);
|
||||
}
|
||||
} catch (error) {
|
||||
const err = error instanceof Error ? error : new Error('Stream error');
|
||||
@@ -368,12 +361,8 @@ export class ChatService {
|
||||
const responseText = await response.text();
|
||||
|
||||
if (!responseText.trim()) {
|
||||
const contextError = new Error(
|
||||
'The request exceeds the available context size. Try increasing the context size or enable context shift.'
|
||||
);
|
||||
contextError.name = 'ContextError';
|
||||
onError?.(contextError);
|
||||
throw contextError;
|
||||
const noResponseError = new Error('No response received from server. Please try again.');
|
||||
throw noResponseError;
|
||||
}
|
||||
|
||||
const data: ApiChatCompletionResponse = JSON.parse(responseText);
|
||||
@@ -385,22 +374,14 @@ export class ChatService {
|
||||
}
|
||||
|
||||
if (!content.trim()) {
|
||||
const contextError = new Error(
|
||||
'The request exceeds the available context size. Try increasing the context size or enable context shift.'
|
||||
);
|
||||
contextError.name = 'ContextError';
|
||||
onError?.(contextError);
|
||||
throw contextError;
|
||||
const noResponseError = new Error('No response received from server. Please try again.');
|
||||
throw noResponseError;
|
||||
}
|
||||
|
||||
onComplete?.(content, reasoningContent);
|
||||
|
||||
return content;
|
||||
} catch (error) {
|
||||
if (error instanceof Error && error.name === 'ContextError') {
|
||||
throw error;
|
||||
}
|
||||
|
||||
const err = error instanceof Error ? error : new Error('Parse error');
|
||||
|
||||
onError?.(err);
|
||||
@@ -594,37 +575,19 @@ export class ChatService {
|
||||
const errorText = await response.text();
|
||||
const errorData: ApiErrorResponse = JSON.parse(errorText);
|
||||
|
||||
if (errorData.error?.type === 'exceed_context_size_error') {
|
||||
const contextError = errorData.error as ApiContextSizeError;
|
||||
const error = new Error(contextError.message);
|
||||
error.name = 'ContextError';
|
||||
// Attach structured context information
|
||||
(
|
||||
error as Error & {
|
||||
contextInfo?: { promptTokens: number; maxContext: number; estimatedTokens: number };
|
||||
}
|
||||
).contextInfo = {
|
||||
promptTokens: contextError.n_prompt_tokens,
|
||||
maxContext: contextError.n_ctx,
|
||||
estimatedTokens: contextError.n_prompt_tokens
|
||||
};
|
||||
return error;
|
||||
}
|
||||
|
||||
// Fallback for other error types
|
||||
const message = errorData.error?.message || 'Unknown server error';
|
||||
return new Error(message);
|
||||
const error = new Error(message);
|
||||
error.name = response.status === 400 ? 'ServerError' : 'HttpError';
|
||||
|
||||
return error;
|
||||
} catch {
|
||||
// If we can't parse the error response, return a generic error
|
||||
return new Error(`Server error (${response.status}): ${response.statusText}`);
|
||||
const fallback = new Error(`Server error (${response.status}): ${response.statusText}`);
|
||||
fallback.name = 'HttpError';
|
||||
return fallback;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the processing state with timing information from the server response
|
||||
* @param timings - Timing data from the API response
|
||||
* @param promptProgress - Progress data from the API response
|
||||
*/
|
||||
private updateProcessingState(
|
||||
timings?: ChatMessageTimings,
|
||||
promptProgress?: ChatMessagePromptProgress
|
||||
|
||||
@@ -1,102 +0,0 @@
|
||||
import { slotsService } from './slots';
|
||||
|
||||
/**
 * Result of a context-limit check as produced by `ContextService.checkContextLimit`.
 */
export interface ContextCheckResult {
|
||||
/** True when no tokens remain after subtracting current usage and the reserve from the maximum. */
wouldExceed: boolean;
|
||||
/** Context tokens currently in use (taken from the slots state's `contextUsed`). */
currentUsage: number;
|
||||
/** Total context size (taken from the slots state's `contextTotal`). */
maxContext: number;
|
||||
/** Tokens still available after usage and the reserve are subtracted; never negative. */
availableTokens: number;
|
||||
/** Number of tokens held back by the service as a reserve. */
reservedTokens: number;
|
||||
}
|
||||
|
||||
/**
 * ContextService - Context window management and limit checking
 *
 * Reads the current context usage from `slotsService` and works out how many
 * tokens are still available once a configurable reserve has been set aside
 * for the response.
 *
 * **Key Features:**
 * - **Limit checking**: `checkContextLimit` reports usage, capacity, remaining
 *   tokens and whether the limit would be exceeded
 * - **Token reservation**: a reserve (512 tokens by default) is subtracted from
 *   the remaining space before the limit is evaluated
 * - **Error messaging**: `getContextErrorMessage` formats a user-facing summary
 *   of a check result
 */
export class ContextService {
	private reserveTokens: number;

	/**
	 * @param reserveTokens - Number of tokens to reserve for response generation
	 */
	constructor(reserveTokens = 512) {
		this.reserveTokens = reserveTokens;
	}

	/**
	 * Checks if the context limit would be exceeded.
	 *
	 * @returns The context check result, or `null` when the slots state is
	 * unavailable or reading it fails (the failure is logged, not rethrown)
	 */
	async checkContextLimit(): Promise<ContextCheckResult | null> {
		try {
			const currentState = await slotsService.getCurrentState();

			if (!currentState) {
				return null;
			}

			const maxContext = currentState.contextTotal;
			const currentUsage = currentState.contextUsed;
			const availableTokens = maxContext - currentUsage - this.reserveTokens;

			return {
				wouldExceed: availableTokens <= 0,
				currentUsage,
				maxContext,
				// The raw difference can be negative; callers only ever see a non-negative count.
				availableTokens: Math.max(0, availableTokens),
				reservedTokens: this.reserveTokens
			};
		} catch (error) {
			// Best effort: a failed check is reported as "unknown" rather than as an error.
			console.warn('Error checking context limit:', error);
			return null;
		}
	}

	/**
	 * Returns a formatted error message for context limit exceeded.
	 *
	 * @param result - Context check result
	 * @returns Formatted error message
	 */
	getContextErrorMessage(result: ContextCheckResult): string {
		// Guard the division: a zero (or negative) context size would otherwise render
		// as "NaN%" or "Infinity%". With no capacity at all the window is reported as full.
		const usagePercent =
			result.maxContext > 0 ? Math.round((result.currentUsage / result.maxContext) * 100) : 100;

		return `Context window is nearly full. Current usage: ${result.currentUsage.toLocaleString()}/${result.maxContext.toLocaleString()} tokens (${usagePercent}%). Available space: ${result.availableTokens.toLocaleString()} tokens (${result.reservedTokens} reserved for response).`;
	}

	/**
	 * Sets the number of tokens to reserve for response generation.
	 *
	 * @param tokens - Number of tokens to reserve
	 */
	setReserveTokens(tokens: number): void {
		this.reserveTokens = tokens;
	}
}

export const contextService = new ContextService();
|
||||
@@ -1,3 +1,2 @@
|
||||
export { chatService } from './chat';
|
||||
export { contextService } from './context';
|
||||
export { slotsService } from './slots';
|
||||
|
||||
Reference in New Issue
Block a user