webui: auto-refresh /props on inference start to resync model metadata (#16784)
* webui: auto-refresh /props on inference start to resync model metadata

  - Add no-cache headers to /props and /slots
  - Throttle slot checks to 30s
  - Prevent concurrent fetches with promise guard
  - Trigger refresh from chat streaming for legacy and ModelSelector
  - Show dynamic serverWarning when using cached data

* fix: restore proper legacy behavior in webui by using unified /props refresh

  Updated assistant message bubbles to show each message's stored model when available, falling back to the current server model only when the per-message value is missing.

  When the model selector is disabled, the webui now fetches /props and prioritizes that model name over chunk metadata, then persists it with the streamed message so legacy mode properly reflects the backend configuration.

* fix: detect first valid SSE chunk and refresh server props once

* fix: remove the slots availability throttle constant and state

* webui: purge ai-generated cruft

* chore: update webui static build
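Taken together, the first bullets describe a cache-busting fetch of /props guarded by a shared in-flight promise. A minimal sketch of that pattern, assuming a browser `fetch`; the `refreshServerProps` name, the store variables, and the warning text are illustrative, not the actual webui code:

```ts
// Illustrative sketch of a promise-guarded, no-cache /props refresh.
// `serverProps` and `serverWarning` stand in for whatever store the webui uses.
interface ServerProps {
	model_path?: string;
	[key: string]: unknown;
}

let serverProps: ServerProps | null = null;
let serverWarning: string | null = null;
let inFlight: Promise<void> | null = null; // promise guard: at most one fetch at a time

export function refreshServerProps(): Promise<void> {
	if (inFlight) return inFlight; // concurrent callers share the same request

	inFlight = fetch('/props', {
		cache: 'no-store', // pairs with the server-side no-cache headers
		headers: { 'Cache-Control': 'no-cache' }
	})
		.then(async (res) => {
			if (!res.ok) throw new Error(`HTTP ${res.status}`);
			serverProps = (await res.json()) as ServerProps;
			serverWarning = null;
		})
		.catch(() => {
			// keep the stale data but surface that it may be out of date
			serverWarning = 'Showing cached server info; /props could not be refreshed.';
		})
		.finally(() => {
			inFlight = null;
		});

	return inFlight;
}
```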
@@ -54,6 +54,7 @@ export class ChatService {
 		onError,
 		onReasoningChunk,
 		onModel,
+		onFirstValidChunk,
 		// Generation parameters
 		temperature,
 		max_tokens,
@@ -201,6 +202,7 @@ export class ChatService {
 			onError,
 			onReasoningChunk,
 			onModel,
+			onFirstValidChunk,
 			conversationId,
 			abortController.signal
 		);
@@ -267,6 +269,7 @@ export class ChatService {
 		onError?: (error: Error) => void,
 		onReasoningChunk?: (chunk: string) => void,
 		onModel?: (model: string) => void,
+		onFirstValidChunk?: () => void,
 		conversationId?: string,
 		abortSignal?: AbortSignal
 	): Promise<void> {
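The new optional callback threads through both entry points above, so a caller can resync /props exactly once per generation. A hedged sketch of how a call site might wire it up, reusing the `refreshServerProps` helper sketched earlier; `appendToBubble`, `rememberModelForMessage`, and the exact `sendMessage` option shape are illustrative:

```ts
// Hypothetical call site (names and exact signature are illustrative):
// pass onFirstValidChunk alongside the existing stream callbacks and use it
// to refresh server props once the stream is known to be live.
chatService.sendMessage(messages, {
	onChunk: (text: string) => appendToBubble(text),
	onModel: (model: string) => rememberModelForMessage(model),
	onFirstValidChunk: () => {
		// Fires at most once per request, and only for a valid
		// 'chat.completion.chunk' payload that was not aborted.
		void refreshServerProps();
	},
	onError: (err: Error) => console.error(err)
});
```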
@@ -283,6 +286,7 @@ export class ChatService {
 		let lastTimings: ChatMessageTimings | undefined;
 		let streamFinished = false;
 		let modelEmitted = false;
+		let firstValidChunkEmitted = false;
 
 		try {
 			let chunk = '';
@@ -311,10 +315,12 @@ export class ChatService {
 					try {
 						const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
 
-						const chunkModel = this.extractModelName(parsed);
-						if (chunkModel && !modelEmitted) {
-							modelEmitted = true;
-							onModel?.(chunkModel);
+						if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') {
+							firstValidChunkEmitted = true;
+
+							if (!abortSignal?.aborted) {
+								onFirstValidChunk?.();
+							}
 						}
 
 						const content = parsed.choices[0]?.delta?.content;
@@ -322,6 +328,12 @@ export class ChatService {
 						const timings = parsed.timings;
 						const promptProgress = parsed.prompt_progress;
 
+						const chunkModel = this.extractModelName(parsed);
+						if (chunkModel && !modelEmitted) {
+							modelEmitted = true;
+							onModel?.(chunkModel);
+						}
+
 						if (timings || promptProgress) {
 							this.updateProcessingState(timings, promptProgress, conversationId);
 							if (timings) {
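One helper the hunks lean on but do not show is `extractModelName`. Assuming OpenAI-compatible stream chunks, where the model name arrives as a top-level `model` field, a plausible shape is the following; this is an assumption, not the actual implementation:

```ts
// Assumed chunk shape: OpenAI-compatible stream chunks carry a top-level
// `model` string (the real ApiChatCompletionStreamChunk type may differ).
interface StreamChunkLike {
	model?: unknown;
}

// Plausible reading of ChatService.extractModelName: return the model name
// only when it is a non-empty string, otherwise undefined.
function extractModelName(chunk: StreamChunkLike): string | undefined {
	return typeof chunk.model === 'string' && chunk.model.length > 0 ? chunk.model : undefined;
}
```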