webui: auto-refresh /props on inference start to resync model metadata (#16784)

* webui: auto-refresh /props on inference start to resync model metadata

- Add no-cache headers to /props and /slots
- Throttle slot checks to 30s
- Prevent concurrent fetches with a promise guard (see the sketch after this list)
- Trigger refresh from chat streaming for legacy and ModelSelector
- Show dynamic serverWarning when using cached data
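
A minimal sketch of how such a guarded, cache-bypassing refresh can look, assuming hypothetical names (`refreshProps`, `propsInFlight`) rather than the webui's actual identifiers:

```ts
// Single-flight /props refresh: concurrent callers share one request.
let propsInFlight: Promise<void> | null = null;

async function refreshProps(baseUrl: string): Promise<void> {
  // Promise guard: reuse the in-flight request instead of starting another.
  if (propsInFlight) return propsInFlight;

  propsInFlight = fetch(`${baseUrl}/props`, {
    headers: { 'Cache-Control': 'no-cache' }, // ask caches for fresh data
    cache: 'no-store' // bypass the browser HTTP cache entirely
  })
    .then(async (response) => {
      if (!response.ok) throw new Error(`/props returned ${response.status}`);
      const props = await response.json();
      // ...update application state with the fresh model metadata here...
      void props;
    })
    .finally(() => {
      // Release the guard so the next inference start can refresh again.
      propsInFlight = null;
    });

  return propsInFlight;
}
```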

* fix: restore proper legacy behavior in webui by using unified /props refresh

Updated assistant message bubbles to show each message's stored model when available,
falling back to the current server model only when the per-message value is missing,
as illustrated in the sketch below.

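A hedged illustration of that fallback; the field names here are assumptions, not the webui's actual message schema:

```ts
// Hypothetical message shape: the model is persisted per message when known.
interface StoredAssistantMessage {
  content: string;
  model?: string; // captured from the stream or from /props at send time
}

// Prefer the model stored with the message; fall back to the server's
// current model only when the per-message value is missing.
function displayModel(message: StoredAssistantMessage, serverModel: string): string {
  return message.model ?? serverModel;
}
```
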
When the model selector is disabled, the webui now fetches /props and prioritizes that
model name over chunk metadata, then persists it with the streamed message so legacy
mode properly reflects the backend configuration.

* fix: detect first valid SSE chunk and refresh server props once

* fix: remove the slots availability throttle constant and state

* webui: purge ai-generated cruft

* chore: update webui static build
Author: Pascal
Date: 2025-11-01 19:49:51 +01:00
Committed by: GitHub
Parent: e4a71599e5
Commit: 2f68ce7cfd
7 changed files with 180 additions and 70 deletions
+16 -4
@@ -54,6 +54,7 @@ export class ChatService {
       onError,
       onReasoningChunk,
       onModel,
+      onFirstValidChunk,
       // Generation parameters
       temperature,
       max_tokens,
@@ -201,6 +202,7 @@ export class ChatService {
       onError,
       onReasoningChunk,
       onModel,
+      onFirstValidChunk,
       conversationId,
       abortController.signal
     );
@@ -267,6 +269,7 @@ export class ChatService {
     onError?: (error: Error) => void,
     onReasoningChunk?: (chunk: string) => void,
     onModel?: (model: string) => void,
+    onFirstValidChunk?: () => void,
     conversationId?: string,
     abortSignal?: AbortSignal
   ): Promise<void> {
@@ -283,6 +286,7 @@ export class ChatService {
     let lastTimings: ChatMessageTimings | undefined;
     let streamFinished = false;
     let modelEmitted = false;
+    let firstValidChunkEmitted = false;
 
     try {
       let chunk = '';
@@ -311,10 +315,12 @@ export class ChatService {
         try {
           const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
-          const chunkModel = this.extractModelName(parsed);
 
-          if (chunkModel && !modelEmitted) {
-            modelEmitted = true;
-            onModel?.(chunkModel);
+          if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') {
+            firstValidChunkEmitted = true;
+
+            if (!abortSignal?.aborted) {
+              onFirstValidChunk?.();
+            }
           }
 
           const content = parsed.choices[0]?.delta?.content;
@@ -322,6 +328,12 @@ export class ChatService {
           const timings = parsed.timings;
           const promptProgress = parsed.prompt_progress;
+          const chunkModel = this.extractModelName(parsed);
+
+          if (chunkModel && !modelEmitted) {
+            modelEmitted = true;
+            onModel?.(chunkModel);
+          }
 
           if (timings || promptProgress) {
             this.updateProcessingState(timings, promptProgress, conversationId);
             if (timings) {
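
Taken together, a caller could wire the new `onFirstValidChunk` callback to a props refresh roughly as below. This is a sketch: the options-object shape and the `refreshProps` helper are assumptions carried over from the earlier example, not the webui's exact API.

```ts
// Hypothetical wiring: resync /props once per response, as soon as the
// first valid SSE chunk confirms that inference has actually started.
async function streamWithPropsResync(
  chatService: {
    sendMessage(opts: {
      onFirstValidChunk?: () => void;
      onModel?: (model: string) => void;
    }): Promise<void>;
  },
  refreshProps: () => Promise<void>,
  persistModel: (model: string) => void
): Promise<void> {
  await chatService.sendMessage({
    onFirstValidChunk: () => {
      // Fire-and-forget; the promise guard deduplicates concurrent fetches.
      void refreshProps();
    },
    // Persist the streamed model name with the message being built, so each
    // bubble can later show the model that actually produced it.
    onModel: persistModel
  });
}
```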