webui: auto-refresh /props on inference start to resync model metadata (#16784)
* webui: auto-refresh /props on inference start to resync model metadata

  - Add no-cache headers to /props and /slots
  - Throttle slot checks to 30s
  - Prevent concurrent fetches with promise guard
  - Trigger refresh from chat streaming for legacy and ModelSelector
  - Show dynamic serverWarning when using cached data

* fix: restore proper legacy behavior in webui by using unified /props refresh

  Updated assistant message bubbles to show each message's stored model when available, falling back to the current server model only when the per-message value is missing.

  When the model selector is disabled, the webui now fetches /props and prioritizes that model name over chunk metadata, then persists it with the streamed message so legacy mode properly reflects the backend configuration.

* fix: detect first valid SSE chunk and refresh server props once

* fix: remove the slots availability throttle constant and state

* webui: purge ai-generated cruft

* chore: update webui static build
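Taken together, the first bullets describe a cache-busting fetch of /props guarded by a shared in-flight promise. A minimal sketch of that pattern, assuming a browser `fetch`; the `refreshServerProps` name, the store variables, and the warning text are illustrative, not the actual webui code:

```ts
// Illustrative sketch of a promise-guarded, no-cache /props refresh.
// `serverProps` and `serverWarning` stand in for whatever store the webui uses.
interface ServerProps {
	model_path?: string;
	[key: string]: unknown;
}

let serverProps: ServerProps | null = null;
let serverWarning: string | null = null;
let inFlight: Promise<void> | null = null; // promise guard: at most one fetch at a time

export function refreshServerProps(): Promise<void> {
	if (inFlight) return inFlight; // concurrent callers share the same request

	inFlight = fetch('/props', {
		cache: 'no-store', // pairs with the server-side no-cache headers
		headers: { 'Cache-Control': 'no-cache' }
	})
		.then(async (res) => {
			if (!res.ok) throw new Error(`HTTP ${res.status}`);
			serverProps = (await res.json()) as ServerProps;
			serverWarning = null;
		})
		.catch(() => {
			// keep the stale data but surface that it may be out of date
			serverWarning = 'Showing cached server info; /props could not be refreshed.';
		})
		.finally(() => {
			inFlight = null;
		});

	return inFlight;
}
```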
@@ -54,6 +54,7 @@ export class ChatService {
 		onError,
 		onReasoningChunk,
 		onModel,
+		onFirstValidChunk,
 		// Generation parameters
 		temperature,
 		max_tokens,
@@ -201,6 +202,7 @@ export class ChatService {
 			onError,
 			onReasoningChunk,
 			onModel,
+			onFirstValidChunk,
 			conversationId,
 			abortController.signal
 		);
@@ -267,6 +269,7 @@ export class ChatService {
 		onError?: (error: Error) => void,
 		onReasoningChunk?: (chunk: string) => void,
 		onModel?: (model: string) => void,
+		onFirstValidChunk?: () => void,
 		conversationId?: string,
 		abortSignal?: AbortSignal
 	): Promise<void> {
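The new optional callback threads through both entry points above, so a caller can resync /props exactly once per generation. A hedged sketch of how a call site might wire it up, reusing the `refreshServerProps` helper sketched earlier; `appendToBubble`, `rememberModelForMessage`, and the exact `sendMessage` option shape are illustrative:

```ts
// Hypothetical call site (names and exact signature are illustrative):
// pass onFirstValidChunk alongside the existing stream callbacks and use it
// to refresh server props once the stream is known to be live.
chatService.sendMessage(messages, {
	onChunk: (text: string) => appendToBubble(text),
	onModel: (model: string) => rememberModelForMessage(model),
	onFirstValidChunk: () => {
		// Fires at most once per request, and only for a valid
		// 'chat.completion.chunk' payload that was not aborted.
		void refreshServerProps();
	},
	onError: (err: Error) => console.error(err)
});
```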
@@ -283,6 +286,7 @@ export class ChatService {
 		let lastTimings: ChatMessageTimings | undefined;
 		let streamFinished = false;
 		let modelEmitted = false;
+		let firstValidChunkEmitted = false;
 
 		try {
 			let chunk = '';
@@ -311,10 +315,12 @@ export class ChatService {
 					try {
 						const parsed: ApiChatCompletionStreamChunk = JSON.parse(data);
 
-						const chunkModel = this.extractModelName(parsed);
-						if (chunkModel && !modelEmitted) {
-							modelEmitted = true;
-							onModel?.(chunkModel);
+						if (!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') {
+							firstValidChunkEmitted = true;
+
+							if (!abortSignal?.aborted) {
+								onFirstValidChunk?.();
+							}
 						}
 
 						const content = parsed.choices[0]?.delta?.content;
@@ -322,6 +328,12 @@ export class ChatService {
 						const timings = parsed.timings;
 						const promptProgress = parsed.prompt_progress;
 
+						const chunkModel = this.extractModelName(parsed);
+						if (chunkModel && !modelEmitted) {
+							modelEmitted = true;
+							onModel?.(chunkModel);
+						}
+
 						if (timings || promptProgress) {
 							this.updateProcessingState(timings, promptProgress, conversationId);
 							if (timings) {
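One helper the hunks lean on but do not show is `extractModelName`. Assuming OpenAI-compatible stream chunks, where the model name arrives as a top-level `model` field, a plausible shape is the following; this is an assumption, not the actual implementation:

```ts
// Assumed chunk shape: OpenAI-compatible stream chunks carry a top-level
// `model` string (the real ApiChatCompletionStreamChunk type may differ).
interface StreamChunkLike {
	model?: unknown;
}

// Plausible reading of ChatService.extractModelName: return the model name
// only when it is a non-empty string, otherwise undefined.
function extractModelName(chunk: StreamChunkLike): string | undefined {
	return typeof chunk.model === 'string' && chunk.model.length > 0 ? chunk.model : undefined;
}
```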