webui: auto-refresh /props on inference start to resync model metadata (#16784)

* webui: auto-refresh /props on inference start to resync model metadata

- Add no-cache headers to /props and /slots
- Throttle slot checks to 30s
- Prevent concurrent fetches with promise guard
- Trigger refresh from chat streaming for legacy and ModelSelector
- Show dynamic serverWarning when using cached data

* fix: restore proper legacy behavior in webui by using unified /props refresh

Updated assistant message bubbles to show each message's stored model when available,
falling back to the current server model only when the per-message value is missing.

When the model selector is disabled, the webui now fetches /props and prioritizes that model name
over chunk metadata, then persists it with the streamed message so that legacy mode properly
reflects the backend configuration.

* fix: detect first valid SSE chunk and refresh server props once

* fix: removed the slots availability throttle constant and state

* webui: purge ai-generated cruft

* chore: update webui static build
This commit is contained in:
Pascal
2025-11-01 19:49:51 +01:00
committed by GitHub
parent e4a71599e5
commit 2f68ce7cfd
7 changed files with 180 additions and 70 deletions
@@ -1,6 +1,7 @@
import { DatabaseStore } from '$lib/stores/database';
import { chatService, slotsService } from '$lib/services';
import { config } from '$lib/stores/settings.svelte';
import { serverStore } from '$lib/stores/server.svelte';
import { normalizeModelName } from '$lib/utils/model-names';
import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
import { browser } from '$app/environment';
@@ -362,9 +363,41 @@ class ChatStore {
// Normalized model name compared against by recordModel, which returns early
// when a newly normalized name equals this value.
let resolvedModel: string | null = null;
// NOTE(review): usage is outside this hunk — presumably tracks whether the
// resolved model has already been persisted with the message; confirm.
let modelPersisted = false;
const currentConfig = config();
// True when the model selector is disabled; in that case the model name from
// serverStore takes precedence over the name passed to recordModel.
const preferServerPropsModel = !currentConfig.modelSelectorEnabled;
// Guard flag so refreshServerPropsOnce triggers at most one fetch.
let serverPropsRefreshed = false;
// Stays null unless preferServerPropsModel is true, in which case it is set to
// a callback that forwards serverStore.modelName to recordModel.
let updateModelFromServerProps: ((persistImmediately?: boolean) => void) | null = null;
const recordModel = (modelName: string, persistImmediately = true): void => {
const normalizedModel = normalizeModelName(modelName);
/**
 * Kicks off a single refresh of the server props via serverStore.
 *
 * The guard flag is set before the request is issued, so later calls are
 * no-ops even while the fetch is still in flight or after it has failed.
 * Once the fetch resolves, the optional updateModelFromServerProps callback
 * is invoked with `true`; any rejection in the chain is logged as a warning
 * rather than rethrown.
 */
const refreshServerPropsOnce = () => {
	if (!serverPropsRefreshed) {
		serverPropsRefreshed = true;

		// Request a silent fetch only when props have already been loaded.
		// NOTE(review): exact effect of `silent` is defined in serverStore — confirm.
		const silent = serverStore.serverProps !== null;
		const pendingRefresh = serverStore.fetchServerProps({ silent });

		const applyRefreshedModel = () => {
			updateModelFromServerProps?.(true);
		};
		const reportFailure = (error: unknown) => {
			console.warn('Failed to refresh server props after streaming started:', error);
		};

		pendingRefresh.then(applyRefreshedModel).catch(reportFailure);
	}
};
const recordModel = (modelName: string | null | undefined, persistImmediately = true): void => {
const serverModelName = serverStore.modelName;
const preferredModelSource = preferServerPropsModel
? (serverModelName ?? modelName ?? null)
: (modelName ?? serverModelName ?? null);
if (!preferredModelSource) {
return;
}
const normalizedModel = normalizeModelName(preferredModelSource);
if (!normalizedModel || normalizedModel === resolvedModel) {
return;
@@ -388,6 +421,20 @@ class ChatStore {
}
};
// Only wire up the server-props-driven model update when the server model is
// the preferred source (i.e. the model selector is disabled).
if (preferServerPropsModel) {
	updateModelFromServerProps = (persistImmediately = true) => {
		// Forward the server-reported model name to recordModel, if there is one.
		const serverModel = serverStore.modelName;
		if (serverModel) {
			recordModel(serverModel, persistImmediately);
		}
	};

	// Seed the model from whatever serverStore currently holds, passing
	// persistImmediately = false for this initial call.
	updateModelFromServerProps(false);
}
slotsService.startStreaming();
slotsService.setActiveConversation(assistantMessage.convId);
@@ -396,6 +443,9 @@ class ChatStore {
{
...this.getApiOptions(),
onFirstValidChunk: () => {
refreshServerPropsOnce();
},
onChunk: (chunk: string) => {
streamedContent += chunk;
this.setConversationStreaming(