webui: auto-refresh /props on inference start to resync model metadata (#16784)
* webui: auto-refresh /props on inference start to resync model metadata
  - Add no-cache headers to /props and /slots
  - Throttle slot checks to 30s
  - Prevent concurrent fetches with a promise guard
  - Trigger the refresh from chat streaming for both legacy mode and the ModelSelector
  - Show a dynamic serverWarning when using cached data

* fix: restore proper legacy behavior in webui by using the unified /props refresh
  Assistant message bubbles now show each message's stored model when available, falling back to the current server model only when the per-message value is missing.
  When the model selector is disabled, the webui fetches /props and prioritizes that model name over chunk metadata, then persists it with the streamed message so legacy mode properly reflects the backend configuration.

* fix: detect the first valid SSE chunk and refresh server props once

* fix: remove the slot-availability throttle constant and state

* webui: purge AI-generated cruft

* chore: update webui static build
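The "promise guard" and no-cache headers named above live in the service layer rather than in the chat-store hunks shown below. A minimal sketch of the pattern, assuming a plain fetch-based client; PROPS_URL, ServerProps, and fetchServerPropsGuarded are illustrative names, not the webui's actual identifiers:

// Collapse concurrent /props fetches into a single in-flight request.
interface ServerProps {
    [key: string]: unknown;
}

const PROPS_URL = '/props';

let inFlight: Promise<ServerProps> | null = null;

async function fetchServerPropsGuarded(): Promise<ServerProps> {
    // Reuse the pending request if one is already running.
    if (inFlight) {
        return inFlight;
    }

    inFlight = fetch(PROPS_URL, {
        // The commit also sends no-cache headers so /props is never
        // answered from a stale HTTP cache after a model swap.
        headers: { 'Cache-Control': 'no-cache' },
    })
        .then((response) => {
            if (!response.ok) {
                throw new Error(`/props request failed: ${response.status}`);
            }
            return response.json() as Promise<ServerProps>;
        })
        .finally(() => {
            // Clear the guard so later calls trigger a fresh fetch.
            inFlight = null;
        });

    return inFlight;
}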
@@ -1,6 +1,7 @@
 import { DatabaseStore } from '$lib/stores/database';
 import { chatService, slotsService } from '$lib/services';
 import { config } from '$lib/stores/settings.svelte';
+import { serverStore } from '$lib/stores/server.svelte';
 import { normalizeModelName } from '$lib/utils/model-names';
 import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
 import { browser } from '$app/environment';
@@ -362,9 +363,41 @@ class ChatStore {
 
         let resolvedModel: string | null = null;
         let modelPersisted = false;
+        const currentConfig = config();
+        const preferServerPropsModel = !currentConfig.modelSelectorEnabled;
+        let serverPropsRefreshed = false;
+        let updateModelFromServerProps: ((persistImmediately?: boolean) => void) | null = null;
 
-        const recordModel = (modelName: string, persistImmediately = true): void => {
-            const normalizedModel = normalizeModelName(modelName);
+        const refreshServerPropsOnce = () => {
+            if (serverPropsRefreshed) {
+                return;
+            }
+
+            serverPropsRefreshed = true;
+
+            const hasExistingProps = serverStore.serverProps !== null;
+
+            serverStore
+                .fetchServerProps({ silent: hasExistingProps })
+                .then(() => {
+                    updateModelFromServerProps?.(true);
+                })
+                .catch((error) => {
+                    console.warn('Failed to refresh server props after streaming started:', error);
+                });
+        };
+
+        const recordModel = (modelName: string | null | undefined, persistImmediately = true): void => {
+            const serverModelName = serverStore.modelName;
+            const preferredModelSource = preferServerPropsModel
+                ? (serverModelName ?? modelName ?? null)
+                : (modelName ?? serverModelName ?? null);
+
+            if (!preferredModelSource) {
+                return;
+            }
+
+            const normalizedModel = normalizeModelName(preferredModelSource);
 
             if (!normalizedModel || normalizedModel === resolvedModel) {
                 return;
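Restating the preference logic of the new recordModel outside the diff (a standalone sketch mirroring the hunk above, not additional webui code):

// When the model selector is disabled, the /props model name wins over
// the name embedded in the streamed chunk; otherwise the chunk wins.
function pickModelSource(
    preferServerPropsModel: boolean,
    serverModelName: string | null,
    chunkModelName: string | null | undefined
): string | null {
    return preferServerPropsModel
        ? (serverModelName ?? chunkModelName ?? null)
        : (chunkModelName ?? serverModelName ?? null);
}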
@@ -388,6 +421,20 @@ class ChatStore {
             }
         };
 
+        if (preferServerPropsModel) {
+            updateModelFromServerProps = (persistImmediately = true) => {
+                const currentServerModel = serverStore.modelName;
+
+                if (!currentServerModel) {
+                    return;
+                }
+
+                recordModel(currentServerModel, persistImmediately);
+            };
+
+            updateModelFromServerProps(false);
+        }
+
         slotsService.startStreaming();
         slotsService.setActiveConversation(assistantMessage.convId);
 
@@ -396,6 +443,9 @@ class ChatStore {
             {
                 ...this.getApiOptions(),
 
+                onFirstValidChunk: () => {
+                    refreshServerPropsOnce();
+                },
                 onChunk: (chunk: string) => {
                     streamedContent += chunk;
                     this.setConversationStreaming(
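The onFirstValidChunk callback wired in above is invoked by the chat service's SSE reader, which is not part of this diff. A sketch of the intended contract, assuming an OpenAI-style chunk shape; StreamChunk, StreamCallbacks, and makeSseDataHandler are hypothetical names:

// Invoke `onFirstValidChunk` exactly once, on the first SSE payload that
// parses as JSON, then keep forwarding text deltas via `onChunk`.
interface StreamChunk {
    choices?: { delta?: { content?: string } }[];
}

interface StreamCallbacks {
    onFirstValidChunk?: () => void;
    onChunk?: (content: string) => void;
}

function tryParseChunk(data: string): StreamChunk | null {
    try {
        return JSON.parse(data) as StreamChunk;
    } catch {
        return null; // Ignore SSE comments, keep-alives, and partial frames.
    }
}

function makeSseDataHandler(callbacks: StreamCallbacks): (data: string) => void {
    let sawValidChunk = false;

    return (data: string) => {
        const chunk = tryParseChunk(data);
        if (!chunk) {
            return;
        }

        if (!sawValidChunk) {
            sawValidChunk = true;
            // In the hunk above, this hook triggers the one-shot
            // /props refresh (refreshServerPropsOnce).
            callbacks.onFirstValidChunk?.();
        }

        const content = chunk.choices?.[0]?.delta?.content;
        if (content) {
            callbacks.onChunk?.(content);
        }
    };
}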