Webui/prompt processing progress (#18300)

* webui: display prompt preprocessing progress

* webui: add percentage/ETA and exclude cached tokens from progress

Address review feedback from ngxson

* webui: add minutes and first chunk (0%) case

* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>

* Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>

* webui: address review feedback from allozaur

* chore: update webui build output

* webui: address review feedback from allozaur

* nit

* chore: update webui build output

* feat: Enhance chat processing state

* feat: Improve chat processing statistics UI

* chore: update webui build output

* feat: Add live generation statistics to processing state hook

* feat: Persist prompt processing stats in hook for better UX

* refactor: Enhance ChatMessageStatistics for live stream display

* feat: Implement enhanced live chat statistics into assistant message

* chore: update webui build output

* fix: Proper tab for each stage of prompt processing/generation

* chore: update webui build output

* fix: Improved ETA calculation & display logic

* chore: update webui build output

* feat: Simplify logic & remove ETA from prompt progress

* chore: update webui build output

---------

Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
This commit is contained in:
Pascal
2025-12-29 19:32:21 +01:00
committed by GitHub
parent 0bd1212a43
commit c9a3b40d65
8 changed files with 218 additions and 36 deletions
@@ -89,6 +89,7 @@
const fallbackToolCalls = $derived(typeof toolCallContent === 'string' ? toolCallContent : null);
const processingState = useProcessingState();
let currentConfig = $derived(config());
let isRouter = $derived(isRouterMode());
let displayedModel = $derived((): string | null => {
@@ -116,6 +117,12 @@
}
});
$effect(() => {
if (isLoading() && !message?.content?.trim()) {
processingState.startMonitoring();
}
});
function formatToolCallBadge(toolCall: ApiChatCompletionToolCall, index: number) {
const callNumber = index + 1;
const functionName = toolCall.function?.name?.trim();
@@ -186,7 +193,7 @@
<div class="mt-6 w-full max-w-[48rem]" in:fade>
<div class="processing-container">
<span class="processing-text">
{processingState.getProcessingMessage()}
{processingState.getPromptProgressText() ?? processingState.getProcessingMessage()}
</span>
</div>
</div>
@@ -263,6 +270,23 @@
predictedTokens={message.timings.predicted_n}
predictedMs={message.timings.predicted_ms}
/>
{:else if isLoading() && currentConfig.showMessageStats}
{@const liveStats = processingState.getLiveProcessingStats()}
{@const genStats = processingState.getLiveGenerationStats()}
{@const promptProgress = processingState.processingState?.promptProgress}
{@const isStillProcessingPrompt =
promptProgress && promptProgress.processed < promptProgress.total}
{#if liveStats || genStats}
<ChatMessageStatistics
isLive={true}
isProcessingPrompt={!!isStillProcessingPrompt}
promptTokens={liveStats?.tokensProcessed}
promptMs={liveStats?.timeMs}
predictedTokens={genStats?.tokensGenerated}
predictedMs={genStats?.timeMs}
/>
{/if}
{/if}
</div>
{/if}
@@ -5,21 +5,64 @@
import { ChatMessageStatsView } from '$lib/enums';
interface Props {
predictedTokens: number;
predictedMs: number;
predictedTokens?: number;
predictedMs?: number;
promptTokens?: number;
promptMs?: number;
// Live mode: when true, shows stats during streaming
isLive?: boolean;
// Whether prompt processing is still in progress
isProcessingPrompt?: boolean;
// Initial view to show (defaults to READING in live mode)
initialView?: ChatMessageStatsView;
}
let { predictedTokens, predictedMs, promptTokens, promptMs }: Props = $props();
let {
predictedTokens,
predictedMs,
promptTokens,
promptMs,
isLive = false,
isProcessingPrompt = false,
initialView = ChatMessageStatsView.GENERATION
}: Props = $props();
let activeView: ChatMessageStatsView = $state(ChatMessageStatsView.GENERATION);
let activeView: ChatMessageStatsView = $state(initialView);
let hasAutoSwitchedToGeneration = $state(false);
let tokensPerSecond = $derived((predictedTokens / predictedMs) * 1000);
let timeInSeconds = $derived((predictedMs / 1000).toFixed(2));
// In live mode: auto-switch to GENERATION tab when prompt processing completes
$effect(() => {
if (isLive) {
// Auto-switch to generation tab only when prompt processing is done (once)
if (
!hasAutoSwitchedToGeneration &&
!isProcessingPrompt &&
predictedTokens &&
predictedTokens > 0
) {
activeView = ChatMessageStatsView.GENERATION;
hasAutoSwitchedToGeneration = true;
} else if (!hasAutoSwitchedToGeneration) {
// Stay on READING while prompt is still being processed
activeView = ChatMessageStatsView.READING;
}
}
});
let hasGenerationStats = $derived(
predictedTokens !== undefined &&
predictedTokens > 0 &&
predictedMs !== undefined &&
predictedMs > 0
);
let tokensPerSecond = $derived(hasGenerationStats ? (predictedTokens! / predictedMs!) * 1000 : 0);
let timeInSeconds = $derived(
predictedMs !== undefined ? (predictedMs / 1000).toFixed(2) : '0.00'
);
let promptTokensPerSecond = $derived(
promptTokens !== undefined && promptMs !== undefined
promptTokens !== undefined && promptMs !== undefined && promptMs > 0
? (promptTokens / promptMs) * 1000
: undefined
);
@@ -34,11 +77,14 @@
promptTokensPerSecond !== undefined &&
promptTimeInSeconds !== undefined
);
// In live mode, generation tab is disabled until we have generation stats
let isGenerationDisabled = $derived(isLive && !hasGenerationStats);
</script>
<div class="inline-flex items-center text-xs text-muted-foreground">
<div class="inline-flex items-center rounded-sm bg-muted-foreground/15 p-0.5">
{#if hasPromptStats}
{#if hasPromptStats || isLive}
<Tooltip.Root>
<Tooltip.Trigger>
<button
@@ -65,25 +111,32 @@
class="inline-flex h-5 w-5 items-center justify-center rounded-sm transition-colors {activeView ===
ChatMessageStatsView.GENERATION
? 'bg-background text-foreground shadow-sm'
: 'hover:text-foreground'}"
onclick={() => (activeView = ChatMessageStatsView.GENERATION)}
: isGenerationDisabled
? 'cursor-not-allowed opacity-40'
: 'hover:text-foreground'}"
onclick={() => !isGenerationDisabled && (activeView = ChatMessageStatsView.GENERATION)}
disabled={isGenerationDisabled}
>
<Sparkles class="h-3 w-3" />
<span class="sr-only">Generation</span>
</button>
</Tooltip.Trigger>
<Tooltip.Content>
<p>Generation (token output)</p>
<p>
{isGenerationDisabled
? 'Generation (waiting for tokens...)'
: 'Generation (token output)'}
</p>
</Tooltip.Content>
</Tooltip.Root>
</div>
<div class="flex items-center gap-1 px-2">
{#if activeView === ChatMessageStatsView.GENERATION}
{#if activeView === ChatMessageStatsView.GENERATION && hasGenerationStats}
<BadgeChatStatistic
class="bg-transparent"
icon={WholeWord}
value="{predictedTokens} tokens"
value="{predictedTokens?.toLocaleString()} tokens"
tooltipLabel="Generated tokens"
/>
<BadgeChatStatistic