Webui/prompt processing progress (#18300)
* webui: display prompt preprocessing progress * webui: add percentage/ETA and exclude cached tokens from progress Address review feedback from ngxson * webui: add minutes and first chunk (0%) case * Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com> * Update tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageAssistant.svelte Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com> * webui: address review feedback from allozaur * chore: update webui build output * webui: address review feedback from allozaur * nit * chore: update webui build output * feat: Enhance chat processing state * feat: Improve chat processing statistics UI * chore: update webui build output * feat: Add live generation statistics to processing state hook * feat: Persist prompt processing stats in hook for better UX * refactor: Enhance ChatMessageStatistics for live stream display * feat: Implement enhanced live chat statistics into assistant message * chore: update webui build output * fix: Proper tab for each stage of prompt processing/generation * chore: update webui build output * fix: Improved ETA calculation & display logic * chore: update webui build output * feat: Simplify logic & remove ETA from prompt progress * chore: update webui build output --------- Co-authored-by: Aleksander Grygier <aleksander.grygier@gmail.com>
This commit is contained in:
+25
-1
@@ -89,6 +89,7 @@
|
||||
const fallbackToolCalls = $derived(typeof toolCallContent === 'string' ? toolCallContent : null);
|
||||
|
||||
const processingState = useProcessingState();
|
||||
|
||||
let currentConfig = $derived(config());
|
||||
let isRouter = $derived(isRouterMode());
|
||||
let displayedModel = $derived((): string | null => {
|
||||
@@ -116,6 +117,12 @@
|
||||
}
|
||||
});
|
||||
|
||||
$effect(() => {
|
||||
if (isLoading() && !message?.content?.trim()) {
|
||||
processingState.startMonitoring();
|
||||
}
|
||||
});
|
||||
|
||||
function formatToolCallBadge(toolCall: ApiChatCompletionToolCall, index: number) {
|
||||
const callNumber = index + 1;
|
||||
const functionName = toolCall.function?.name?.trim();
|
||||
@@ -186,7 +193,7 @@
|
||||
<div class="mt-6 w-full max-w-[48rem]" in:fade>
|
||||
<div class="processing-container">
|
||||
<span class="processing-text">
|
||||
{processingState.getProcessingMessage()}
|
||||
{processingState.getPromptProgressText() ?? processingState.getProcessingMessage()}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
@@ -263,6 +270,23 @@
|
||||
predictedTokens={message.timings.predicted_n}
|
||||
predictedMs={message.timings.predicted_ms}
|
||||
/>
|
||||
{:else if isLoading() && currentConfig.showMessageStats}
|
||||
{@const liveStats = processingState.getLiveProcessingStats()}
|
||||
{@const genStats = processingState.getLiveGenerationStats()}
|
||||
{@const promptProgress = processingState.processingState?.promptProgress}
|
||||
{@const isStillProcessingPrompt =
|
||||
promptProgress && promptProgress.processed < promptProgress.total}
|
||||
|
||||
{#if liveStats || genStats}
|
||||
<ChatMessageStatistics
|
||||
isLive={true}
|
||||
isProcessingPrompt={!!isStillProcessingPrompt}
|
||||
promptTokens={liveStats?.tokensProcessed}
|
||||
promptMs={liveStats?.timeMs}
|
||||
predictedTokens={genStats?.tokensGenerated}
|
||||
predictedMs={genStats?.timeMs}
|
||||
/>
|
||||
{/if}
|
||||
{/if}
|
||||
</div>
|
||||
{/if}
|
||||
|
||||
+66
-13
@@ -5,21 +5,64 @@
|
||||
import { ChatMessageStatsView } from '$lib/enums';
|
||||
|
||||
interface Props {
|
||||
predictedTokens: number;
|
||||
predictedMs: number;
|
||||
predictedTokens?: number;
|
||||
predictedMs?: number;
|
||||
promptTokens?: number;
|
||||
promptMs?: number;
|
||||
// Live mode: when true, shows stats during streaming
|
||||
isLive?: boolean;
|
||||
// Whether prompt processing is still in progress
|
||||
isProcessingPrompt?: boolean;
|
||||
// Initial view to show (defaults to READING in live mode)
|
||||
initialView?: ChatMessageStatsView;
|
||||
}
|
||||
|
||||
let { predictedTokens, predictedMs, promptTokens, promptMs }: Props = $props();
|
||||
let {
|
||||
predictedTokens,
|
||||
predictedMs,
|
||||
promptTokens,
|
||||
promptMs,
|
||||
isLive = false,
|
||||
isProcessingPrompt = false,
|
||||
initialView = ChatMessageStatsView.GENERATION
|
||||
}: Props = $props();
|
||||
|
||||
let activeView: ChatMessageStatsView = $state(ChatMessageStatsView.GENERATION);
|
||||
let activeView: ChatMessageStatsView = $state(initialView);
|
||||
let hasAutoSwitchedToGeneration = $state(false);
|
||||
|
||||
let tokensPerSecond = $derived((predictedTokens / predictedMs) * 1000);
|
||||
let timeInSeconds = $derived((predictedMs / 1000).toFixed(2));
|
||||
// In live mode: auto-switch to GENERATION tab when prompt processing completes
|
||||
$effect(() => {
|
||||
if (isLive) {
|
||||
// Auto-switch to generation tab only when prompt processing is done (once)
|
||||
if (
|
||||
!hasAutoSwitchedToGeneration &&
|
||||
!isProcessingPrompt &&
|
||||
predictedTokens &&
|
||||
predictedTokens > 0
|
||||
) {
|
||||
activeView = ChatMessageStatsView.GENERATION;
|
||||
hasAutoSwitchedToGeneration = true;
|
||||
} else if (!hasAutoSwitchedToGeneration) {
|
||||
// Stay on READING while prompt is still being processed
|
||||
activeView = ChatMessageStatsView.READING;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let hasGenerationStats = $derived(
|
||||
predictedTokens !== undefined &&
|
||||
predictedTokens > 0 &&
|
||||
predictedMs !== undefined &&
|
||||
predictedMs > 0
|
||||
);
|
||||
|
||||
let tokensPerSecond = $derived(hasGenerationStats ? (predictedTokens! / predictedMs!) * 1000 : 0);
|
||||
let timeInSeconds = $derived(
|
||||
predictedMs !== undefined ? (predictedMs / 1000).toFixed(2) : '0.00'
|
||||
);
|
||||
|
||||
let promptTokensPerSecond = $derived(
|
||||
promptTokens !== undefined && promptMs !== undefined
|
||||
promptTokens !== undefined && promptMs !== undefined && promptMs > 0
|
||||
? (promptTokens / promptMs) * 1000
|
||||
: undefined
|
||||
);
|
||||
@@ -34,11 +77,14 @@
|
||||
promptTokensPerSecond !== undefined &&
|
||||
promptTimeInSeconds !== undefined
|
||||
);
|
||||
|
||||
// In live mode, generation tab is disabled until we have generation stats
|
||||
let isGenerationDisabled = $derived(isLive && !hasGenerationStats);
|
||||
</script>
|
||||
|
||||
<div class="inline-flex items-center text-xs text-muted-foreground">
|
||||
<div class="inline-flex items-center rounded-sm bg-muted-foreground/15 p-0.5">
|
||||
{#if hasPromptStats}
|
||||
{#if hasPromptStats || isLive}
|
||||
<Tooltip.Root>
|
||||
<Tooltip.Trigger>
|
||||
<button
|
||||
@@ -65,25 +111,32 @@
|
||||
class="inline-flex h-5 w-5 items-center justify-center rounded-sm transition-colors {activeView ===
|
||||
ChatMessageStatsView.GENERATION
|
||||
? 'bg-background text-foreground shadow-sm'
|
||||
: 'hover:text-foreground'}"
|
||||
onclick={() => (activeView = ChatMessageStatsView.GENERATION)}
|
||||
: isGenerationDisabled
|
||||
? 'cursor-not-allowed opacity-40'
|
||||
: 'hover:text-foreground'}"
|
||||
onclick={() => !isGenerationDisabled && (activeView = ChatMessageStatsView.GENERATION)}
|
||||
disabled={isGenerationDisabled}
|
||||
>
|
||||
<Sparkles class="h-3 w-3" />
|
||||
<span class="sr-only">Generation</span>
|
||||
</button>
|
||||
</Tooltip.Trigger>
|
||||
<Tooltip.Content>
|
||||
<p>Generation (token output)</p>
|
||||
<p>
|
||||
{isGenerationDisabled
|
||||
? 'Generation (waiting for tokens...)'
|
||||
: 'Generation (token output)'}
|
||||
</p>
|
||||
</Tooltip.Content>
|
||||
</Tooltip.Root>
|
||||
</div>
|
||||
|
||||
<div class="flex items-center gap-1 px-2">
|
||||
{#if activeView === ChatMessageStatsView.GENERATION}
|
||||
{#if activeView === ChatMessageStatsView.GENERATION && hasGenerationStats}
|
||||
<BadgeChatStatistic
|
||||
class="bg-transparent"
|
||||
icon={WholeWord}
|
||||
value="{predictedTokens} tokens"
|
||||
value="{predictedTokens?.toLocaleString()} tokens"
|
||||
tooltipLabel="Generated tokens"
|
||||
/>
|
||||
<BadgeChatStatistic
|
||||
|
||||
Reference in New Issue
Block a user