feat: Add tokens/sec tracking to streaming output

- Track elapsed time during streaming to calculate tokens/sec (t/s)
- Estimate token count from streamed characters (roughly 4 chars/token)
- Display t/s in stream completion message
- Remove debug logging from worker
This commit is contained in:
2026-02-25 11:55:27 +01:00
parent 929f069d14
commit 58e4b2c645
2 changed files with 9 additions and 6 deletions
+9 -1
View File
@@ -357,13 +357,21 @@ class SwarmManager:
if not self.mcp_mode:
print(f"🔄 Starting stream from {fastest_worker.name}...")
chunk_count = 0
total_chars = 0
start_time = asyncio.get_event_loop().time()
async for chunk in fastest_worker.generate_with_progress_stream(request):
chunk_count += 1
total_chars += len(chunk)
if not self.mcp_mode and chunk_count % 50 == 0: # Print progress every 50 chunks
print(f" Streamed {chunk_count} chunks...")
yield chunk
end_time = asyncio.get_event_loop().time()
duration = end_time - start_time
# Estimate tokens (roughly 4 chars per token)
estimated_tokens = total_chars // 4
tps = estimated_tokens / duration if duration > 0 else 0
if not self.mcp_mode:
print(f" Stream complete: {chunk_count} chunks total")
print(f" Stream complete: {chunk_count} chunks, {estimated_tokens} tokens, {tps:.1f} t/s")
def get_status(self) -> SwarmStatus:
"""Get current swarm status."""
-5
View File
@@ -158,11 +158,6 @@ class SwarmWorker:
prompt_tokens = len(request.prompt) // 4
self._context_used = min(prompt_tokens + request.max_tokens, 131072) # Cap at reasonable max
# Debug: Log the actual prompt size received
import logging
logger = logging.getLogger(__name__)
logger.info(f"🔍 WORKER {self.name}: Received prompt with {len(request.prompt)} chars, ~{prompt_tokens} tokens + {request.max_tokens} max = {self._context_used} context")
try:
start_time = time.time()