feat: Add tokens/sec tracking to streaming output
- Track timing during streaming to calculate tokens per second (t/s)
- Estimate tokens from character count (roughly 4 chars per token)
- Display t/s in the stream completion message
- Remove debug logging from the worker
This commit is contained in:
@@ -357,13 +357,21 @@ class SwarmManager:
|
||||
if not self.mcp_mode:
|
||||
print(f"🔄 Starting stream from {fastest_worker.name}...")
|
||||
chunk_count = 0
|
||||
total_chars = 0
|
||||
start_time = asyncio.get_event_loop().time()
|
||||
async for chunk in fastest_worker.generate_with_progress_stream(request):
|
||||
chunk_count += 1
|
||||
total_chars += len(chunk)
|
||||
if not self.mcp_mode and chunk_count % 50 == 0: # Print progress every 50 chunks
|
||||
print(f" Streamed {chunk_count} chunks...")
|
||||
yield chunk
|
||||
end_time = asyncio.get_event_loop().time()
|
||||
duration = end_time - start_time
|
||||
# Estimate tokens (roughly 4 chars per token)
|
||||
estimated_tokens = total_chars // 4
|
||||
tps = estimated_tokens / duration if duration > 0 else 0
|
||||
if not self.mcp_mode:
|
||||
print(f" Stream complete: {chunk_count} chunks total")
|
||||
print(f" Stream complete: {chunk_count} chunks, {estimated_tokens} tokens, {tps:.1f} t/s")
|
||||
|
||||
def get_status(self) -> SwarmStatus:
|
||||
"""Get current swarm status."""
|
||||
|
||||
@@ -158,11 +158,6 @@ class SwarmWorker:
|
||||
prompt_tokens = len(request.prompt) // 4
|
||||
|
||||
self._context_used = min(prompt_tokens + request.max_tokens, 131072) # Cap at reasonable max
|
||||
|
||||
# Debug: Log the actual prompt size received
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info(f"🔍 WORKER {self.name}: Received prompt with {len(request.prompt)} chars, ~{prompt_tokens} tokens + {request.max_tokens} max = {self._context_used} context")
|
||||
|
||||
try:
|
||||
start_time = time.time()
|
||||
|
||||
Reference in New Issue
Block a user