diff --git a/src/swarm/manager.py b/src/swarm/manager.py index 579efef..9e3f146 100644 --- a/src/swarm/manager.py +++ b/src/swarm/manager.py @@ -357,13 +357,21 @@ class SwarmManager: if not self.mcp_mode: print(f"🔄 Starting stream from {fastest_worker.name}...") chunk_count = 0 + total_chars = 0 + start_time = asyncio.get_event_loop().time() async for chunk in fastest_worker.generate_with_progress_stream(request): chunk_count += 1 + total_chars += len(chunk) if not self.mcp_mode and chunk_count % 50 == 0: # Print progress every 50 chunks print(f" Streamed {chunk_count} chunks...") yield chunk + end_time = asyncio.get_event_loop().time() + duration = end_time - start_time + # Estimate tokens (roughly 4 chars per token) + estimated_tokens = total_chars // 4 + tps = estimated_tokens / duration if duration > 0 else 0 if not self.mcp_mode: - print(f" Stream complete: {chunk_count} chunks total") + print(f" Stream complete: {chunk_count} chunks, {estimated_tokens} tokens, {tps:.1f} t/s") def get_status(self) -> SwarmStatus: """Get current swarm status.""" diff --git a/src/swarm/worker.py b/src/swarm/worker.py index 09b877b..3eb9fa7 100644 --- a/src/swarm/worker.py +++ b/src/swarm/worker.py @@ -158,11 +158,6 @@ class SwarmWorker: prompt_tokens = len(request.prompt) // 4 self._context_used = min(prompt_tokens + request.max_tokens, 131072) # Cap at reasonable max - - # Debug: Log the actual prompt size received - import logging - logger = logging.getLogger(__name__) - logger.info(f"🔍 WORKER {self.name}: Received prompt with {len(request.prompt)} chars, ~{prompt_tokens} tokens + {request.max_tokens} max = {self._context_used} context") try: start_time = time.time()