From 414cb444f3f9d00cac1ec301caad4a4e4e1e3703 Mon Sep 17 00:00:00 2001 From: Kaloyan Nikolov Date: Wed, 25 Feb 2026 23:06:37 +0100 Subject: [PATCH] fix: integrate federation with tool execution loop - Federation was returning directly without executing tools - Now federation is used for initial generation (iteration 1) - Tool execution loop still runs for all iterations - Subsequent iterations use local swarm (for tool result processing) - This fixes federation + tools not working together - All 41 tests passing --- src/api/chat_handlers.py | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/api/chat_handlers.py b/src/api/chat_handlers.py index b5814e5..a0bd51a 100644 --- a/src/api/chat_handlers.py +++ b/src/api/chat_handlers.py @@ -523,21 +523,14 @@ async def handle_chat_completion( logger.info(f" messages={len(request.messages)}") logger.info(f"{'='*60}") - # Use federation if available - if federated_swarm is not None: - peers = federated_swarm.discovery.get_peers() - if peers: - logger.info(f"🌐 Using federation with {len(peers)} peer(s)...") - content, tool_calls, finish_reason = await _generate_with_federation( - federated_swarm, prompt, request.max_tokens or 1024, request.temperature or 0.7 - ) - return _create_response(content, tool_calls, finish_reason, prompt, request, swarm_manager) - - - # Build conversation history messages = list(request.messages) + # Determine if we should use federation for generation + use_federation = federated_swarm is not None and len(federated_swarm.discovery.get_peers()) > 0 + if use_federation: + logger.info(f"🌐 Federation available with peers") + # Track thinking content for streaming (OpenCode reasoning_content) thinking_content: Optional[str] = None thinking_captured = False @@ -551,11 +544,22 @@ async def handle_chat_completion( iteration += 1 logger.info(f"--- Tool Execution Iteration {iteration} ---") - # Generate response + # Generate response (use federation if available) logger.debug(f"Generating response...") - response_text, tokens_generated, tps = await _generate_with_local_swarm( - swarm_manager, prompt, request.max_tokens or 1024, request.temperature or 0.7 - ) + if use_federation and iteration == 1: + # First iteration: use federation for consensus + logger.info(f"🌐 Using federation for generation...") + content, tool_calls, finish_reason = await _generate_with_federation( + federated_swarm, prompt, request.max_tokens or 1024, request.temperature or 0.7 + ) + response_text = content + tokens_generated = 0 # Will be calculated from usage if needed + tps = 0.0 + else: + # Subsequent iterations or no federation: use local swarm + response_text, tokens_generated, tps = await _generate_with_local_swarm( + swarm_manager, prompt, request.max_tokens or 1024, request.temperature or 0.7 + ) logger.info(f"Generated response ({len(response_text)} chars, {tokens_generated} tokens)") logger.debug(f"Response: {response_text[:200]}...")