feat: wire up federation to use peer swarms for generation

- Modify /v1/chat/completions endpoint to check for federation
- If federation enabled with peers, use generate_with_federation()
- Otherwise fall back to local generation
- Add --peer example to help text

Now, when federation is enabled and peers are discovered (or added manually),
generation requests are distributed across the local and peer swarms,
with consensus voting to select the best response.
2026-02-24 05:12:54 +01:00
parent 857241135c
commit 6e06304b70
2 changed files with 41 additions and 0 deletions
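The consensus voting mentioned in the commit message happens inside generate_with_federation() and is not shown in this diff. A minimal sketch of what such voting could look like, using pairwise text similarity as the agreement metric; FederationResult and vote_on_responses are illustrative names, not taken from this commit:

# Hypothetical sketch: pick the candidate response that agrees most with
# the others. FederationResult and vote_on_responses are illustrative
# names; the actual implementation lives in the federation module.
from dataclasses import dataclass, field
from difflib import SequenceMatcher

@dataclass
class FederationResult:
    final_response: str
    votes: dict = field(default_factory=dict)

def vote_on_responses(candidates: dict) -> FederationResult:
    # candidates maps a swarm id ("local", "192.168.1.10:17615", ...)
    # to the response text that swarm produced.
    scores = {}
    for swarm_id, text in candidates.items():
        # Agreement score: summed pairwise similarity to every other candidate
        scores[swarm_id] = sum(
            SequenceMatcher(None, text, other).ratio()
            for other_id, other in candidates.items()
            if other_id != swarm_id
        )
    winner = max(scores, key=scores.get)
    return FederationResult(final_response=candidates[winner], votes=scores)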
@@ -123,6 +123,7 @@ Examples:
  python main.py --test                                   # Test with sample prompt
  python main.py --mcp                                    # Enable MCP server
  python main.py --federation                             # Enable federation with other instances
  python main.py --federation --peer 192.168.1.10:17615   # Manual peer
"""
)
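The --peer flag itself is defined outside this hunk; only the help-text example is new here. A sketch of how such a flag might be declared with argparse, assuming it is repeatable (illustrative only, not the actual definition from this repo):

import argparse

# Illustrative sketch; the real argument definitions live outside this diff.
parser = argparse.ArgumentParser()
parser.add_argument("--federation", action="store_true",
                    help="Enable federation with other instances")
parser.add_argument("--peer", action="append", metavar="HOST:PORT", default=[],
                    help="Manually add a federation peer; may be repeated")

args = parser.parse_args(["--federation", "--peer", "192.168.1.10:17615"])
# args.peer == ["192.168.1.10:17615"]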
@@ -297,6 +297,46 @@ async def chat_completions(request: ChatCompletionRequest):
    else:
        # Regular response with consensus
        try:
            # Use federation if enabled and peers are available
            if federated_swarm is not None:
                peers = federated_swarm.discovery.get_peers()
                if peers:
                    print(f"🌐 Using federation with {len(peers)} peer(s)...")
                    result = await federated_swarm.generate_with_federation(
                        prompt=prompt,
                        max_tokens=request.max_tokens or 1024,
                        temperature=request.temperature or 0.7,
                        min_peers=0  # Allow local fallback if no peers respond
                    )
                    response_text = result.final_response
                    tokens_generated = len(response_text.split())  # Rough estimate
                    # Estimate prompt tokens (rough approximation)
                    prompt_tokens = len(prompt) // 4
                    return ChatCompletionResponse(
                        id=completion_id,
                        created=created,
                        model=request.model,
                        choices=[
                            ChatCompletionChoice(
                                index=0,
                                message=ChatMessage(
                                    role="assistant",
                                    content=response_text,
                                    tool_calls=[]
                                ),
                                finish_reason="stop"
                            )
                        ],
                        usage=UsageInfo(
                            prompt_tokens=prompt_tokens,
                            completion_tokens=tokens_generated,
                            total_tokens=prompt_tokens + tokens_generated
                        )
                    )

            # Fallback to local generation
            result = await swarm_manager.generate(
                prompt=prompt,
                max_tokens=request.max_tokens or 1024,