feat: wire up federation to use peer swarms for generation
- Modify /v1/chat/completions endpoint to check for federation
- If federation is enabled with peers, use generate_with_federation()
- Otherwise fall back to local generation
- Add --peer example to help text

Now, when federation is enabled and peers are discovered or added manually, generation requests are distributed across the local and peer swarms, with consensus voting selecting the best response.
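The consensus step itself is not part of this diff; the following is a rough sketch of what `generate_with_federation()` might do under the hood. The `FederatedSwarm` internals, the `_query_peer` helper, and the majority-vote rule are assumptions — only the method signature, the result's `final_response` field, and `discovery.get_peers()` match the code in the diff below.

```python
import asyncio
from collections import Counter
from dataclasses import dataclass


@dataclass
class FederationResult:
    final_response: str


class FederatedSwarm:
    """Hypothetical sketch -- not this project's actual implementation."""

    def __init__(self, local_swarm, discovery):
        self.local_swarm = local_swarm  # local SwarmManager
        self.discovery = discovery      # peer discovery service

    async def _query_peer(self, peer, prompt, max_tokens, temperature):
        # Placeholder: a real implementation would call the peer's HTTP API.
        raise NotImplementedError

    async def generate_with_federation(self, prompt, max_tokens=1024,
                                       temperature=0.7, min_peers=0):
        # Fan the request out to the local swarm plus every known peer.
        tasks = [self.local_swarm.generate(prompt=prompt, max_tokens=max_tokens)]
        tasks += [self._query_peer(p, prompt, max_tokens, temperature)
                  for p in self.discovery.get_peers()]

        # return_exceptions=True drops unreachable peers instead of failing.
        local, *peer_results = await asyncio.gather(*tasks, return_exceptions=True)
        responses = [r for r in peer_results if isinstance(r, str)]
        if len(responses) < min_peers:
            raise RuntimeError("not enough peers responded")
        if isinstance(local, str):
            responses.append(local)

        # Consensus vote: the most common response across swarms wins.
        winner, _ = Counter(responses).most_common(1)[0]
        return FederationResult(final_response=winner)
```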
@@ -123,6 +123,7 @@ Examples:
   python main.py --test          # Test with sample prompt
   python main.py --mcp           # Enable MCP server
   python main.py --federation    # Enable federation with other instances
+  python main.py --federation --peer 192.168.1.10:17615  # Manual peer
 """
 )
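As a concrete two-node setup matching the help text above (the IP and port come from the example line; everything else about the deployment is assumed):

```
# Node A at 192.168.1.10: enable federation and let peers discover it
python main.py --federation

# Node B: enable federation and add node A as a manual peer
python main.py --federation --peer 192.168.1.10:17615
```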
@@ -297,6 +297,46 @@ async def chat_completions(request: ChatCompletionRequest):
     else:
         # Regular response with consensus
         try:
+            # Use federation if enabled and peers are available
+            if federated_swarm is not None:
+                peers = federated_swarm.discovery.get_peers()
+                if peers:
+                    print(f"🌐 Using federation with {len(peers)} peer(s)...")
+                    result = await federated_swarm.generate_with_federation(
+                        prompt=prompt,
+                        max_tokens=request.max_tokens or 1024,
+                        temperature=request.temperature or 0.7,
+                        min_peers=0  # Allow local fallback if no peers respond
+                    )
+                    response_text = result.final_response
+                    tokens_generated = len(response_text.split())  # Rough estimate
+
+                    # Estimate prompt tokens (rough approximation)
+                    prompt_tokens = len(prompt) // 4
+
+                    return ChatCompletionResponse(
+                        id=completion_id,
+                        created=created,
+                        model=request.model,
+                        choices=[
+                            ChatCompletionChoice(
+                                index=0,
+                                message=ChatMessage(
+                                    role="assistant",
+                                    content=response_text,
+                                    tool_calls=[]
+                                ),
+                                finish_reason="stop"
+                            )
+                        ],
+                        usage=UsageInfo(
+                            prompt_tokens=prompt_tokens,
+                            completion_tokens=tokens_generated,
+                            total_tokens=prompt_tokens + tokens_generated
+                        )
+                    )
+
+            # Fallback to local generation
             result = await swarm_manager.generate(
                 prompt=prompt,
                 max_tokens=request.max_tokens or 1024,
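Once the server is up, the federation path is exercised through the same OpenAI-style endpoint as before; a minimal client sketch follows (the host, port, and model name are assumptions — the server's bind address is not shown in this diff):

```python
import json
import urllib.request

# Hypothetical host/port: adjust to wherever main.py serves its API.
req = urllib.request.Request(
    "http://localhost:8000/v1/chat/completions",
    data=json.dumps({
        "model": "swarm",
        "messages": [{"role": "user", "content": "Hello from the federation"}],
        "max_tokens": 256,
        "temperature": 0.7,
    }).encode(),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    body = json.load(resp)
print(body["choices"][0]["message"]["content"])
```

Because the handler passes min_peers=0, this same call keeps succeeding even when every peer is down: the federation call is allowed to settle on the local result, and when no peers are known at all the handler skips federation entirely and uses swarm_manager.generate().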