c18c20487c
- Add --use-opencode-tools flag to main.py - Default: local tool server mode (~125 tokens, saves ~27k tokens) - Optional: opencode tools mode (~27k tokens, full tool definitions) - Create .opencodeignore to exclude large docs from context - Update design doc with token bloat analysis This allows users to choose between: - Local tool server: Minimal tool instructions, saves 27k tokens - Opencode tools: Full tool definitions, more robust but expensive
557 lines
20 KiB
Python
557 lines
20 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Local Swarm - Automatically configure and run a swarm of small coding LLMs
|
|
|
|
NOTE: On macOS with Apple Silicon, we use multiprocessing with spawn method
|
|
to safely handle multiple MLX models. This prevents GPU conflicts.
|
|
"""
|
|
|
|
import sys
|
|
import multiprocessing as mp
|
|
|
|
# CRITICAL: keep this ahead of every import below. On macOS the start method
# has to be "spawn" before anything else is loaded, which avoids the
# fork-related issues with the Metal GPU described in the module docstring.
if sys.platform == "darwin":
    try:
        mp.set_start_method("spawn", force=True)
    except RuntimeError:
        # The start method was already fixed; nothing more to do.
        pass
|
|
|
|
import argparse
|
|
import asyncio
|
|
from pathlib import Path
|
|
|
|
# Put <script dir>/src at the front of the import path. The directory is
# resolved to an absolute path first, for Windows compatibility.
src_path = Path(__file__).parent.resolve() / "src"
sys.path.insert(0, str(src_path))

# The script directory itself (the parent of src/) is added as well, but only
# when it is not already importable; this works around Windows import issues.
if str(src_path.parent) not in sys.path:
    sys.path.insert(0, str(src_path.parent))
|
|
|
|
# These imports must come AFTER setting spawn method on macOS
|
|
from hardware.detector import detect_hardware
|
|
from models.selector import select_optimal_model
|
|
from models.downloader import download_model_for_config
|
|
from swarm import SwarmManager
|
|
from api import create_server
|
|
from api.routes import set_federated_swarm
|
|
from mcp_server import create_mcp_server
|
|
from interactive import (
|
|
interactive_model_selection,
|
|
show_startup_summary,
|
|
show_runtime_menu,
|
|
custom_configuration,
|
|
)
|
|
from network import create_discovery_service, FederatedSwarm
|
|
from tools.executor import ToolExecutor, set_tool_executor
|
|
from utils.logging_config import setup_logging
|
|
|
|
# Configure project logging once, at import time, before any function below
# runs. NOTE(review): the previous comment said this selects DEBUG level for
# development - confirm against utils.logging_config.setup_logging.
setup_logging()
|
|
|
|
|
|
async def setup_swarm(model_config, hardware):
    """Download the configured model and bring up a swarm that serves it.

    Args:
        model_config: Model configuration; handed to
            ``download_model_for_config`` and to ``SwarmManager``.
        hardware: Detected hardware description; handed to ``SwarmManager``.

    Returns:
        The initialized ``SwarmManager``, or ``None`` when the download
        raises, swarm construction/initialization raises, or
        ``swarm.initialize`` reports failure. Errors are never propagated;
        every failure is reported on stderr so callers only check for None.
    """
    # Step 1: make sure the model files are available locally.
    print("\n⬇️ Downloading model...")
    try:
        model_path = download_model_for_config(model_config)
        print(f"✓ Model ready at: {model_path}")
    except Exception as e:
        print(f"\n❌ Error downloading model: {e}", file=sys.stderr)
        return None

    # Step 2: build the swarm and load the model into it.
    print("\n🚀 Initializing swarm...")
    try:
        swarm = SwarmManager(
            model_config=model_config,
            hardware=hardware,
            consensus_strategy="similarity",
        )
        if not await swarm.initialize(str(model_path)):
            # Sent to stderr like the other two failure messages above/below.
            print("❌ Failed to initialize swarm", file=sys.stderr)
            return None
    except Exception as e:
        print(f"\n❌ Error initializing swarm: {e}", file=sys.stderr)
        return None

    return swarm
|
|
|
|
|
|
|
|
def get_local_ip():
    """Return the address to bind to: the LAN IP if it is 192.168.x.x, else localhost.

    The outbound interface is discovered by "connecting" a UDP socket to a
    public address and reading back the local address the OS selected.

    Returns:
        The detected IPv4 address as a string when it starts with
        ``192.168.``; otherwise ``"127.0.0.1"`` (also used when detection
        fails, e.g. no network route).
    """
    import socket

    try:
        # The context manager closes the socket even when connect() or
        # getsockname() raises; previously the socket leaked on that path.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
            probe.settimeout(2)
            # connect() on a datagram socket sends no packet; it only makes
            # the OS pick the route (and therefore the local address).
            probe.connect(("8.8.8.8", 80))
            ip = probe.getsockname()[0]
    except OSError as e:
        print(f" ⚠️ Could not detect local IP: {e}, using localhost")
        return "127.0.0.1"

    # Deliberately narrow: only 192.168.x.x is accepted for this network.
    if ip.startswith("192.168."):
        print(f" 📡 Detected local IP: {ip}")
        return ip

    # Anything else is treated as not private; fall back to localhost for safety.
    print(f" ⚠️ IP {ip} is not a private network, binding to localhost")
    return "127.0.0.1"
|
|
|
|
def _build_arg_parser():
    """Build the command-line parser covering every Local Swarm run mode."""
    parser = argparse.ArgumentParser(
        description="Local Swarm - AI-powered coding LLM swarm",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python main.py # Interactive setup and start
python main.py --auto # Auto-detect and start without menu
python main.py --detect # Show hardware detection only
python main.py --model qwen:3b:q4 # Use specific model (skip menu)
python main.py --port 17615 # Use custom port (default: 17615)
python main.py --host 192.168.1.5 # Bind to specific IP
python main.py --instances 4 # Force number of instances
python main.py --download-only # Download model only
python main.py --test # Test with sample prompt
python main.py --mcp # Enable MCP server
python main.py --federation # Enable federation with other instances
python main.py --federation --peer 192.168.1.10:17615 # Manual peer
""",
    )

    parser.add_argument(
        "--auto",
        action="store_true",
        help="Auto-detect best configuration without interactive menu",
    )
    parser.add_argument(
        "--detect",
        action="store_true",
        help="Show hardware detection and exit",
    )
    parser.add_argument(
        "--model",
        type=str,
        help="Model to use (format: name:size:quant, e.g., qwen:3b:q4)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=17615,
        help="Port to run the API server on (default: 17615)",
    )
    parser.add_argument(
        "--instances",
        type=int,
        help="Force number of instances (overrides auto-calculation)",
    )
    parser.add_argument(
        "--download-only",
        action="store_true",
        help="Download models only, don't start server",
    )
    parser.add_argument(
        "--test",
        action="store_true",
        help="Test with a sample prompt",
    )
    parser.add_argument(
        "--mcp",
        action="store_true",
        help="Enable MCP server alongside HTTP API",
    )
    parser.add_argument(
        "--config",
        type=str,
        default="config.yaml",
        help="Path to config file",
    )
    parser.add_argument(
        "--host",
        type=str,
        default=None,
        help="Host IP to bind to (default: auto-detect)",
    )
    parser.add_argument(
        "--federation",
        action="store_true",
        help="Enable federation with other Local Swarm instances on the network",
    )
    parser.add_argument(
        "--peer",
        action="append",
        dest="peers",
        help="Manually add a peer (format: host:port, can be used multiple times)",
    )
    parser.add_argument(
        "--tool-server",
        action="store_true",
        help="Run as dedicated tool execution server (executes read/write/bash tools)",
    )
    parser.add_argument(
        "--tool-port",
        type=int,
        default=17616,
        help="Port for tool execution server (default: 17616)",
    )
    parser.add_argument(
        "--tool-host",
        type=str,
        default=None,
        nargs='?',
        const='',  # When --tool-host is used without a value, use empty string
        help="URL of tool execution server. Use without value for auto-detected local IP (http://<local-ip>:<tool-port>, default port 17616), or provide explicit URL.",
    )
    parser.add_argument(
        "--use-opencode-tools",
        action="store_true",
        help="Use opencode's tool definitions (adds ~27k tokens to context). Default: use local tool server (saves tokens)",
    )
    parser.add_argument(
        "--version",
        action="version",
        version="%(prog)s 0.1.0",
    )
    return parser


def _banner(message):
    """Print *message* framed by two 70-character rules, preceded by a blank line."""
    print("\n" + "=" * 70)
    print(message)
    print("=" * 70)


def _run_tool_server(args):
    """Serve only the tool-execution endpoints; blocks until uvicorn stops."""
    print("\n🔧 Starting Tool Execution Server...")
    # Imported lazily so the other modes do not need the web stack loaded here.
    from fastapi import FastAPI
    import uvicorn

    # Tools run on this machine, hence no remote tool host.
    tool_executor = ToolExecutor(tool_host_url=None)
    set_tool_executor(tool_executor)

    app = FastAPI(title="Local Swarm Tool Server")

    @app.post("/v1/tools/execute")
    async def execute_tool(request: dict):
        tool_name = request.get("tool", "")
        tool_args = request.get("arguments", {})
        result = await tool_executor.execute(tool_name, tool_args)
        return {"result": result}

    @app.get("/health")
    async def health():
        return {"status": "healthy", "mode": "tool-server"}

    host = args.host if args.host else get_local_ip()
    tool_port = args.tool_port
    print(f"🔗 Tool server running at http://{host}:{tool_port}")
    print(" Endpoints:")
    print(" - POST /v1/tools/execute")
    print(" - GET /health")
    print("\n✅ Tool server ready!")

    uvicorn.run(app, host=host, port=tool_port)


def _select_model_config(args, hardware):
    """Return the model configuration from CLI flags or the interactive menu.

    Exits the process with status 1 when no configuration can be produced.
    """
    if args.model or args.instances or args.auto:
        # Use command-line arguments or auto-detect.
        print("\n📊 Calculating optimal configuration...")
        try:
            config = select_optimal_model(
                hardware,
                preferred_model=args.model,
                force_instances=args.instances,
            )
            if not config:
                print("\n❌ No suitable model found for your hardware")
                print(" Minimum requirement: 2 GB available memory")
                sys.exit(1)

            # Show brief summary.
            print(f"\n✓ Selected: {config.display_name}")
            print(f" Instances: {config.instances}")
            print(f" Memory: {config.total_memory_gb:.1f} GB")
        except Exception as e:
            # SystemExit is not an Exception, so the sys.exit above passes through.
            print(f"\n❌ Error selecting model: {e}", file=sys.stderr)
            sys.exit(1)
    else:
        # Interactive mode - show menu.
        config = interactive_model_selection(hardware)

    if not config:
        print("\n❌ No configuration selected")
        sys.exit(1)
    return config


def _download_model_only(config):
    """Download the model for *config* and report the result; exit 1 on failure."""
    _banner("⬇️ Download Mode: Downloading model only")
    try:
        model_path = download_model_for_config(config)
        print(f"✓ Model downloaded to: {model_path}")
        _banner("✅ Download complete")
    except Exception as e:
        print(f"\n❌ Download failed: {e}", file=sys.stderr)
        sys.exit(1)


async def _test_inference(hardware, config):
    """Start a swarm, run one sample prompt, print the outcome; True on success."""
    show_startup_summary(hardware, config)
    swarm = await setup_swarm(config, hardware)
    if not swarm:
        return False

    try:
        prompt = "Write a Python function to calculate factorial:"
        print(f"\nPrompt: {prompt}\n")
        print("Generating responses...\n")

        result = await swarm.generate(prompt, max_tokens=200)

        _banner("SELECTED RESPONSE:")
        print(result.selected_response.text)
        print("\n" + "=" * 70)
        print(f"Strategy: {result.strategy}")
        print(f"Confidence: {result.confidence:.2f}")
        print(f"Latency: {result.selected_response.latency_ms:.1f}ms")
        print(f"Tokens/sec: {result.selected_response.tokens_per_second:.1f}")

        # Show a one-line preview of every worker's response.
        print("\nAll responses received:")
        for i, resp in enumerate(result.all_responses):
            preview = resp.text[:60].replace('\n', ' ')
            print(f" Worker {i}: {preview}... ({resp.latency_ms:.1f}ms)")

        return True
    finally:
        await swarm.shutdown()


def _run_test_mode(hardware, config):
    """Run the sample-prompt self-test; exit 1 when it fails."""
    _banner("🧪 Test Mode: Running sample inference")
    if asyncio.run(_test_inference(hardware, config)):
        _banner("✅ Test complete")
    else:
        print("\n❌ Test failed")
        sys.exit(1)


def _resolve_tool_host_url(args):
    """Return the remote tool-server URL, or None for local tool execution.

    ``--tool-host`` given without a value (empty string) means "the tool
    server on this machine's LAN address"; the port follows ``--tool-port``
    so both sides of a non-default setup agree.
    """
    if args.tool_host is None:
        return None
    if args.tool_host == "":
        return f"http://{get_local_ip()}:{args.tool_port}"
    return args.tool_host


def _add_manual_peers(discovery, peer_specs):
    """Register each ``host:port`` entry of *peer_specs* in ``discovery.peers``.

    A malformed entry is reported and skipped; the remaining ones are still added.
    """
    print(f" 📍 Adding {len(peer_specs)} manual peer(s)...")
    from network.discovery import PeerInfo
    from datetime import datetime

    for peer_str in peer_specs:
        try:
            # rsplit keeps any earlier ':' inside the host part.
            peer_host, peer_port = peer_str.rsplit(':', 1)
            peer_port = int(peer_port)
            peer = PeerInfo(
                host=peer_host,
                port=peer_port,
                name=f"manual_{peer_host}_{peer_port}",
                version="0.1.0",
                instances=0,
                model_id="unknown",
                hardware_summary="manual",
                last_seen=datetime.now(),
            )
            discovery.peers[peer.name] = peer
            print(f" ✓ Added peer: {peer_host}:{peer_port}")
        except Exception as e:
            print(f" ⚠️ Failed to add peer {peer_str}: {e}")


async def _serve(args, hardware, config):
    """Run the HTTP API (plus optional federation and MCP) until it stops.

    Returns:
        False when the swarm could not be set up, True once the server has
        stopped. The swarm, discovery service and health-check task are
        always released before returning or propagating an error.
    """
    swarm = await setup_swarm(config, hardware)
    if not swarm:
        return False

    discovery = None
    federated_swarm = None
    health_task = None
    mcp_server = None
    try:
        # Initialize tool executor (remote when --tool-host was given).
        tool_host_url = _resolve_tool_host_url(args)
        auto_tool_host = args.tool_host == ""
        if tool_host_url is not None:
            note = " (auto-detected local IP)" if auto_tool_host else ""
            print(f"\n🔧 Using remote tool host: {tool_host_url}{note}")
        set_tool_executor(ToolExecutor(tool_host_url=tool_host_url))

        # Update summary with runtime info.
        show_startup_summary(hardware, config, swarm)

        # Federation is best-effort: a failure here must not stop the server.
        if args.federation:
            print("\n🌐 Initializing federation...")
            try:
                # Use specified host for advertising if provided.
                advertise_ip = args.host if args.host else None
                discovery = await create_discovery_service(args.port, advertise_ip=advertise_ip)

                swarm_info = {
                    "version": "0.1.0",
                    "instances": config.instances,
                    "model_id": config.model_id,
                    "hardware_summary": f"{hardware.cpu_cores} CPU, {hardware.ram_gb:.1f}GB RAM",
                }
                await discovery.start_advertising(swarm_info)
                await discovery.start_listening()

                if args.peers:
                    _add_manual_peers(discovery, args.peers)

                federated_swarm = FederatedSwarm(swarm, discovery)
                set_federated_swarm(federated_swarm)

                # Keep a reference: the event loop only holds tasks weakly, so
                # an unreferenced task may be garbage-collected mid-run. The
                # reference also lets shutdown cancel the loop explicitly.
                health_task = asyncio.create_task(
                    discovery.start_health_check_loop(interval_seconds=10)
                )

                print(" ✓ Federation enabled")
                print(f" ✓ Discovery active on port {discovery.discovery_port}")
                print(" ✓ Peer health checks every 10s")
            except Exception as e:
                print(f" ⚠️ Failed to initialize federation: {e}")
                print(" Continuing without federation...")

        print("\n🌐 Starting HTTP API server...")
        if args.host:
            host = args.host
            print(f"🔗 Using specified host: {host}:{args.port}")
        else:
            # Use local network IP instead of 0.0.0.0 for security.
            host = get_local_ip()
            print(f"🔗 Binding to {host}:{args.port}")

        if args.use_opencode_tools:
            print("🔧 Tool mode: opencode tools (~27k tokens, full capabilities)")
        else:
            print("🔧 Tool mode: local tool server (~125 tokens, saves tokens)")

        server = create_server(swarm, host=host, port=args.port, use_opencode_tools=args.use_opencode_tools)

        print("\n✅ Local Swarm is running!")
        print(f" API: http://{host}:{args.port}/v1")
        print(f" Health: http://{host}:{args.port}/health")

        if args.federation and discovery:
            peers = discovery.get_peers()
            print("\n🌐 Federation: Enabled")
            print(f" Discovery port: {discovery.discovery_port}")
            if peers:
                print(f" Peers discovered: {len(peers)}")
                for peer in peers:
                    print(f" - {peer.name} ({peer.model_id})")
            else:
                print(" Peers discovered: 0 (waiting for peers...)")

        # Show tool server status, reusing the URL resolved above.
        if tool_host_url is not None:
            print("\n🔧 Tool Server: Remote")
            if auto_tool_host:
                print(f" URL: {tool_host_url} (auto-detected)")
            else:
                print(f" URL: {args.tool_host}")
            print(" Mode: Tools executed remotely on tool host")
        else:
            print("\n🔧 Tool Server: Local")
            print(" Mode: Tools executed on this machine")

        if args.mcp:
            # Start MCP server alongside HTTP API.
            print("\n🤖 Starting MCP server...")
            mcp_server = await create_mcp_server(swarm)
            print(" MCP server active (stdio)")

        # The server is bound to `host` only, so that is the address a client
        # must use; a wildcard bind is reachable through loopback.
        # NOTE(review): assumes create_server binds exactly to `host`.
        client_host = "127.0.0.1" if host in ("0.0.0.0", "::") else host
        print("\n💡 Configure opencode to use:")
        print(f' base_url: http://{client_host}:{args.port}/v1')
        print(' api_key: any (not used)')
        print("\nPress Ctrl+C to stop...\n")

        # Start HTTP server (this will block).
        await server.start()
    except KeyboardInterrupt:
        print("\n\nReceived stop signal")
    finally:
        if health_task is not None:
            health_task.cancel()
            # Wait for the cancellation to land; its outcome is irrelevant.
            await asyncio.gather(health_task, return_exceptions=True)
        try:
            if federated_swarm:
                await federated_swarm.close()
            if discovery:
                await discovery.stop()
        finally:
            # Always release the models, even if federation teardown failed.
            await swarm.shutdown()

    return True


def _run_full_mode(args, hardware, config):
    """Download the model, start the API server, and report how it ended."""
    show_startup_summary(hardware, config)
    try:
        success = asyncio.run(_serve(args, hardware, config))
    except KeyboardInterrupt:
        # On Python 3.11+ asyncio.run cancels the main task on Ctrl+C and
        # re-raises KeyboardInterrupt after cleanup; treat it as a normal stop.
        print("\n\nReceived stop signal")
        success = True
    except Exception as e:
        print(f"\n❌ Error running server: {e}", file=sys.stderr)
        sys.exit(1)

    if not success:
        # setup_swarm already printed the reason; signal failure to the shell.
        sys.exit(1)
    _banner("✅ Server stopped gracefully")


def main():
    """Command-line entry point: parse arguments and dispatch to one run mode.

    Order of precedence after hardware detection: ``--detect``,
    ``--tool-server``, then model selection followed by ``--download-only``,
    ``--test`` or the full server. Fatal errors end the process with exit
    status 1 via ``sys.exit``.
    """
    args = _build_arg_parser().parse_args()

    # Detect hardware first.
    print("\n🔍 Detecting hardware...")
    try:
        hardware = detect_hardware()
    except Exception as e:
        print(f"\n❌ Error detecting hardware: {e}", file=sys.stderr)
        sys.exit(1)

    if args.detect:
        # Just show hardware info.
        from interactive import print_hardware_info
        print_hardware_info(hardware)
        print("\n✅ Detection complete")
        return

    # Tool server mode - run minimal tool-only server.
    if args.tool_server:
        _run_tool_server(args)
        return

    config = _select_model_config(args, hardware)

    if args.download_only:
        _download_model_only(config)
    elif args.test:
        _run_test_mode(hardware, config)
    else:
        # Full mode (download + start API server + optional MCP).
        _run_full_mode(args, hardware, config)


if __name__ == "__main__":
    main()
|