# local_swarm/main.py

#!/usr/bin/env python3
"""
Local Swarm - Automatically configure and run a swarm of small coding LLMs

NOTE: On macOS with Apple Silicon, we use multiprocessing with spawn method
to safely handle multiple MLX models. This prevents GPU conflicts.
"""

import sys
import multiprocessing as mp

# CRITICAL: Set the "spawn" start method BEFORE any other imports on macOS.
# This prevents fork-related issues with the Metal GPU: the project imports
# further down must only run once the start method is fixed.
if sys.platform == "darwin":
    try:
        # force=True asks multiprocessing to override a previously chosen method.
        mp.set_start_method("spawn", force=True)
    except RuntimeError:
        pass  # Start method could not be changed; keep what is already set

import argparse
import asyncio
from pathlib import Path

# Make the bundled "src" directory importable. The path is resolved to an
# absolute one for Windows compatibility.
src_path = Path(__file__).parent.resolve() / "src"
sys.path.insert(0, str(src_path))

# Also expose the script's own directory (the parent of "src") to work around
# Windows import issues, unless it is already on the search path.
if str(src_path.parent) not in sys.path:
    sys.path.insert(0, str(src_path.parent))

# These imports must come AFTER setting spawn method on macOS
from hardware.detector import detect_hardware
from models.selector import select_optimal_model
from models.downloader import download_model_for_config
from swarm import SwarmManager
from api import create_server
from api.routes import set_federated_swarm
from mcp_server import create_mcp_server
from interactive import (
    interactive_model_selection,
    show_startup_summary,
    show_runtime_menu,
    custom_configuration,
)
from network import create_discovery_service, FederatedSwarm
from tools.executor import ToolExecutor, set_tool_executor
from utils.logging_config import setup_logging

# Configure logging once at import time, before any swarm code runs.
# NOTE(review): the level is whatever setup_logging() defaults to - presumably
# DEBUG for development; confirm in utils.logging_config.
setup_logging()


async def setup_swarm(model_config, hardware):
    """Fetch the model for *model_config* and bring up a swarm running it.

    Problems are reported with ``print`` instead of being raised.

    Returns:
        The initialized ``SwarmManager``, or ``None`` when the download
        fails, swarm construction/initialization raises, or
        ``initialize()`` reports failure.
    """
    # Step 1: make sure the model files are available locally.
    print("\n⬇️  Downloading model...")
    try:
        model_path = download_model_for_config(model_config)
        print(f"✓ Model ready at: {model_path}")
    except Exception as download_error:
        print(f"\n❌ Error downloading model: {download_error}", file=sys.stderr)
        return None

    # Step 2: build the swarm and load the model into it.
    print("\n🚀 Initializing swarm...")
    try:
        manager = SwarmManager(
            model_config=model_config,
            hardware=hardware,
            consensus_strategy="similarity",
        )
        # initialize() is handed the model location as a plain string.
        if await manager.initialize(str(model_path)):
            return manager

        print("❌ Failed to initialize swarm")
        return None
    except Exception as init_error:
        print(f"\n❌ Error initializing swarm: {init_error}", file=sys.stderr)
        return None


def get_local_ip():
    """Return this machine's LAN IPv4 address, or "127.0.0.1" as a fallback.

    The address is discovered by connecting a UDP socket to 8.8.8.8:80 and
    reading the socket's own address. Connecting a datagram socket does not
    send any data.

    Only addresses starting with ``192.168.`` are accepted. Any other
    address (including other private ranges) and any detection error yield
    ``"127.0.0.1"``, so the caller never binds to an unexpected interface.

    Returns:
        str: A ``192.168.x.x`` address, or ``"127.0.0.1"``.
    """
    import socket

    try:
        # The context manager closes the socket even if connect() or
        # getsockname() raises; previously it leaked on the error path.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.settimeout(2)
            # Try to connect to Google's DNS - this doesn't actually send data
            s.connect(("8.8.8.8", 80))
            ip = s.getsockname()[0]
    except OSError as e:
        print(f"  ⚠️  Could not detect local IP: {e}, using localhost")
        return "127.0.0.1"

    # Deliberately restricted to 192.168.x.x for this network.
    if ip.startswith('192.168.'):
        print(f"  📡 Detected local IP: {ip}")
        return ip

    # Anything else is treated as not private: fall back to localhost for safety
    print(f"  ⚠️  IP {ip} is not a private network, binding to localhost")
    return "127.0.0.1"

def main():
    """Parse the command line and run the selected Local Swarm mode.

    Hardware is detected first, then exactly one mode runs:

    * ``--detect``: print hardware information and return.
    * ``--tool-server``: serve only the tool-execution endpoints (blocks).
    * ``--download-only``: download the selected model and return.
    * ``--test``: run one sample prompt through the swarm.
    * default: download the model, start the swarm and the HTTP API server,
      optionally with federation (``--federation``) and MCP (``--mcp``).

    The model configuration comes from ``select_optimal_model`` when
    ``--model``, ``--instances`` or ``--auto`` is given, otherwise from the
    interactive menu.

    Exits with status 1 (via ``sys.exit``) when hardware detection, model
    selection, the download, the test run, swarm setup or the server fails.
    """
    parser = argparse.ArgumentParser(
        description="Local Swarm - AI-powered coding LLM swarm",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python main.py                    # Interactive setup and start
  python main.py --auto             # Auto-detect and start without menu
  python main.py --detect           # Show hardware detection only
  python main.py --model qwen:3b:q4 # Use specific model (skip menu)
  python main.py --port 17615       # Use custom port (default: 17615)
  python main.py --host 192.168.1.5 # Bind to specific IP
  python main.py --instances 4      # Force number of instances
  python main.py --download-only    # Download model only
  python main.py --test             # Test with sample prompt
  python main.py --mcp              # Enable MCP server
  python main.py --federation       # Enable federation with other instances
  python main.py --federation --peer 192.168.1.10:17615  # Manual peer
        """
    )

    parser.add_argument(
        "--auto",
        action="store_true",
        help="Auto-detect best configuration without interactive menu"
    )
    parser.add_argument(
        "--detect",
        action="store_true",
        help="Show hardware detection and exit"
    )
    parser.add_argument(
        "--model",
        type=str,
        help="Model to use (format: name:size:quant, e.g., qwen:3b:q4)"
    )
    parser.add_argument(
        "--port",
        type=int,
        default=17615,
        help="Port to run the API server on (default: 17615)"
    )
    parser.add_argument(
        "--instances",
        type=int,
        help="Force number of instances (overrides auto-calculation)"
    )
    parser.add_argument(
        "--download-only",
        action="store_true",
        help="Download models only, don't start server"
    )
    parser.add_argument(
        "--test",
        action="store_true",
        help="Test with a sample prompt"
    )
    parser.add_argument(
        "--mcp",
        action="store_true",
        help="Enable MCP server alongside HTTP API"
    )
    parser.add_argument(
        "--config",
        type=str,
        default="config.yaml",
        help="Path to config file"
    )
    parser.add_argument(
        "--host",
        type=str,
        default=None,
        help="Host IP to bind to (default: auto-detect)"
    )
    parser.add_argument(
        "--federation",
        action="store_true",
        help="Enable federation with other Local Swarm instances on the network"
    )
    parser.add_argument(
        "--peer",
        action="append",
        dest="peers",
        help="Manually add a peer (format: host:port, can be used multiple times)"
    )
    parser.add_argument(
        "--tool-server",
        action="store_true",
        help="Run as dedicated tool execution server (executes read/write/bash tools)"
    )
    parser.add_argument(
        "--tool-port",
        type=int,
        default=17616,
        help="Port for tool execution server (default: 17616)"
    )
    parser.add_argument(
        "--tool-host",
        type=str,
        default=None,
        nargs='?',
        const='',  # When --tool-host is used without a value, use empty string
        help="URL of tool execution server. Use without value for auto-detected local IP (http://<local-ip>:<tool-port>, default port 17616), or provide explicit URL."
    )
    parser.add_argument(
        "--use-opencode-tools",
        action="store_true",
        help="Use opencode's tool definitions (adds ~27k tokens to context). Default: use local tool server (saves tokens)"
    )
    parser.add_argument(
        "--version",
        action="version",
        version="%(prog)s 0.1.0"
    )

    args = parser.parse_args()

    # Detect hardware first; every mode below runs after this step.
    print("\n🔍 Detecting hardware...")
    try:
        hardware = detect_hardware()
    except Exception as e:
        print(f"\n❌ Error detecting hardware: {e}", file=sys.stderr)
        sys.exit(1)

    if args.detect:
        # Just show hardware info
        from interactive import print_hardware_info
        print_hardware_info(hardware)
        print("\n✅ Detection complete")
        return

    # Tool server mode - run minimal tool-only server
    if args.tool_server:
        print("\n🔧 Starting Tool Execution Server...")
        from fastapi import FastAPI
        import uvicorn

        # Initialize local tool executor (no remote URL: tools run here)
        tool_executor = ToolExecutor(tool_host_url=None)
        set_tool_executor(tool_executor)

        app = FastAPI(title="Local Swarm Tool Server")

        @app.post("/v1/tools/execute")
        async def execute_tool(request: dict):
            tool_name = request.get("tool", "")
            tool_args = request.get("arguments", {})
            result = await tool_executor.execute(tool_name, tool_args)
            return {"result": result}

        @app.get("/health")
        async def health():
            return {"status": "healthy", "mode": "tool-server"}

        host = args.host or get_local_ip()
        tool_port = args.tool_port
        print(f"🔗 Tool server running at http://{host}:{tool_port}")
        print("   Endpoints:")
        print("   - POST /v1/tools/execute")
        print("   - GET  /health")
        print("\n✅ Tool server ready!")

        # Blocks until the server is stopped.
        uvicorn.run(app, host=host, port=tool_port)
        return

    # Determine model configuration
    config = None

    if args.model or args.instances or args.auto:
        # Use command-line arguments or auto-detect
        print("\n📊 Calculating optimal configuration...")
        try:
            config = select_optimal_model(
                hardware,
                preferred_model=args.model,
                force_instances=args.instances
            )

            if not config:
                print("\n❌ No suitable model found for your hardware")
                print("   Minimum requirement: 2 GB available memory")
                sys.exit(1)

            # Show brief summary
            print(f"\n✓ Selected: {config.display_name}")
            print(f"  Instances: {config.instances}")
            print(f"  Memory: {config.total_memory_gb:.1f} GB")

        except Exception as e:
            print(f"\n❌ Error selecting model: {e}", file=sys.stderr)
            sys.exit(1)
    else:
        # Interactive mode - show menu
        config = interactive_model_selection(hardware)

        if not config:
            print("\n❌ No configuration selected")
            sys.exit(1)

    if args.download_only:
        # Download model only
        print("\n" + "=" * 70)
        print("⬇️  Download Mode: Downloading model only")
        print("=" * 70)

        try:
            model_path = download_model_for_config(config)
            print(f"✓ Model downloaded to: {model_path}")
            print("\n" + "=" * 70)
            print("✅ Download complete")
            print("=" * 70)
        except Exception as e:
            print(f"\n❌ Download failed: {e}", file=sys.stderr)
            sys.exit(1)

    elif args.test:
        # Test mode with sample prompt
        print("\n" + "=" * 70)
        print("🧪 Test Mode: Running sample inference")
        print("=" * 70)

        async def test_inference():
            show_startup_summary(hardware, config)
            swarm = await setup_swarm(config, hardware)
            if not swarm:
                return False

            try:
                # Test prompt
                prompt = "Write a Python function to calculate factorial:"
                print(f"\nPrompt: {prompt}\n")
                print("Generating responses...\n")

                result = await swarm.generate(prompt, max_tokens=200)

                print("\n" + "=" * 70)
                print("SELECTED RESPONSE:")
                print("=" * 70)
                print(result.selected_response.text)
                print("\n" + "=" * 70)
                print(f"Strategy: {result.strategy}")
                print(f"Confidence: {result.confidence:.2f}")
                print(f"Latency: {result.selected_response.latency_ms:.1f}ms")
                print(f"Tokens/sec: {result.selected_response.tokens_per_second:.1f}")

                # Show all responses
                print("\nAll responses received:")
                for i, resp in enumerate(result.all_responses):
                    preview = resp.text[:60].replace('\n', ' ')
                    print(f"  Worker {i}: {preview}... ({resp.latency_ms:.1f}ms)")

                return True
            finally:
                # Always release the swarm, even if generation failed.
                await swarm.shutdown()

        success = asyncio.run(test_inference())

        if success:
            print("\n" + "=" * 70)
            print("✅ Test complete")
            print("=" * 70)
        else:
            print("\n❌ Test failed")
            sys.exit(1)

    else:
        # Full mode (download + start API server + optional MCP)
        show_startup_summary(hardware, config)

        async def run_server():
            swarm = await setup_swarm(config, hardware)
            if not swarm:
                return False

            # Resolve where tools are executed. tool_host_url stays None for
            # local execution (the default, when --tool-host is absent).
            tool_host_url = None
            tool_host_auto = args.tool_host == ""
            if tool_host_auto:
                # --tool-host with no value - use local IP with the tool port
                local_ip = get_local_ip()
                tool_host_url = f"http://{local_ip}:{args.tool_port}"
                print(f"\n🔧 Using remote tool host: {tool_host_url} (auto-detected local IP)")
            elif args.tool_host is not None:
                # --tool-host with explicit value
                tool_host_url = args.tool_host
                print(f"\n🔧 Using remote tool host: {tool_host_url}")
            tool_executor = ToolExecutor(tool_host_url=tool_host_url)
            set_tool_executor(tool_executor)

            # Update summary with runtime info
            show_startup_summary(hardware, config, swarm)

            # Initialize federation if enabled
            discovery = None
            federated_swarm = None
            health_task = None
            if args.federation:
                print("\n🌐 Initializing federation...")
                try:
                    # Use specified host for advertising if provided
                    advertise_ip = args.host or None
                    discovery = await create_discovery_service(args.port, advertise_ip=advertise_ip)

                    # Get swarm info for advertising
                    swarm_info = {
                        "version": "0.1.0",
                        "instances": config.instances,
                        "model_id": config.model_id,
                        "hardware_summary": f"{hardware.cpu_cores} CPU, {hardware.ram_gb:.1f}GB RAM"
                    }

                    await discovery.start_advertising(swarm_info)
                    await discovery.start_listening()

                    # Add manual peers if specified
                    if args.peers:
                        print(f"  📍 Adding {len(args.peers)} manual peer(s)...")
                        from network.discovery import PeerInfo
                        from datetime import datetime
                        for peer_str in args.peers:
                            try:
                                # rsplit keeps any earlier ':' inside the host part
                                host, port = peer_str.rsplit(':', 1)
                                port = int(port)
                                peer = PeerInfo(
                                    host=host,
                                    port=port,
                                    name=f"manual_{host}_{port}",
                                    version="0.1.0",
                                    instances=0,
                                    model_id="unknown",
                                    hardware_summary="manual",
                                    last_seen=datetime.now()
                                )
                                discovery.peers[peer.name] = peer
                                print(f"    ✓ Added peer: {host}:{port}")
                            except Exception as e:
                                # A malformed peer is skipped; the others are still added.
                                print(f"    ⚠️  Failed to add peer {peer_str}: {e}")

                    # Create federated swarm wrapper
                    federated_swarm = FederatedSwarm(swarm, discovery)
                    set_federated_swarm(federated_swarm)

                    # Start health check loop in background. Keep a reference:
                    # the event loop only holds weak references to tasks, so
                    # an unreferenced task may be garbage-collected mid-run.
                    health_task = asyncio.create_task(
                        discovery.start_health_check_loop(interval_seconds=10)
                    )

                    print("  ✓ Federation enabled")
                    print(f"  ✓ Discovery active on port {discovery.discovery_port}")
                    print("  ✓ Peer health checks every 10s")
                except Exception as e:
                    print(f"  ⚠️  Failed to initialize federation: {e}")
                    print("     Continuing without federation...")

            mcp_server = None
            try:
                # Create and start API server
                print("\n🌐 Starting HTTP API server...")
                # Use provided host or auto-detect
                if args.host:
                    host = args.host
                    print(f"🔗 Using specified host: {host}:{args.port}")
                else:
                    # Use local network IP instead of 0.0.0.0 for security
                    host = get_local_ip()
                    print(f"🔗 Binding to {host}:{args.port}")

                # Show tool mode being used
                if args.use_opencode_tools:
                    print("🔧 Tool mode: opencode tools (~27k tokens, full capabilities)")
                else:
                    print("🔧 Tool mode: local tool server (~125 tokens, saves tokens)")

                server = create_server(swarm, host=host, port=args.port, use_opencode_tools=args.use_opencode_tools)

                print("\n✅ Local Swarm is running!")
                print(f"   API: http://{host}:{args.port}/v1")
                print(f"   Health: http://{host}:{args.port}/health")

                if args.federation and discovery:
                    peers = discovery.get_peers()
                    print("\n🌐 Federation: Enabled")
                    print(f"   Discovery port: {discovery.discovery_port}")
                    if peers:
                        print(f"   Peers discovered: {len(peers)}")
                        for peer in peers:
                            print(f"     - {peer.name} ({peer.model_id})")
                    else:
                        print("   Peers discovered: 0 (waiting for peers...)")

                # Show tool server status. Reuse the URL resolved above rather
                # than detecting the local IP a second time.
                if args.tool_host is not None:
                    print("\n🔧 Tool Server: Remote")
                    if tool_host_auto:
                        print(f"   URL: {tool_host_url} (auto-detected)")
                    else:
                        print(f"   URL: {tool_host_url}")
                    print("   Mode: Tools executed remotely on tool host")
                else:
                    print("\n🔧 Tool Server: Local")
                    print("   Mode: Tools executed on this machine")

                if args.mcp:
                    # Start MCP server alongside HTTP API
                    print("\n🤖 Starting MCP server...")
                    mcp_server = await create_mcp_server(swarm)
                    print("   MCP server active (stdio)")

                # A server bound to a specific address is not reachable via
                # loopback, so advertise the bound host; only wildcard binds
                # are shown as 127.0.0.1.
                client_host = "127.0.0.1" if host in ("0.0.0.0", "::") else host
                print("\n💡 Configure opencode to use:")
                print(f'   base_url: http://{client_host}:{args.port}/v1')
                print('   api_key: any (not used)')
                print("\nPress Ctrl+C to stop...\n")

                # Start HTTP server (this will block)
                await server.start()

            except KeyboardInterrupt:
                print("\n\nReceived stop signal")
            finally:
                # Stop the background health checks before tearing down the
                # discovery service they poll.
                if health_task is not None:
                    health_task.cancel()
                try:
                    if federated_swarm:
                        await federated_swarm.close()
                    if discovery:
                        await discovery.stop()
                finally:
                    # Shut the swarm down even if federation teardown raised.
                    await swarm.shutdown()

            return True

        try:
            success = asyncio.run(run_server())
        except Exception as e:
            print(f"\n❌ Error running server: {e}", file=sys.stderr)
            sys.exit(1)

        if not success:
            # setup_swarm() already reported the cause; signal the failure to
            # the caller instead of exiting with status 0.
            sys.exit(1)

        print("\n" + "=" * 70)
        print("✅ Server stopped gracefully")
        print("=" * 70)


if __name__ == "__main__":
    main()