diff --git a/main.py b/main.py
index 42a776a..c849a19 100644
--- a/main.py
+++ b/main.py
@@ -10,223 +10,63 @@ import sys
 import multiprocessing as mp
 
 # CRITICAL: Set spawn method BEFORE any other imports on macOS
-# This prevents fork-related issues with Metal GPU
 if sys.platform == "darwin":
     try:
         mp.set_start_method("spawn", force=True)
     except RuntimeError:
-        pass  # Already set
+        pass
 
-import argparse
 import asyncio
 from pathlib import Path
 
-# Add src to path - resolve for Windows compatibility
+# Add src to path
 src_path = Path(__file__).parent.resolve() / "src"
 sys.path.insert(0, str(src_path))
-
-# Also add parent dir for Windows import issues
 if str(Path(__file__).parent.resolve()) not in sys.path:
     sys.path.insert(0, str(Path(__file__).parent.resolve()))
 
-# These imports must come AFTER setting spawn method on macOS
-from hardware.detector import detect_hardware
-from models.selector import select_optimal_model
-from models.downloader import download_model_for_config
-from swarm import SwarmManager
-from api import create_server
-from api.routes import set_federated_swarm
-from mcp_server import create_mcp_server
-from interactive import (
-    interactive_model_selection,
-    show_startup_summary,
-    show_runtime_menu,
-    custom_configuration,
-)
-from network import create_discovery_service, FederatedSwarm
-from tools.executor import ToolExecutor, set_tool_executor
+from cli.parser import parse_args
+from cli.tool_server import run_tool_server
+from utils.network import get_local_ip
 from utils.logging_config import setup_logging
+from hardware.detector import detect_hardware
+from interactive import print_hardware_info
 
-# Set up logging (DEBUG level for development)
+# Set up logging
 setup_logging()
 
 
-async def setup_swarm(model_config, hardware):
-    """Download model and initialize swarm."""
-    # Download model
-    print("\n⬇️  Downloading model...")
-    try:
-        model_path = download_model_for_config(model_config)
-        print(f"✓ Model ready at: {model_path}")
-    except Exception as e:
-        print(f"\n❌ Error downloading model: {e}", file=sys.stderr)
-        return None
+def handle_detect_mode(hardware) -> int:
+    """Handle --detect mode."""
+    print_hardware_info(hardware)
+    print("\n✅ Detection complete")
+    return 0
+
+
+def handle_tool_server_mode(args, hardware) -> int:
+    """Handle --tool-server mode."""
+    print("\n🔧 Starting Tool Execution Server...")
+    host = args.host if args.host else get_local_ip()
     
-    # Initialize swarm
-    print("\n🚀 Initializing swarm...")
     try:
-        swarm = SwarmManager(
-            model_config=model_config,
-            hardware=hardware,
-            consensus_strategy="similarity"
-        )
-        
-        success = await swarm.initialize(str(model_path))
-        if not success:
-            print("❌ Failed to initialize swarm")
-            return None
-        
-        return swarm
-    except Exception as e:
-        print(f"\n❌ Error initializing swarm: {e}", file=sys.stderr)
-        return None
+        asyncio.run(run_tool_server(host, args.tool_port))
+        return 0
+    except KeyboardInterrupt:
+        print("\n\nTool server stopped")
+        return 0
 
 
-
-def get_local_ip():
-    """Get the local network IP address (private networks only)."""
-    import socket
-    try:
-        # Create a socket and connect to a public DNS server
-        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
-        s.settimeout(2)
-        # Try to connect to Google's DNS - this doesn't actually send data
-        s.connect(("8.8.8.8", 80))
-        ip = s.getsockname()[0]
-        s.close()
-        
-        # Check if it's a private IP (only 192.168.x.x for this network)
-        is_private = (
-            ip.startswith('192.168.')
-        )
-        
-        if is_private:
-            print(f"  📡 Detected local IP: {ip}")
-            return ip
-        else:
-            # If not private, return localhost for safety
-            print(f"  ⚠️  IP {ip} is not a private network, binding to localhost")
-            return "127.0.0.1"
-    except Exception as e:
-        print(f"  ⚠️  Could not detect local IP: {e}, using localhost")
-        return "127.0.0.1"
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Local Swarm - AI-powered coding LLM swarm",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog="""
-Examples:
-  python main.py                    # Interactive setup and start
-  python main.py --auto             # Auto-detect and start without menu
-  python main.py --detect           # Show hardware detection only
-  python main.py --model qwen:3b:q4 # Use specific model (skip menu)
-  python main.py --port 17615       # Use custom port (default: 17615)
-  python main.py --host 192.168.1.5 # Bind to specific IP
-  python main.py --instances 4      # Force number of instances
-  python main.py --download-only    # Download model only
-  python main.py --test             # Test with sample prompt
-  python main.py --mcp              # Enable MCP server
-  python main.py --federation       # Enable federation with other instances
-  python main.py --federation --peer 192.168.1.10:17615  # Manual peer
-        """
-    )
+async def run_main_mode(args, hardware) -> int:
+    """Run the main application mode."""
+    from cli.main_runner import MainRunner
     
-    parser.add_argument(
-        "--auto",
-        action="store_true",
-        help="Auto-detect best configuration without interactive menu"
-    )
-    parser.add_argument(
-        "--detect", 
-        action="store_true",
-        help="Show hardware detection and exit"
-    )
-    parser.add_argument(
-        "--model",
-        type=str,
-        help="Model to use (format: name:size:quant, e.g., qwen:3b:q4)"
-    )
-    parser.add_argument(
-        "--port",
-        type=int,
-        default=17615,
-        help="Port to run the API server on (default: 17615)"
-    )
-    parser.add_argument(
-        "--instances",
-        type=int,
-        help="Force number of instances (overrides auto-calculation)"
-    )
-    parser.add_argument(
-        "--download-only",
-        action="store_true",
-        help="Download models only, don't start server"
-    )
-    parser.add_argument(
-        "--test",
-        action="store_true",
-        help="Test with a sample prompt"
-    )
-    parser.add_argument(
-        "--mcp",
-        action="store_true",
-        help="Enable MCP server alongside HTTP API"
-    )
-    parser.add_argument(
-        "--config",
-        type=str,
-        default="config.yaml",
-        help="Path to config file"
-    )
-    parser.add_argument(
-        "--host",
-        type=str,
-        default=None,
-        help="Host IP to bind to (default: auto-detect)"
-    )
-    parser.add_argument(
-        "--federation",
-        action="store_true",
-        help="Enable federation with other Local Swarm instances on the network"
-    )
-    parser.add_argument(
-        "--peer",
-        action="append",
-        dest="peers",
-        help="Manually add a peer (format: host:port, can be used multiple times)"
-    )
-    parser.add_argument(
-        "--tool-server",
-        action="store_true",
-        help="Run as dedicated tool execution server (executes read/write/bash tools)"
-    )
-    parser.add_argument(
-        "--tool-port",
-        type=int,
-        default=17616,
-        help="Port for tool execution server (default: 17616)"
-    )
-    parser.add_argument(
-        "--tool-host",
-        type=str,
-        default=None,
-        nargs='?',
-        const='',  # When --tool-host is used without a value, use empty string
-        help="URL of tool execution server. Use without value for auto-detected local IP (http://<local-ip>:17616), or provide explicit URL."
-    )
-    parser.add_argument(
-        "--use-opencode-tools",
-        action="store_true",
-        help="Use opencode's tool definitions (adds ~27k tokens to context). Default: use local tool server (saves tokens)"
-    )
-    parser.add_argument(
-        "--version",
-        action="version",
-        version="%(prog)s 0.1.0"
-    )
-    
-    args = parser.parse_args()
+    runner = MainRunner(hardware, args)
+    return await runner.run()
+
+
+def main() -> int:
+    """Main entry point."""
+    args = parse_args()
     
     # Detect hardware first
     print("\n🔍 Detecting hardware...")
@@ -234,323 +74,26 @@ Examples:
         hardware = detect_hardware()
     except Exception as e:
         print(f"\n❌ Error detecting hardware: {e}", file=sys.stderr)
-        sys.exit(1)
+        return 1
     
+    # Handle detect mode
     if args.detect:
-        # Just show hardware info
-        from interactive import print_hardware_info
-        print_hardware_info(hardware)
-        print("\n✅ Detection complete")
-        return
+        return handle_detect_mode(hardware)
     
-    # Tool server mode - run minimal tool-only server
+    # Handle tool server mode
     if args.tool_server:
-        print("\n🔧 Starting Tool Execution Server...")
-        from fastapi import FastAPI
-        import uvicorn
-        
-        # Initialize local tool executor
-        tool_executor = ToolExecutor(tool_host_url=None)
-        set_tool_executor(tool_executor)
-        
-        app = FastAPI(title="Local Swarm Tool Server")
-        
-        @app.post("/v1/tools/execute")
-        async def execute_tool(request: dict):
-            tool_name = request.get("tool", "")
-            tool_args = request.get("arguments", {})
-            result = await tool_executor.execute(tool_name, tool_args)
-            return {"result": result}
-        
-        @app.get("/health")
-        async def health():
-            return {"status": "healthy", "mode": "tool-server"}
-        
-        host = args.host if args.host else get_local_ip()
-        tool_port = args.tool_port
-        print(f"🔗 Tool server running at http://{host}:{tool_port}")
-        print(f"   Endpoints:")
-        print(f"   - POST /v1/tools/execute")
-        print(f"   - GET  /health")
-        print(f"\n✅ Tool server ready!")
-        
-        uvicorn.run(app, host=host, port=tool_port)
-        return
+        return handle_tool_server_mode(args, hardware)
     
-    # Determine model configuration
-    config = None
-    
-    if args.model or args.instances or args.auto:
-        # Use command-line arguments or auto-detect
-        print("\n📊 Calculating optimal configuration...")
-        try:
-            config = select_optimal_model(
-                hardware,
-                preferred_model=args.model,
-                force_instances=args.instances
-            )
-            
-            if not config:
-                print("\n❌ No suitable model found for your hardware")
-                print("   Minimum requirement: 2 GB available memory")
-                sys.exit(1)
-            
-            # Show brief summary
-            print(f"\n✓ Selected: {config.display_name}")
-            print(f"  Instances: {config.instances}")
-            print(f"  Memory: {config.total_memory_gb:.1f} GB")
-            
-        except Exception as e:
-            print(f"\n❌ Error selecting model: {e}", file=sys.stderr)
-            sys.exit(1)
-    else:
-        # Interactive mode - show menu
-        config = interactive_model_selection(hardware)
-        
-        if not config:
-            print("\n❌ No configuration selected")
-            sys.exit(1)
-    
-    if args.download_only:
-        # Download model only
-        print("\n" + "=" * 70)
-        print("⬇️  Download Mode: Downloading model only")
-        print("=" * 70)
-        
-        try:
-            model_path = download_model_for_config(config)
-            print(f"✓ Model downloaded to: {model_path}")
-            print("\n" + "=" * 70)
-            print("✅ Download complete")
-            print("=" * 70)
-        except Exception as e:
-            print(f"\n❌ Download failed: {e}", file=sys.stderr)
-            sys.exit(1)
-    
-    elif args.test:
-        # Test mode with sample prompt
-        print("\n" + "=" * 70)
-        print("🧪 Test Mode: Running sample inference")
-        print("=" * 70)
-        
-        async def test_inference():
-            show_startup_summary(hardware, config)
-            swarm = await setup_swarm(config, hardware)
-            if not swarm:
-                return False
-            
-            try:
-                # Test prompt
-                prompt = "Write a Python function to calculate factorial:"
-                print(f"\nPrompt: {prompt}\n")
-                print("Generating responses...\n")
-                
-                result = await swarm.generate(prompt, max_tokens=200)
-                
-                print("\n" + "=" * 70)
-                print("SELECTED RESPONSE:")
-                print("=" * 70)
-                print(result.selected_response.text)
-                print("\n" + "=" * 70)
-                print(f"Strategy: {result.strategy}")
-                print(f"Confidence: {result.confidence:.2f}")
-                print(f"Latency: {result.selected_response.latency_ms:.1f}ms")
-                print(f"Tokens/sec: {result.selected_response.tokens_per_second:.1f}")
-                
-                # Show all responses
-                print("\nAll responses received:")
-                for i, resp in enumerate(result.all_responses):
-                    preview = resp.text[:60].replace('\n', ' ')
-                    print(f"  Worker {i}: {preview}... ({resp.latency_ms:.1f}ms)")
-                
-                return True
-            finally:
-                await swarm.shutdown()
-        
-        success = asyncio.run(test_inference())
-        
-        if success:
-            print("\n" + "=" * 70)
-            print("✅ Test complete")
-            print("=" * 70)
-        else:
-            print("\n❌ Test failed")
-            sys.exit(1)
-    
-    else:
-        # Full mode (download + start API server + optional MCP)
-        show_startup_summary(hardware, config)
-        
-        async def run_server():
-            swarm = await setup_swarm(config, hardware)
-            if not swarm:
-                return False
-            
-            # Initialize tool executor
-            if args.tool_host is not None:
-                # --tool-host was provided
-                if args.tool_host == "":
-                    # --tool-host with no value - use local IP with default port
-                    local_ip = get_local_ip()
-                    tool_host_url = f"http://{local_ip}:17616"
-                    print(f"\n🔧 Using remote tool host: {tool_host_url} (auto-detected local IP)")
-                else:
-                    # --tool-host with explicit value
-                    tool_host_url = args.tool_host
-                    print(f"\n🔧 Using remote tool host: {tool_host_url}")
-                tool_executor = ToolExecutor(tool_host_url=tool_host_url)
-                set_tool_executor(tool_executor)
-            else:
-                # Local tool execution (default)
-                tool_executor = ToolExecutor(tool_host_url=None)
-                set_tool_executor(tool_executor)
-            
-            # Update summary with runtime info
-            show_startup_summary(hardware, config, swarm)
-            
-            # Initialize federation if enabled
-            discovery = None
-            federated_swarm = None
-            if args.federation:
-                print("\n🌐 Initializing federation...")
-                try:
-                    # Use specified host for advertising if provided
-                    advertise_ip = args.host if args.host else None
-                    discovery = await create_discovery_service(args.port, advertise_ip=advertise_ip)
-                    
-                    # Get swarm info for advertising
-                    swarm_info = {
-                        "version": "0.1.0",
-                        "instances": config.instances,
-                        "model_id": config.model_id,
-                        "hardware_summary": f"{hardware.cpu_cores} CPU, {hardware.ram_gb:.1f}GB RAM"
-                    }
-                    
-                    await discovery.start_advertising(swarm_info)
-                    await discovery.start_listening()
-                    
-                    # Add manual peers if specified
-                    if args.peers:
-                        print(f"  📍 Adding {len(args.peers)} manual peer(s)...")
-                        from network.discovery import PeerInfo
-                        from datetime import datetime
-                        for peer_str in args.peers:
-                            try:
-                                host, port = peer_str.rsplit(':', 1)
-                                port = int(port)
-                                peer = PeerInfo(
-                                    host=host,
-                                    port=port,
-                                    name=f"manual_{host}_{port}",
-                                    version="0.1.0",
-                                    instances=0,
-                                    model_id="unknown",
-                                    hardware_summary="manual",
-                                    last_seen=datetime.now()
-                                )
-                                discovery.peers[peer.name] = peer
-                                print(f"    ✓ Added peer: {host}:{port}")
-                            except Exception as e:
-                                print(f"    ⚠️  Failed to add peer {peer_str}: {e}")
-                    
-                    # Create federated swarm wrapper
-                    federated_swarm = FederatedSwarm(swarm, discovery)
-                    set_federated_swarm(federated_swarm)
-                    
-                    # Start health check loop in background
-                    asyncio.create_task(discovery.start_health_check_loop(interval_seconds=10))
-                    
-                    print(f"  ✓ Federation enabled")
-                    print(f"  ✓ Discovery active on port {discovery.discovery_port}")
-                    print(f"  ✓ Peer health checks every 10s")
-                except Exception as e:
-                    print(f"  ⚠️  Failed to initialize federation: {e}")
-                    print("     Continuing without federation...")
-            
-            mcp_server = None
-            try:
-                # Create and start API server
-                print("\n🌐 Starting HTTP API server...")
-                # Use provided host or auto-detect
-                if args.host:
-                    host = args.host
-                    print(f"🔗 Using specified host: {host}:{args.port}")
-                else:
-                    # Use local network IP instead of 0.0.0.0 for security
-                    host = get_local_ip()
-                    print(f"🔗 Binding to {host}:{args.port}")
-
-                # Show tool mode being used
-                if args.use_opencode_tools:
-                    print(f"🔧 Tool mode: opencode tools (~27k tokens, full capabilities)")
-                else:
-                    print(f"🔧 Tool mode: local tool server (~125 tokens, saves tokens)")
-
-                server = create_server(swarm, host=host, port=args.port, use_opencode_tools=args.use_opencode_tools)
-                
-                print(f"\n✅ Local Swarm is running!")
-                print(f"   API: http://{host}:{args.port}/v1")
-                print(f"   Health: http://{host}:{args.port}/health")
-                
-                if args.federation and discovery:
-                    peers = discovery.get_peers()
-                    print(f"\n🌐 Federation: Enabled")
-                    print(f"   Discovery port: {discovery.discovery_port}")
-                    if peers:
-                        print(f"   Peers discovered: {len(peers)}")
-                        for peer in peers:
-                            print(f"     - {peer.name} ({peer.model_id})")
-                    else:
-                        print(f"   Peers discovered: 0 (waiting for peers...)")
-                
-                # Show tool server status
-                if args.tool_host is not None:
-                    print(f"\n🔧 Tool Server: Remote")
-                    if args.tool_host == "":
-                        local_ip = get_local_ip()
-                        print(f"   URL: http://{local_ip}:17616 (auto-detected)")
-                    else:
-                        print(f"   URL: {args.tool_host}")
-                    print(f"   Mode: Tools executed remotely on tool host")
-                else:
-                    print(f"\n🔧 Tool Server: Local")
-                    print(f"   Mode: Tools executed on this machine")
-                
-                if args.mcp:
-                    # Start MCP server alongside HTTP API
-                    print("\n🤖 Starting MCP server...")
-                    mcp_server = await create_mcp_server(swarm)
-                    print("   MCP server active (stdio)")
-                
-                print(f"\n💡 Configure opencode to use:")
-                print(f'   base_url: http://127.0.0.1:{args.port}/v1')
-                print(f'   api_key: any (not used)')
-                print(f"\nPress Ctrl+C to stop...\n")
-                
-                # Start HTTP server (this will block)
-                await server.start()
-                
-            except KeyboardInterrupt:
-                print("\n\nReceived stop signal")
-            finally:
-                if federated_swarm:
-                    await federated_swarm.close()
-                if discovery:
-                    await discovery.stop()
-                await swarm.shutdown()
-            
-            return True
-        
-        try:
-            success = asyncio.run(run_server())
-            if success:
-                print("\n" + "=" * 70)
-                print("✅ Server stopped gracefully")
-                print("=" * 70)
-        except Exception as e:
-            print(f"\n❌ Error running server: {e}", file=sys.stderr)
-            sys.exit(1)
+    # Run main mode
+    try:
+        return asyncio.run(run_main_mode(args, hardware))
+    except KeyboardInterrupt:
+        print("\n\nReceived stop signal")
+        return 0
+    except Exception as e:
+        print(f"\n❌ Error: {e}", file=sys.stderr)
+        return 1
 
 
 if __name__ == "__main__":
-    main()
+    sys.exit(main())
diff --git a/src/cli/main_runner.py b/src/cli/main_runner.py
new file mode 100644
index 0000000..f213828
--- /dev/null
+++ b/src/cli/main_runner.py
@@ -0,0 +1,320 @@
+"""Main application runner for Local Swarm.
+
+Handles the primary application modes: download-only, test, and full server mode.
+"""
+
+import asyncio
+import sys
+from typing import Optional
+
+from models.selector import select_optimal_model, ModelConfig
+from models.downloader import download_model_for_config
+from swarm import SwarmManager
+from api import create_server
+from api.routes import set_federated_swarm
+from interactive import (
+    interactive_model_selection,
+    show_startup_summary,
+    show_runtime_menu,
+)
+from network import create_discovery_service, FederatedSwarm
+from tools.executor import ToolExecutor, set_tool_executor
+from utils.network import get_local_ip
+
+
+class MainRunner:
+    """Runs the main application logic."""
+    
+    def __init__(self, hardware, args):
+        """Initialize the main runner.
+        
+        Args:
+            hardware: Hardware profile
+            args: Parsed command line arguments
+        """
+        self.hardware = hardware
+        self.args = args
+        self.config: Optional[ModelConfig] = None
+        self.swarm: Optional[SwarmManager] = None
+        self.discovery = None
+        self.federated_swarm = None
+        self.mcp_server = None
+    
+    async def run(self) -> int:
+        """Run the main application.
+        
+        Returns:
+            Exit code (0 for success, 1 for error)
+        """
+        # Get configuration
+        self.config = self._get_configuration()
+        if not self.config:
+            return 1
+        
+        # Handle download-only mode
+        if self.args.download_only:
+            return await self._run_download_mode()
+        
+        # Handle test mode
+        if self.args.test:
+            return await self._run_test_mode()
+        
+        # Run full server mode
+        return await self._run_server_mode()
+    
+    def _get_configuration(self) -> Optional[ModelConfig]:
+        """Get the model configuration."""
+        if self.args.model or self.args.instances or self.args.auto:
+            return self._get_auto_config()
+        else:
+            return interactive_model_selection(self.hardware)
+    
+    def _get_auto_config(self) -> Optional[ModelConfig]:
+        """Get auto-detected configuration."""
+        print("\n📊 Calculating optimal configuration...")
+        try:
+            config = select_optimal_model(
+                self.hardware,
+                preferred_model=self.args.model,
+                force_instances=self.args.instances
+            )
+            
+            if not config:
+                print("\n❌ No suitable model found for your hardware")
+                print("   Minimum requirement: 2 GB available memory")
+                return None
+            
+            print(f"\n✓ Selected: {config.display_name}")
+            print(f"  Instances: {config.instances}")
+            print(f"  Memory: {config.total_memory_gb:.1f} GB")
+            return config
+            
+        except Exception as e:
+            print(f"\n❌ Error selecting model: {e}", file=sys.stderr)
+            return None
+    
+    async def _run_download_mode(self) -> int:
+        """Run download-only mode."""
+        print("\n" + "=" * 70)
+        print("⬇️  Download Mode: Downloading model only")
+        print("=" * 70)
+        
+        try:
+            model_path = download_model_for_config(self.config)
+            print(f"✓ Model downloaded to: {model_path}")
+            print("\n" + "=" * 70)
+            print("✅ Download complete")
+            print("=" * 70)
+            return 0
+        except Exception as e:
+            print(f"\n❌ Download failed: {e}", file=sys.stderr)
+            return 1
+    
+    async def _run_test_mode(self) -> int:
+        """Run test mode with sample prompt."""
+        from cli.test_runner import run_test
+        return await run_test(self.hardware, self.config)
+    
+    async def _run_server_mode(self) -> int:
+        """Run full server mode."""
+        show_startup_summary(self.hardware, self.config)
+        
+        # Setup swarm
+        if not await self._setup_swarm():
+            return 1
+        
+        # Initialize tool executor
+        self._setup_tool_executor()
+        
+        # Show updated summary with runtime info
+        show_startup_summary(self.hardware, self.config, self.swarm)
+        
+        # Initialize federation if enabled
+        if self.args.federation:
+            await self._setup_federation()
+        
+        # Start MCP server if enabled
+        if self.args.mcp:
+            await self._setup_mcp()
+        
+        # Run server
+        return await self._run_server()
+    
+    async def _setup_swarm(self) -> bool:
+        """Setup the swarm.
+        
+        Returns:
+            True if successful
+        """
+        print("\n⬇️  Downloading model...")
+        try:
+            model_path = download_model_for_config(self.config)
+            print(f"✓ Model ready at: {model_path}")
+        except Exception as e:
+            print(f"\n❌ Error downloading model: {e}", file=sys.stderr)
+            return False
+        
+        print("\n🚀 Initializing swarm...")
+        try:
+            self.swarm = SwarmManager(
+                model_config=self.config,
+                hardware=self.hardware,
+                consensus_strategy="similarity"
+            )
+            
+            success = await self.swarm.initialize(str(model_path))
+            if not success:
+                print("❌ Failed to initialize swarm")
+                return False
+            
+            return True
+        except Exception as e:
+            print(f"\n❌ Error initializing swarm: {e}", file=sys.stderr)
+            return False
+    
+    def _setup_tool_executor(self) -> None:
+        """Setup the tool executor."""
+        if self.args.tool_host is not None:
+            if self.args.tool_host == "":
+                tool_host_url = f"http://{get_local_ip()}:17616"
+                print(f"\n🔧 Using remote tool host: {tool_host_url} (auto-detected)")
+            else:
+                tool_host_url = self.args.tool_host
+                print(f"\n🔧 Using remote tool host: {tool_host_url}")
+            executor = ToolExecutor(tool_host_url=tool_host_url)
+        else:
+            executor = ToolExecutor(tool_host_url=None)
+            print("\n🔧 Tool Server: Local")
+        
+        set_tool_executor(executor)
+    
+    async def _setup_federation(self) -> None:
+        """Setup federation."""
+        print("\n🌐 Initializing federation...")
+        try:
+            advertise_ip = self.args.host if self.args.host else None
+            self.discovery = await create_discovery_service(
+                self.args.port,
+                advertise_ip=advertise_ip
+            )
+            
+            swarm_info = {
+                "version": "0.1.0",
+                "instances": self.config.instances,
+                "model_id": self.config.model_id,
+                "hardware_summary": f"{self.hardware.cpu_cores} CPU, {self.hardware.ram_gb:.1f}GB RAM"
+            }
+            
+            await self.discovery.start_advertising(swarm_info)
+            await self.discovery.start_listening()
+            
+            # Add manual peers
+            if self.args.peers:
+                await self._add_manual_peers()
+            
+            self.federated_swarm = FederatedSwarm(self.swarm, self.discovery)
+            set_federated_swarm(self.federated_swarm)
+            
+            # Start health check loop
+            asyncio.create_task(
+                self.discovery.start_health_check_loop(interval_seconds=10)
+            )
+            
+            print(f"  ✓ Federation enabled")
+            print(f"  ✓ Discovery active on port {self.discovery.discovery_port}")
+            print(f"  ✓ Peer health checks every 10s")
+        except Exception as e:
+            print(f"  ⚠️  Failed to initialize federation: {e}")
+            print("     Continuing without federation...")
+    
+    async def _add_manual_peers(self) -> None:
+        """Add manual peers from command line."""
+        print(f"  📍 Adding {len(self.args.peers)} manual peer(s)...")
+        from network.discovery import PeerInfo
+        from datetime import datetime
+        
+        for peer_str in self.args.peers:
+            try:
+                host, port = peer_str.rsplit(':', 1)
+                port = int(port)
+                peer = PeerInfo(
+                    host=host,
+                    port=port,
+                    name=f"manual_{host}_{port}",
+                    version="0.1.0",
+                    instances=0,
+                    model_id="unknown",
+                    hardware_summary="manual",
+                    last_seen=datetime.now()
+                )
+                self.discovery.peers[peer.name] = peer
+                print(f"    ✓ Added peer: {host}:{port}")
+            except Exception as e:
+                print(f"    ⚠️  Failed to add peer {peer_str}: {e}")
+    
+    async def _setup_mcp(self) -> None:
+        """Setup MCP server."""
+        print("\n🤖 Starting MCP server...")
+        from mcp_server import create_mcp_server
+        self.mcp_server = await create_mcp_server(self.swarm)
+        print("   MCP server active (stdio)")
+    
+    async def _run_server(self) -> int:
+        """Run the API server."""
+        print("\n🌐 Starting HTTP API server...")
+        
+        # Determine host
+        if self.args.host:
+            host = self.args.host
+            print(f"🔗 Using specified host: {host}:{self.args.port}")
+        else:
+            host = get_local_ip()
+            print(f"🔗 Binding to {host}:{self.args.port}")
+        
+        # Show tool mode
+        if self.args.use_opencode_tools:
+            print(f"🔧 Tool mode: opencode tools (~27k tokens)")
+        else:
+            print(f"🔧 Tool mode: local tool server (~125 tokens)")
+        
+        # Create server
+        server = create_server(
+            self.swarm,
+            host=host,
+            port=self.args.port,
+            use_opencode_tools=self.args.use_opencode_tools
+        )
+        
+        # Print connection info
+        print(f"\n✅ Local Swarm is running!")
+        print(f"   API: http://{host}:{self.args.port}/v1")
+        print(f"   Health: http://{host}:{self.args.port}/health")
+        
+        if self.args.federation and self.discovery:
+            peers = self.discovery.get_peers()
+            print(f"\n🌐 Federation: Enabled")
+            print(f"   Discovery port: {self.discovery.discovery_port}")
+            if peers:
+                print(f"   Peers discovered: {len(peers)}")
+        
+        print(f"\n💡 Configure opencode to use:")
+        print(f'   base_url: http://127.0.0.1:{self.args.port}/v1')
+        print(f'   api_key: any (not used)')
+        print(f"\nPress Ctrl+C to stop...\n")
+        
+        # Start server
+        try:
+            await server.start()
+        finally:
+            await self._shutdown()
+        
+        return 0
+    
+    async def _shutdown(self) -> None:
+        """Shutdown all services."""
+        if self.federated_swarm:
+            await self.federated_swarm.close()
+        if self.discovery:
+            await self.discovery.stop()
+        if self.swarm:
+            await self.swarm.shutdown()
diff --git a/src/cli/parser.py b/src/cli/parser.py
new file mode 100644
index 0000000..397f5ca
--- /dev/null
+++ b/src/cli/parser.py
@@ -0,0 +1,151 @@
+"""CLI argument parsing for Local Swarm."""
+
+import argparse
+from typing import Optional
+
+
+def create_parser() -> argparse.ArgumentParser:
+    """Create and configure the argument parser."""
+    parser = argparse.ArgumentParser(
+        description="Local Swarm - AI-powered coding LLM swarm",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python main.py                    # Interactive setup and start
+  python main.py --auto             # Auto-detect and start without menu
+  python main.py --detect           # Show hardware detection only
+  python main.py --model qwen:3b:q4 # Use specific model (skip menu)
+  python main.py --port 17615       # Use custom port (default: 17615)
+  python main.py --host 192.168.1.5 # Bind to specific IP
+  python main.py --instances 4      # Force number of instances
+  python main.py --download-only    # Download model only
+  python main.py --test             # Test with sample prompt
+  python main.py --mcp              # Enable MCP server
+  python main.py --federation       # Enable federation with other instances
+  python main.py --federation --peer 192.168.1.10:17615  # Manual peer
+        """
+    )
+    
+    # Mode options
+    parser.add_argument(
+        "--auto",
+        action="store_true",
+        help="Auto-detect best configuration without interactive menu"
+    )
+    parser.add_argument(
+        "--detect", 
+        action="store_true",
+        help="Show hardware detection and exit"
+    )
+    
+    # Model options
+    parser.add_argument(
+        "--model",
+        type=str,
+        help="Model to use (format: name:size:quant, e.g., qwen:3b:q4)"
+    )
+    parser.add_argument(
+        "--instances",
+        type=int,
+        help="Force number of instances (overrides auto-calculation)"
+    )
+    
+    # Server options
+    parser.add_argument(
+        "--port",
+        type=int,
+        default=17615,
+        help="Port to run the API server on (default: 17615)"
+    )
+    parser.add_argument(
+        "--host",
+        type=str,
+        default=None,
+        help="Host IP to bind to (default: auto-detect)"
+    )
+    
+    # Operation modes
+    parser.add_argument(
+        "--download-only",
+        action="store_true",
+        help="Download models only, don't start server"
+    )
+    parser.add_argument(
+        "--test",
+        action="store_true",
+        help="Test with a sample prompt"
+    )
+    parser.add_argument(
+        "--mcp",
+        action="store_true",
+        help="Enable MCP server alongside HTTP API"
+    )
+    
+    # Configuration
+    parser.add_argument(
+        "--config",
+        type=str,
+        default="config.yaml",
+        help="Path to config file"
+    )
+    
+    # Federation options
+    parser.add_argument(
+        "--federation",
+        action="store_true",
+        help="Enable federation with other Local Swarm instances on the network"
+    )
+    parser.add_argument(
+        "--peer",
+        action="append",
+        dest="peers",
+        help="Manually add a peer (format: host:port, can be used multiple times)"
+    )
+    
+    # Tool server options
+    parser.add_argument(
+        "--tool-server",
+        action="store_true",
+        help="Run as dedicated tool execution server (executes read/write/bash tools)"
+    )
+    parser.add_argument(
+        "--tool-port",
+        type=int,
+        default=17616,
+        help="Port for tool execution server (default: 17616)"
+    )
+    parser.add_argument(
+        "--tool-host",
+        type=str,
+        default=None,
+        nargs='?',
+        const='',
+        help="URL of tool execution server. Use without value for auto-detected local IP"
+    )
+    parser.add_argument(
+        "--use-opencode-tools",
+        action="store_true",
+        help="Use opencode's tool definitions (~27k tokens). Default: use local tool server"
+    )
+    
+    # Version
+    parser.add_argument(
+        "--version",
+        action="version",
+        version="%(prog)s 0.1.0"
+    )
+    
+    return parser
+
+
+def parse_args(args: Optional[list] = None):
+    """Parse command line arguments.
+    
+    Args:
+        args: Command line arguments (defaults to sys.argv)
+        
+    Returns:
+        Parsed arguments namespace
+    """
+    parser = create_parser()
+    return parser.parse_args(args)
diff --git a/src/cli/test_runner.py b/src/cli/test_runner.py
new file mode 100644
index 0000000..b42ac16
--- /dev/null
+++ b/src/cli/test_runner.py
@@ -0,0 +1,81 @@
+"""Test mode runner for Local Swarm."""
+
+import asyncio
+from models.downloader import download_model_for_config
+from swarm import SwarmManager
+from interactive import show_startup_summary
+
+
+async def run_test(hardware, config) -> int:
+    """Run test mode with sample prompt.
+    
+    Args:
+        hardware: Hardware profile
+        config: Model configuration
+        
+    Returns:
+        Exit code (0 for success, 1 for error)
+    """
+    print("\n" + "=" * 70)
+    print("🧪 Test Mode: Running sample inference")
+    print("=" * 70)
+    
+    show_startup_summary(hardware, config)
+    
+    # Download model
+    print("\n⬇️  Downloading model...")
+    try:
+        model_path = download_model_for_config(config)
+        print(f"✓ Model ready at: {model_path}")
+    except Exception as e:
+        print(f"\n❌ Error downloading model: {e}")
+        return 1
+    
+    # Initialize swarm
+    print("\n🚀 Initializing swarm...")
+    try:
+        swarm = SwarmManager(
+            model_config=config,
+            hardware=hardware,
+            consensus_strategy="similarity"
+        )
+        
+        success = await swarm.initialize(str(model_path))
+        if not success:
+            print("❌ Failed to initialize swarm")
+            return 1
+    except Exception as e:
+        print(f"\n❌ Error initializing swarm: {e}")
+        return 1
+    
+    try:
+        # Test prompt
+        prompt = "Write a Python function to calculate factorial:"
+        print(f"\nPrompt: {prompt}\n")
+        print("Generating responses...\n")
+        
+        result = await swarm.generate(prompt, max_tokens=200)
+        
+        print("\n" + "=" * 70)
+        print("SELECTED RESPONSE:")
+        print("=" * 70)
+        print(result.selected_response.text)
+        print("\n" + "=" * 70)
+        print(f"Strategy: {result.strategy}")
+        print(f"Confidence: {result.confidence:.2f}")
+        print(f"Latency: {result.selected_response.latency_ms:.1f}ms")
+        print(f"Tokens/sec: {result.selected_response.tokens_per_second:.1f}")
+        
+        # Show all responses
+        print("\nAll responses received:")
+        for i, resp in enumerate(result.all_responses):
+            preview = resp.text[:60].replace('\n', ' ')
+            print(f"  Worker {i}: {preview}... ({resp.latency_ms:.1f}ms)")
+        
+        print("\n" + "=" * 70)
+        print("✅ Test complete")
+        print("=" * 70)
+        return 0
+        
+    finally:
+        await swarm.shutdown()
diff --git a/src/cli/tool_server.py b/src/cli/tool_server.py
new file mode 100644
index 0000000..78a3d19
--- /dev/null
+++ b/src/cli/tool_server.py
@@ -0,0 +1,69 @@
+"""Tool server for Local Swarm.
+
+Standalone tool execution server for distributed setups.
+"""
+
+import logging
+from typing import Optional
+
+from fastapi import FastAPI
+import uvicorn
+
+from tools.executor import ToolExecutor, set_tool_executor
+
+
+logger = logging.getLogger(__name__)
+
+
+def create_tool_server_app() -> FastAPI:
+    """Create the tool server FastAPI application.
+    
+    Returns:
+        Configured FastAPI application
+    """
+    app = FastAPI(title="Local Swarm Tool Server")
+    
+    @app.post("/v1/tools/execute")
+    async def execute_tool(request: dict):
+        tool_name = request.get("tool", "")
+        tool_args = request.get("arguments", {})
+        
+        # Get the global executor
+        from tools.executor import get_tool_executor
+        executor = get_tool_executor()
+        
+        if executor is None:
+            return {"result": "Error: No tool executor configured"}
+        
+        result = await executor.execute(tool_name, tool_args)
+        return {"result": result}
+    
+    @app.get("/health")
+    async def health():
+        return {"status": "healthy", "mode": "tool-server"}
+    
+    return app
+
+
+async def run_tool_server(host: str, port: int) -> None:
+    """Run the tool server.
+    
+    Args:
+        host: Host to bind to
+        port: Port to listen on
+    """
+    # Initialize local tool executor
+    tool_executor = ToolExecutor(tool_host_url=None)
+    set_tool_executor(tool_executor)
+    
+    app = create_tool_server_app()
+    
+    print(f"🔗 Tool server running at http://{host}:{port}")
+    print(f"   Endpoints:")
+    print(f"   - POST /v1/tools/execute")
+    print(f"   - GET  /health")
+    print(f"\n✅ Tool server ready!")
+    
+    config = uvicorn.Config(app, host=host, port=port, log_level="warning")
+    server = uvicorn.Server(config)
+    await server.serve()
diff --git a/src/utils/network.py b/src/utils/network.py
new file mode 100644
index 0000000..47e715e
--- /dev/null
+++ b/src/utils/network.py
@@ -0,0 +1,45 @@
+"""Network utilities for Local Swarm."""
+
+import socket
+from typing import Optional
+
+
+def get_local_ip() -> str:
+    """Get the local network IP address (private networks only).
+    
+    Returns:
+        Local IP address or 127.0.0.1 if detection fails
+    """
+    try:
+        # Create a socket and connect to a public DNS server
+        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+        s.settimeout(2)
+        # Try to connect to Google's DNS - this doesn't actually send data
+        s.connect(("8.8.8.8", 80))
+        ip = s.getsockname()[0]
+        s.close()
+        
+        # Check if it's a private IP
+        is_private = ip.startswith('192.168.')
+        
+        if is_private:
+            print(f"  📡 Detected local IP: {ip}")
+            return ip
+        else:
+            print(f"  ⚠️  IP {ip} is not a private network, binding to localhost")
+            return "127.0.0.1"
+    except Exception as e:
+        print(f"  ⚠️  Could not detect local IP: {e}, using localhost")
+        return "127.0.0.1"
+
+
+def is_private_ip(ip: str) -> bool:
+    """Check if an IP address is private.
+    
+    Args:
+        ip: IP address string
+        
+    Returns:
+        True if IP is private
+    """
+    return ip.startswith('192.168.') or ip.startswith('10.') or ip.startswith('172.16.')