{ "_comment": "Model selection configuration constants", "constraints": { "min_instances": 1, "max_instances": 8, "optimal_max_instances": 5, "memory_overhead_factor": 0.95, "mlx_max_instances": 1 }, "context_options": { "16384": "16K tokens", "32768": "32K tokens (default)", "65536": "64K tokens", "131072": "128K tokens" }, "offload_options": { "0.0": "No offload (default) - 100% GPU", "0.2": "20% offload - 80% GPU, 20% RAM", "0.5": "50% offload - 50% GPU, 50% RAM" } }