fix: Update selector to check available quantizations on Mac

This commit is contained in:
2026-02-23 23:52:29 +01:00
parent cb8e05e627
commit f2d0fddfa4
2 changed files with 7 additions and 2 deletions
+5 -2
View File
@@ -215,12 +215,15 @@ def select_optimal_model(
available_vram, available_ram = get_available_memory_with_offload(hardware, offload_percent)
# Get models to try (with appropriate quantizations)
# When MLX is in use (i.e. on Mac), check which quantizations are actually available
check_available = use_mlx
if preferred_model:
from models.registry import get_model
preferred = get_model(preferred_model, use_mlx=use_mlx)
preferred = get_model(preferred_model, use_mlx=use_mlx, check_available=check_available)
models = [preferred] if preferred else []
else:
models = list_models(use_mlx=use_mlx)
models = list_models(use_mlx=use_mlx, check_available=check_available)
# Note: On Apple Silicon with MLX, multiple instances work fine in sequential mode
# The swarm manager will handle sequential execution to avoid GPU conflicts