fix: Update selector to check available quantizations on Mac
This commit is contained in:
@@ -215,12 +215,15 @@ def select_optimal_model(
|
||||
available_vram, available_ram = get_available_memory_with_offload(hardware, offload_percent)
|
||||
|
||||
# Get models to try (with appropriate quantizations)
|
||||
# On Mac (MLX backend), check which quantizations are actually available
|
||||
check_available = use_mlx
|
||||
|
||||
if preferred_model:
|
||||
from models.registry import get_model
|
||||
preferred = get_model(preferred_model, use_mlx=use_mlx)
|
||||
preferred = get_model(preferred_model, use_mlx=use_mlx, check_available=check_available)
|
||||
models = [preferred] if preferred else []
|
||||
else:
|
||||
models = list_models(use_mlx=use_mlx)
|
||||
models = list_models(use_mlx=use_mlx, check_available=check_available)
|
||||
|
||||
# Note: On Apple Silicon with MLX, multiple instances work fine in sequential mode
|
||||
# The swarm manager will handle sequential execution to avoid GPU conflicts
|
||||
|
||||
Reference in New Issue
Block a user