fix: Remove slow HF API check from recommended config selection

- select_optimal_model was querying the HF API for available quantizations (whenever use_mlx was set)
- This caused the menu to hang or slow down when changing context
- Availability is now checked only when the user is browsing or selecting a custom config
- The recommended config now uses the default quantizations (faster)
This commit is contained in:
2026-02-23 23:54:57 +01:00
parent f2d0fddfa4
commit 2461f45ca8
2 changed files with 6 additions and 5 deletions
+2
View File
@@ -64,3 +64,5 @@
{"t":"reindex","f":"src/interactive.py","s":0}
{"t":"watch","files":33}
{"t":"reindex","f":"src/models/selector.py","s":0}
{"t":"watch","files":33}
{"t":"reindex","f":"src/models/selector.py","s":0}
+4 -5
View File
@@ -215,15 +215,14 @@ def select_optimal_model(
available_vram, available_ram = get_available_memory_with_offload(hardware, offload_percent)
# Get models to try (with appropriate quantizations)
# On Mac, check which quantizations are actually available
check_available = use_mlx
# Note: Don't check available quantizations here (too slow for menu rendering)
# Only check when user is actually browsing or selecting custom config
if preferred_model:
from models.registry import get_model
preferred = get_model(preferred_model, use_mlx=use_mlx, check_available=check_available)
preferred = get_model(preferred_model, use_mlx=use_mlx, check_available=False)
models = [preferred] if preferred else []
else:
models = list_models(use_mlx=use_mlx, check_available=check_available)
models = list_models(use_mlx=use_mlx, check_available=False)
# Note: On Apple Silicon with MLX, multiple instances work fine in sequential mode
# The swarm manager will handle sequential execution to avoid GPU conflicts