fix: Remove slow HF API check from recommended config selection
- `select_optimal_model` was checking the HF API for available quantizations.
- This caused the menu to hang or slow down when changing context.
- Availability is now checked only when browsing or selecting a custom config.
- The recommended config uses the default quantizations (faster).
This commit is contained in:
@@ -64,3 +64,5 @@
|
||||
{"t":"reindex","f":"src/interactive.py","s":0}
|
||||
{"t":"watch","files":33}
|
||||
{"t":"reindex","f":"src/models/selector.py","s":0}
|
||||
{"t":"watch","files":33}
|
||||
{"t":"reindex","f":"src/models/selector.py","s":0}
|
||||
|
||||
@@ -215,15 +215,14 @@ def select_optimal_model(
|
||||
available_vram, available_ram = get_available_memory_with_offload(hardware, offload_percent)
|
||||
|
||||
# Get models to try (with appropriate quantizations)
|
||||
# On Mac, check which quantizations are actually available
|
||||
check_available = use_mlx
|
||||
|
||||
# Note: Don't check available quantizations here (too slow for menu rendering)
|
||||
# Only check when user is actually browsing or selecting custom config
|
||||
if preferred_model:
|
||||
from models.registry import get_model
|
||||
preferred = get_model(preferred_model, use_mlx=use_mlx, check_available=check_available)
|
||||
preferred = get_model(preferred_model, use_mlx=use_mlx, check_available=False)
|
||||
models = [preferred] if preferred else []
|
||||
else:
|
||||
models = list_models(use_mlx=use_mlx, check_available=check_available)
|
||||
models = list_models(use_mlx=use_mlx, check_available=False)
|
||||
|
||||
# Note: On Apple Silicon with MLX, multiple instances work fine in sequential mode
|
||||
# The swarm manager will handle sequential execution to avoid GPU conflicts
|
||||
|
||||
Reference in New Issue
Block a user