Files
local_swarm/tests/test_auto_detection.py
T
sleepy dcca89d89a fix: OpenAI API compatibility for hollama and other clients
- Fixed ChatMessage.tool_calls to be Optional with default None (excluded when empty)
- Added logprobs field to ChatCompletionChoice (always included as null)
- Added stats and system_fingerprint to ChatCompletionResponse
- Fixed streaming response to use delta format (not message format)
- Fixed non-streaming response to include logprobs: null
- Updated tool instructions to include 'NO explanations'
- Added pytest-asyncio markers to async tests
- All 41 tests passing

This fixes the "Cannot read properties of undefined (reading 'content')" error in hollama and ensures compatibility with OpenAI clients.
2026-02-25 19:39:05 +01:00

141 lines
5.2 KiB
Python

"""Test Apple Silicon MLX auto-detection and download."""
import sys
import os
from pathlib import Path
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
def test_apple_silicon_mlx_selection():
    """Verify that auto-detection on Apple Silicon picks an MLX model.

    Builds a duck-typed mock of the hardware profile for an Apple Silicon
    machine, runs ``select_optimal_model`` with ``use_mlx=None`` (auto),
    and asserts that the chosen quantization and repository name follow
    the MLX conventions (``4bit``/``8bit`` names, ``mlx-community``
    namespace) rather than GGUF conventions.

    Raises:
        AssertionError: if any MLX-format expectation is violated.
    """
    # NOTE: HardwareProfile was previously imported here but never used;
    # the mock below only needs to quack like one.
    from hardware.detector import GPUInfo
    from models.selector import select_optimal_model

    # Mock Apple Silicon hardware (stand-in for a detected HardwareProfile).
    class MockAppleHardware:
        os = "darwin"
        cpu_cores = 12
        ram_gb = 24.0
        ram_available_gb = 12.0
        is_apple_silicon = True
        has_dedicated_gpu = False
        gpu = GPUInfo(name="Apple Silicon GPU", vram_gb=24.0, driver_version=None)
        available_memory_gb = 12.0
        recommended_memory_gb = 12.0

    hardware = MockAppleHardware()

    print("=" * 60)
    print("Apple Silicon MLX Auto-Detection Test")
    print("=" * 60)

    # 1. Auto-detection: use_mlx=None lets the selector decide from hardware.
    print("\n1. Testing auto-detection (use_mlx=None)...")
    config = select_optimal_model(hardware, use_mlx=None)
    assert config is not None, "Should find a model"
    print(f" ✓ Model selected: {config.model.name}")

    # 2. MLX quantization names look like "4bit"/"8bit".
    print("\n2. Verifying MLX quantization format...")
    is_mlx_format = 'bit' in config.quantization.name.lower()
    assert is_mlx_format, f"Quantization should be MLX format (4bit/8bit), got {config.quantization.name}"
    print(f" ✓ Quantization: {config.quantization.name} (MLX format)")

    # 3. MLX repos live under the mlx-community namespace.
    print("\n3. Testing MLX repository name generation...")
    from models.registry import get_model_hf_repo_mlx
    mlx_repo = get_model_hf_repo_mlx(config.model.id, config.variant, config.quantization)
    assert mlx_repo is not None, "MLX repository should be generated"
    assert "mlx-community" in mlx_repo, "Should use mlx-community namespace"
    assert "-Instruct-" in mlx_repo, "Should have -Instruct- suffix"
    assert config.quantization.name in mlx_repo, "Should include quantization"
    print(f" ✓ Repository: {mlx_repo}")

    # 4. Negative check: nothing GGUF-shaped leaked through.
    print("\n4. Verifying NOT using GGUF format...")
    has_gguf = 'q4_k_m' in config.quantization.name or 'q5_k_m' in config.quantization.name
    has_gguf_suffix = '-GGUF' in mlx_repo
    assert not has_gguf, "Should not use GGUF quantization names"
    assert not has_gguf_suffix, "Should not use GGUF repository suffix"
    print(f" ✓ Not using GGUF format")

    print("\n" + "=" * 60)
    print("All Apple Silicon MLX tests passed!")
    print("=" * 60)
def test_nvidia_gpu_gguf_selection():
    """Verify that auto-detection on an NVIDIA GPU picks a GGUF model.

    Builds a duck-typed mock of a Linux + RTX 4090 hardware profile, runs
    ``select_optimal_model`` with ``use_mlx=None`` (auto), and asserts that
    the chosen quantization and repository follow GGUF conventions
    (``q4_k_m``-style names, ``-GGUF`` suffix) and not MLX conventions.

    Raises:
        AssertionError: if any GGUF-format expectation is violated.
    """
    # NOTE: HardwareProfile was previously imported here but never used.
    from hardware.detector import GPUInfo
    from models.selector import select_optimal_model

    # Mock NVIDIA hardware (stand-in for a detected HardwareProfile).
    class MockNvidiaHardware:
        os = "linux"
        cpu_cores = 8
        ram_gb = 32.0
        ram_available_gb = 20.0
        is_apple_silicon = False
        has_dedicated_gpu = True
        gpu = GPUInfo(name="NVIDIA RTX 4090", vram_gb=24.0, driver_version="550.80")
        available_memory_gb = 20.0
        recommended_memory_gb = 20.0

    hardware = MockNvidiaHardware()

    print("\n" + "=" * 60)
    print("NVIDIA GPU GGUF Auto-Detection Test")
    print("=" * 60)

    # 1. Auto-detection: use_mlx=None lets the selector decide from hardware.
    print("\n1. Testing auto-detection (use_mlx=None)...")
    config = select_optimal_model(hardware, use_mlx=None)
    assert config is not None, "Should find a model"
    print(f" ✓ Model selected: {config.model.name}")

    # 2. GGUF quantization names look like "q4_k_m"/"q5_k_m".
    print("\n2. Verifying GGUF quantization format...")
    is_gguf_format = 'q' in config.quantization.name.lower()
    assert is_gguf_format, f"Quantization should be GGUF format (q4_k_m/q5_k_m), got {config.quantization.name}"
    print(f" ✓ Quantization: {config.quantization.name} (GGUF format)")

    # 3. GGUF repos carry a -GGUF suffix.
    print("\n3. Testing GGUF repository name generation...")
    from models.registry import get_model_hf_repo
    gguf_repo = get_model_hf_repo(config.model.id, config.variant, config.quantization)
    assert gguf_repo is not None, "GGUF repository should be generated"
    assert "-GGUF" in gguf_repo, "Should have -GGUF suffix"
    print(f" ✓ Repository: {gguf_repo}")

    # 4. Negative check: nothing MLX-shaped leaked through.
    print("\n4. Verifying NOT using MLX format...")
    has_mlx_format = 'bit' in config.quantization.name.lower() and config.quantization.name not in ['q4_k_m', 'q5_k_m', 'q6_k']
    has_mlx_namespace = 'mlx-community' in gguf_repo
    # Fix: has_mlx_format was previously computed but never asserted.
    assert not has_mlx_format, "Should not use MLX quantization names"
    assert not has_mlx_namespace, "Should not use mlx-community namespace"
    print(f" ✓ Not using MLX format")

    print("\n" + "=" * 60)
    print("All NVIDIA GPU GGUF tests passed!")
    print("=" * 60)
if __name__ == "__main__":
    # Standalone runner: execute both auto-detection tests in order and
    # exit non-zero on the first failure or unexpected error.
    banner = "=" * 60
    try:
        for case in (test_apple_silicon_mlx_selection, test_nvidia_gpu_gguf_selection):
            case()
    except AssertionError as e:
        # A test assertion fired — report and signal failure.
        print(f"\n❌ Test failed: {e}")
        sys.exit(1)
    except Exception as e:
        # Anything else (import error, selector crash) — show a traceback.
        print(f"\n❌ Test error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
    else:
        print("\n" + banner)
        print("ALL AUTO-DETECTION TESTS PASSED!")
        print(banner)