fix(registry): Update MLX model registry with verified HuggingFace repositories

- Fix DeepSeek Coder: only 4bit exists for 6.7b; 1.3b has no quantized versions
- Fix CodeLlama: Use correct 'hf-{quant}bit-mlx' suffix naming
- Fix StarCoder2: 3b/7b only have 4bit, 15b has 4bit/8bit
- Add DeepSeek Coder V2 Lite: New model with 4/6/8bit support
- Update repository naming for all MLX models to match actual HF repos

Verified against HuggingFace mlx-community organization (2025-02-25)
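
The verification can be scripted rather than done by hand. A minimal sketch, assuming the `huggingface_hub` package and its `HfApi.repo_exists` call; the repo ids are taken from the naming rules in this commit, but the script itself is illustrative, not part of the commit:

from huggingface_hub import HfApi

# Repo ids below follow the conventions fixed by this commit
repos = [
    "mlx-community/deepseek-coder-6.7b-instruct-hf-4bit-mlx",
    "mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx",
    "mlx-community/CodeLlama-7b-Instruct-hf-4bit-mlx",
    "mlx-community/starcoder2-15b-8bit",
]

api = HfApi()
for repo_id in repos:
    # repo_exists returns True only if the repo is reachable on the Hub
    status = "OK" if api.repo_exists(repo_id) else "MISSING"
    print(f"{status:7} {repo_id}")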
commit 3dbc76de04 (parent af2d616f76), 2025-02-25 02:34:34 +01:00
+71 -33
@@ -87,7 +87,7 @@ class Model:
 # MLX quantization sizes (GB) based on mlx-community models
 # HARDCODED: These are verified to exist on HuggingFace mlx-community
-# Last verified: 2025-02-23
+# Last verified: 2025-02-25
 # DO NOT make API calls on startup - use this hardcoded list
 MLX_QUANT_SIZES = {
     # Format: model_id: {variant_size: {quant_bit: vram_gb}}
@@ -101,16 +101,15 @@ MLX_QUANT_SIZES = {
         # 5bit does NOT exist for 14b
     },
     "deepseek-coder": {
-        "1.3b": {"4bit": 0.8, "6bit": 1.2},
-        # 3bit, 5bit, 8bit do NOT exist
-        "6.7b": {"4bit": 3.9, "6bit": 5.9, "8bit": 7.9},
-        # 3bit, 5bit do NOT exist
+        "1.3b": {},  # Only base models exist, no quantized versions
+        "6.7b": {"4bit": 3.9},  # Only 4bit exists (base and instruct)
     },
+    "deepseek-coder-v2-lite": {
+        "instruct": {"4bit": 4.5, "6bit": 6.5, "8bit": 8.5},  # V2 Lite has better MLX support
+    },
     "codellama": {
-        "7b": {"4bit": 4.1, "6bit": 6.1, "8bit": 8.1},
-        # 3bit, 5bit do NOT exist
-        "13b": {"4bit": 7.6, "6bit": 11.4, "8bit": 15.2},
-        # 3bit, 5bit do NOT exist
+        "7b": {"4bit": 4.1, "6bit": 6.1, "8bit": 8.1},  # Instruct variants only
+        "13b": {"4bit": 7.6, "6bit": 11.4, "8bit": 15.2},  # Instruct variants only
     },
     "llama-3.2": {
         "1b": {"4bit": 0.6, "8bit": 1.2},
@@ -131,12 +130,9 @@ MLX_QUANT_SIZES = {
         # 3bit, 5bit do NOT exist
     },
     "starcoder2": {
-        "3b": {"4bit": 1.8, "6bit": 2.6, "8bit": 3.5},
-        # 3bit, 5bit do NOT exist
-        "7b": {"4bit": 4.1, "6bit": 6.1, "8bit": 8.1},
-        # 3bit, 5bit do NOT exist
-        "15b": {"4bit": 8.8, "6bit": 13.2, "8bit": 17.6},
-        # 3bit, 5bit do NOT exist
+        "3b": {"4bit": 1.8},  # Only 4bit exists
+        "7b": {"4bit": 4.1},  # Only 4bit exists
+        "15b": {"4bit": 8.8, "8bit": 17.6},  # Has 4bit base, 4bit/8bit instruct variants
     },
 }
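
For readers skimming the diff: the table maps model_id to variant size to the available quantizations with their approximate VRAM cost in GB, and an empty dict means no quantized MLX repo exists. A minimal sketch of a lookup (the helper name is hypothetical; the values are the ones above):

def available_mlx_quants(model_id: str, variant_size: str) -> dict:
    # Empty dict means the variant has no quantized mlx-community repo
    return MLX_QUANT_SIZES.get(model_id, {}).get(variant_size, {})

assert available_mlx_quants("starcoder2", "3b") == {"4bit": 1.8}
assert available_mlx_quants("deepseek-coder", "1.3b") == {}  # base model only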
@@ -165,6 +161,13 @@ MODEL_METADATA = {
         "max_context": 16384,
         "variants": ["1.3b", "6.7b"],
     },
+    "deepseek-coder-v2-lite": {
+        "name": "DeepSeek Coder V2 Lite",
+        "description": "DeepSeek's V2 Lite model with better MLX support",
+        "priority": 2,
+        "max_context": 16384,
+        "variants": ["instruct"],
+    },
     "codellama": {
         "name": "CodeLlama",
         "description": "Meta's code model",
@@ -364,25 +367,60 @@ def get_model_hf_repo_mlx(model_id: str, variant: ModelVariant, quant: QuantizationConfig) -> str:
         "q8": "8bit",
     }
-    # MLX quantized models are in mlx-community org with -{quant}bit suffix
-    # Map base model names to mlx-community quantized versions
-    mlx_repo_map = {
-        "qwen2.5-coder": f"mlx-community/Qwen2.5-Coder-{variant.size.capitalize()}-Instruct",
-        "deepseek-coder": f"mlx-community/deepseek-coder-{variant.size}-base",
-        "codellama": f"mlx-community/CodeLlama-{variant.size}-Instruct",
-        "llama-3.2": f"mlx-community/Llama-3.2-{variant.size}-Instruct",
-        "phi-4": f"mlx-community/phi-4",
-        "gemma-2": f"mlx-community/gemma-2-{variant.size}-it",
-        "starcoder2": f"mlx-community/starcoder2-{variant.size}",
-    }
+    # Convert GGUF quant name to MLX quant name
+    mlx_quant = gguf_to_mlx_quant.get(quant.name, quant.name) if quant else None
-    base_repo = mlx_repo_map.get(model_id, "")
-    if base_repo and quant:
-        # Convert GGUF quant name to MLX quant name
-        mlx_quant = gguf_to_mlx_quant.get(quant.name, quant.name)
-        # Append quantization suffix
-        return f"{base_repo}-{mlx_quant}"
-    return base_repo
+    # MLX quantized models are in mlx-community org
+    # Repository naming varies by model - these are verified to exist on HF
+    if model_id == "qwen2.5-coder":
+        # Qwen: mlx-community/Qwen2.5-Coder-{Size}-Instruct-{quant}bit
+        return f"mlx-community/Qwen2.5-Coder-{variant.size.capitalize()}-Instruct-{mlx_quant}"
+    elif model_id == "deepseek-coder":
+        # DeepSeek: Very limited MLX support
+        # 1.3b: Only base models exist (no quantized versions)
+        # 6.7b: mlx-community/deepseek-coder-6.7b-base-4bit-mlx (base only)
+        #       mlx-community/deepseek-coder-6.7b-instruct-hf-4bit-mlx (instruct)
+        if variant.size == "1.3b":
+            # Only base model exists, no quantization
+            return "mlx-community/deepseek-coder-1.3b-base-mlx"
+        elif variant.size == "6.7b":
+            # Use instruct variant (better for coding) with hf-{quant}bit-mlx suffix
+            return f"mlx-community/deepseek-coder-6.7b-instruct-hf-{mlx_quant}-mlx"
+    elif model_id == "deepseek-coder-v2-lite":
+        # DeepSeek Coder V2 Lite: Has good MLX support
+        # mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx
+        # mlx-community/DeepSeek-Coder-V2-Lite-Instruct-6bit
+        # mlx-community/DeepSeek-Coder-V2-Lite-Instruct-8bit
+        if mlx_quant == "4bit":
+            return "mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx"
+        else:
+            # 6bit and 8bit don't have -mlx suffix
+            return f"mlx-community/DeepSeek-Coder-V2-Lite-Instruct-{mlx_quant}"
+    elif model_id == "codellama":
+        # CodeLlama: mlx-community/CodeLlama-{size}-Instruct-hf-{quant}bit-mlx
+        # Only Instruct variants have quantized versions
+        return f"mlx-community/CodeLlama-{variant.size}-Instruct-hf-{mlx_quant}-mlx"
+    elif model_id == "llama-3.2":
+        # Llama 3.2: mlx-community/Llama-3.2-{size}-Instruct-{quant}bit
+        return f"mlx-community/Llama-3.2-{variant.size}-Instruct-{mlx_quant}"
+    elif model_id == "phi-4":
+        # Phi-4: mlx-community/phi-4-{quant}bit
+        return f"mlx-community/phi-4-{mlx_quant}"
+    elif model_id == "gemma-2":
+        # Gemma 2: mlx-community/gemma-2-{size}-it-{quant}bit
+        return f"mlx-community/gemma-2-{variant.size}-it-{mlx_quant}"
+    elif model_id == "starcoder2":
+        # StarCoder2: mlx-community/starcoder2-{size}-{quant}bit
+        return f"mlx-community/starcoder2-{variant.size}-{mlx_quant}"
+    return ""
def get_model_filename(model_id: str, variant: ModelVariant, quant: QuantizationConfig) -> str:
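
A usage sketch of the rewritten repo resolver. The stand-ins assume only the .size and .name attributes the code above reads, and that gguf_to_mlx_quant maps "q4" to "4bit" in the same pattern as the "q8" entry shown in the diff:

from types import SimpleNamespace

# Hypothetical stand-ins for ModelVariant / QuantizationConfig
variant = SimpleNamespace(size="6.7b")
quant = SimpleNamespace(name="q4")  # GGUF name, assumed to map to "4bit"

print(get_model_hf_repo_mlx("deepseek-coder", variant, quant))
# mlx-community/deepseek-coder-6.7b-instruct-hf-4bit-mlx

print(get_model_hf_repo_mlx("starcoder2", SimpleNamespace(size="15b"),
                            SimpleNamespace(name="q8")))
# mlx-community/starcoder2-15b-8bit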