fix(registry): Update MLX model registry with verified HuggingFace repositories

- Fix DeepSeek Coder: only 4bit exists for 6.7b; 1.3b has no quantized versions
- Fix CodeLlama: Use correct 'hf-{quant}bit-mlx' suffix naming
- Fix StarCoder2: 3b/7b only have 4bit, 15b has 4bit/8bit
- Add DeepSeek Coder V2 Lite: New model with 4/6/8bit support
- Update repository naming for all MLX models to match actual HF repos

Verified against HuggingFace mlx-community organization (2025-02-25)
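
The verification can be scripted rather than done by hand. A minimal sketch, assuming the `huggingface_hub` package and its `HfApi.repo_exists` call; the repo ids are taken from the naming rules in this commit, but the script itself is illustrative, not part of the commit:

from huggingface_hub import HfApi

# Repo ids below follow the conventions fixed by this commit
repos = [
    "mlx-community/deepseek-coder-6.7b-instruct-hf-4bit-mlx",
    "mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx",
    "mlx-community/CodeLlama-7b-Instruct-hf-4bit-mlx",
    "mlx-community/starcoder2-15b-8bit",
]

api = HfApi()
for repo_id in repos:
    # repo_exists returns True only if the repo is reachable on the Hub
    status = "OK" if api.repo_exists(repo_id) else "MISSING"
    print(f"{status:7} {repo_id}")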
commit 3dbc76de04 (parent af2d616f76), 2025-02-25 02:34:34 +01:00
+71 -33
@@ -87,7 +87,7 @@ class Model:
 # MLX quantization sizes (GB) based on mlx-community models
 # HARDCODED: These are verified to exist on HuggingFace mlx-community
-# Last verified: 2025-02-23
+# Last verified: 2025-02-25
 # DO NOT make API calls on startup - use this hardcoded list
 MLX_QUANT_SIZES = {
     # Format: model_id: {variant_size: {quant_bit: vram_gb}}
@@ -101,16 +101,15 @@ MLX_QUANT_SIZES = {
         # 5bit does NOT exist for 14b
     },
     "deepseek-coder": {
-        "1.3b": {"4bit": 0.8, "6bit": 1.2},
-        # 3bit, 5bit, 8bit do NOT exist
-        "6.7b": {"4bit": 3.9, "6bit": 5.9, "8bit": 7.9},
-        # 3bit, 5bit do NOT exist
+        "1.3b": {},  # Only base models exist, no quantized versions
+        "6.7b": {"4bit": 3.9},  # Only 4bit exists (base and instruct)
     },
+    "deepseek-coder-v2-lite": {
+        "instruct": {"4bit": 4.5, "6bit": 6.5, "8bit": 8.5},  # V2 Lite has better MLX support
+    },
     "codellama": {
-        "7b": {"4bit": 4.1, "6bit": 6.1, "8bit": 8.1},
-        # 3bit, 5bit do NOT exist
-        "13b": {"4bit": 7.6, "6bit": 11.4, "8bit": 15.2},
-        # 3bit, 5bit do NOT exist
+        "7b": {"4bit": 4.1, "6bit": 6.1, "8bit": 8.1},  # Instruct variants only
+        "13b": {"4bit": 7.6, "6bit": 11.4, "8bit": 15.2},  # Instruct variants only
     },
     "llama-3.2": {
         "1b": {"4bit": 0.6, "8bit": 1.2},
@@ -131,12 +130,9 @@ MLX_QUANT_SIZES = {
         # 3bit, 5bit do NOT exist
     },
     "starcoder2": {
-        "3b": {"4bit": 1.8, "6bit": 2.6, "8bit": 3.5},
-        # 3bit, 5bit do NOT exist
-        "7b": {"4bit": 4.1, "6bit": 6.1, "8bit": 8.1},
-        # 3bit, 5bit do NOT exist
-        "15b": {"4bit": 8.8, "6bit": 13.2, "8bit": 17.6},
-        # 3bit, 5bit do NOT exist
+        "3b": {"4bit": 1.8},  # Only 4bit exists
+        "7b": {"4bit": 4.1},  # Only 4bit exists
+        "15b": {"4bit": 8.8, "8bit": 17.6},  # Has 4bit base, 4bit/8bit instruct variants
     },
 }
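
For readers skimming the diff: the table maps model_id to variant size to the available quantizations with their approximate VRAM cost in GB, and an empty dict means no quantized MLX repo exists. A minimal sketch of a lookup (the helper name is hypothetical; the values are the ones above):

def available_mlx_quants(model_id: str, variant_size: str) -> dict:
    # Empty dict means the variant has no quantized mlx-community repo
    return MLX_QUANT_SIZES.get(model_id, {}).get(variant_size, {})

assert available_mlx_quants("starcoder2", "3b") == {"4bit": 1.8}
assert available_mlx_quants("deepseek-coder", "1.3b") == {}  # base model only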
@@ -165,6 +161,13 @@ MODEL_METADATA = {
         "max_context": 16384,
         "variants": ["1.3b", "6.7b"],
     },
+    "deepseek-coder-v2-lite": {
+        "name": "DeepSeek Coder V2 Lite",
+        "description": "DeepSeek's V2 Lite model with better MLX support",
+        "priority": 2,
+        "max_context": 16384,
+        "variants": ["instruct"],
+    },
     "codellama": {
         "name": "CodeLlama",
         "description": "Meta's code model",
@@ -364,25 +367,60 @@ def get_model_hf_repo_mlx(model_id: str, variant: ModelVariant, quant: QuantizationConfig) -> str:
         "q8": "8bit",
     }
-    # MLX quantized models are in mlx-community org with -{quant}bit suffix
-    # Map base model names to mlx-community quantized versions
-    mlx_repo_map = {
-        "qwen2.5-coder": f"mlx-community/Qwen2.5-Coder-{variant.size.capitalize()}-Instruct",
-        "deepseek-coder": f"mlx-community/deepseek-coder-{variant.size}-base",
-        "codellama": f"mlx-community/CodeLlama-{variant.size}-Instruct",
-        "llama-3.2": f"mlx-community/Llama-3.2-{variant.size}-Instruct",
-        "phi-4": f"mlx-community/phi-4",
-        "gemma-2": f"mlx-community/gemma-2-{variant.size}-it",
-        "starcoder2": f"mlx-community/starcoder2-{variant.size}",
-    }
+    # Convert GGUF quant name to MLX quant name
+    mlx_quant = gguf_to_mlx_quant.get(quant.name, quant.name) if quant else None
-    base_repo = mlx_repo_map.get(model_id, "")
-    if base_repo and quant:
-        # Convert GGUF quant name to MLX quant name
-        mlx_quant = gguf_to_mlx_quant.get(quant.name, quant.name)
-        # Append quantization suffix
-        return f"{base_repo}-{mlx_quant}"
-    return base_repo
+    # MLX quantized models are in mlx-community org
+    # Repository naming varies by model - these are verified to exist on HF
+    if model_id == "qwen2.5-coder":
+        # Qwen: mlx-community/Qwen2.5-Coder-{Size}-Instruct-{quant}bit
+        return f"mlx-community/Qwen2.5-Coder-{variant.size.capitalize()}-Instruct-{mlx_quant}"
+    elif model_id == "deepseek-coder":
+        # DeepSeek: Very limited MLX support
+        # 1.3b: Only base models exist (no quantized versions)
+        # 6.7b: mlx-community/deepseek-coder-6.7b-base-4bit-mlx (base only)
+        #       mlx-community/deepseek-coder-6.7b-instruct-hf-4bit-mlx (instruct)
+        if variant.size == "1.3b":
+            # Only base model exists, no quantization
+            return "mlx-community/deepseek-coder-1.3b-base-mlx"
+        elif variant.size == "6.7b":
+            # Use instruct variant (better for coding) with hf-{quant}bit-mlx suffix
+            return f"mlx-community/deepseek-coder-6.7b-instruct-hf-{mlx_quant}-mlx"
+    elif model_id == "deepseek-coder-v2-lite":
+        # DeepSeek Coder V2 Lite: Has good MLX support
+        # mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx
+        # mlx-community/DeepSeek-Coder-V2-Lite-Instruct-6bit
+        # mlx-community/DeepSeek-Coder-V2-Lite-Instruct-8bit
+        if mlx_quant == "4bit":
+            return "mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx"
+        else:
+            # 6bit and 8bit don't have -mlx suffix
+            return f"mlx-community/DeepSeek-Coder-V2-Lite-Instruct-{mlx_quant}"
+    elif model_id == "codellama":
+        # CodeLlama: mlx-community/CodeLlama-{size}-Instruct-hf-{quant}bit-mlx
+        # Only Instruct variants have quantized versions
+        return f"mlx-community/CodeLlama-{variant.size}-Instruct-hf-{mlx_quant}-mlx"
+    elif model_id == "llama-3.2":
+        # Llama 3.2: mlx-community/Llama-3.2-{size}-Instruct-{quant}bit
+        return f"mlx-community/Llama-3.2-{variant.size}-Instruct-{mlx_quant}"
+    elif model_id == "phi-4":
+        # Phi-4: mlx-community/phi-4-{quant}bit
+        return f"mlx-community/phi-4-{mlx_quant}"
+    elif model_id == "gemma-2":
+        # Gemma 2: mlx-community/gemma-2-{size}-it-{quant}bit
+        return f"mlx-community/gemma-2-{variant.size}-it-{mlx_quant}"
+    elif model_id == "starcoder2":
+        # StarCoder2: mlx-community/starcoder2-{size}-{quant}bit
+        return f"mlx-community/starcoder2-{variant.size}-{mlx_quant}"
+    return ""
def get_model_filename(model_id: str, variant: ModelVariant, quant: QuantizationConfig) -> str:
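
A usage sketch of the rewritten repo resolver. The stand-ins assume only the .size and .name attributes the code above reads, and that gguf_to_mlx_quant maps "q4" to "4bit" in the same pattern as the "q8" entry shown in the diff:

from types import SimpleNamespace

# Hypothetical stand-ins for ModelVariant / QuantizationConfig
variant = SimpleNamespace(size="6.7b")
quant = SimpleNamespace(name="q4")  # GGUF name, assumed to map to "4bit"

print(get_model_hf_repo_mlx("deepseek-coder", variant, quant))
# mlx-community/deepseek-coder-6.7b-instruct-hf-4bit-mlx

print(get_model_hf_repo_mlx("starcoder2", SimpleNamespace(size="15b"),
                            SimpleNamespace(name="q8")))
# mlx-community/starcoder2-15b-8bit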