fix(registry): Update MLX model registry with verified HuggingFace repositories
- Fix DeepSeek Coder: Only 4bit available, 1.3b has no quantizations
- Fix CodeLlama: Use correct 'hf-{quant}bit-mlx' suffix naming
- Fix StarCoder2: 3b/7b only have 4bit, 15b has 4bit/8bit
- Add DeepSeek Coder V2 Lite: New model with 4/6/8bit support
- Update repository naming for all MLX models to match actual HF repos
Verified against HuggingFace mlx-community organization (2025-02-25)
This commit is contained in:
+71
-33
@@ -87,7 +87,7 @@ class Model:
|
||||
|
||||
# MLX quantization sizes (GB) based on mlx-community models
|
||||
# HARDCODED: These are verified to exist on HuggingFace mlx-community
|
||||
# Last verified: 2025-02-23
|
||||
# Last verified: 2025-02-25
|
||||
# DO NOT make API calls on startup - use this hardcoded list
|
||||
MLX_QUANT_SIZES = {
|
||||
# Format: model_id: {variant_size: {quant_bit: vram_gb}}
|
||||
@@ -101,16 +101,15 @@ MLX_QUANT_SIZES = {
|
||||
# 5bit does NOT exist for 14b
|
||||
},
|
||||
"deepseek-coder": {
|
||||
"1.3b": {"4bit": 0.8, "6bit": 1.2},
|
||||
# 3bit, 5bit, 8bit do NOT exist
|
||||
"6.7b": {"4bit": 3.9, "6bit": 5.9, "8bit": 7.9},
|
||||
# 3bit, 5bit do NOT exist
|
||||
"1.3b": {}, # Only base models exist, no quantized versions
|
||||
"6.7b": {"4bit": 3.9}, # Only 4bit exists (base and instruct)
|
||||
},
|
||||
"deepseek-coder-v2-lite": {
|
||||
"instruct": {"4bit": 4.5, "6bit": 6.5, "8bit": 8.5}, # V2 Lite has better MLX support
|
||||
},
|
||||
"codellama": {
|
||||
"7b": {"4bit": 4.1, "6bit": 6.1, "8bit": 8.1},
|
||||
# 3bit, 5bit do NOT exist
|
||||
"13b": {"4bit": 7.6, "6bit": 11.4, "8bit": 15.2},
|
||||
# 3bit, 5bit do NOT exist
|
||||
"7b": {"4bit": 4.1, "6bit": 6.1, "8bit": 8.1}, # Instruct variants only
|
||||
"13b": {"4bit": 7.6, "6bit": 11.4, "8bit": 15.2}, # Instruct variants only
|
||||
},
|
||||
"llama-3.2": {
|
||||
"1b": {"4bit": 0.6, "8bit": 1.2},
|
||||
@@ -131,12 +130,9 @@ MLX_QUANT_SIZES = {
|
||||
# 3bit, 5bit do NOT exist
|
||||
},
|
||||
"starcoder2": {
|
||||
"3b": {"4bit": 1.8, "6bit": 2.6, "8bit": 3.5},
|
||||
# 3bit, 5bit do NOT exist
|
||||
"7b": {"4bit": 4.1, "6bit": 6.1, "8bit": 8.1},
|
||||
# 3bit, 5bit do NOT exist
|
||||
"15b": {"4bit": 8.8, "6bit": 13.2, "8bit": 17.6},
|
||||
# 3bit, 5bit do NOT exist
|
||||
"3b": {"4bit": 1.8}, # Only 4bit exists
|
||||
"7b": {"4bit": 4.1}, # Only 4bit exists
|
||||
"15b": {"4bit": 8.8, "8bit": 17.6}, # Has 4bit base, 4bit/8bit instruct variants
|
||||
},
|
||||
}
|
||||
|
||||
@@ -165,6 +161,13 @@ MODEL_METADATA = {
|
||||
"max_context": 16384,
|
||||
"variants": ["1.3b", "6.7b"],
|
||||
},
|
||||
"deepseek-coder-v2-lite": {
|
||||
"name": "DeepSeek Coder V2 Lite",
|
||||
"description": "DeepSeek's V2 Lite model with better MLX support",
|
||||
"priority": 2,
|
||||
"max_context": 16384,
|
||||
"variants": ["instruct"],
|
||||
},
|
||||
"codellama": {
|
||||
"name": "CodeLlama",
|
||||
"description": "Meta's code model",
|
||||
@@ -364,25 +367,60 @@ def get_model_hf_repo_mlx(model_id: str, variant: ModelVariant, quant: Quantizat
|
||||
"q8": "8bit",
|
||||
}
|
||||
|
||||
# MLX quantized models are in mlx-community org with -{quant}bit suffix
|
||||
# Map base model names to mlx-community quantized versions
|
||||
mlx_repo_map = {
|
||||
"qwen2.5-coder": f"mlx-community/Qwen2.5-Coder-{variant.size.capitalize()}-Instruct",
|
||||
"deepseek-coder": f"mlx-community/deepseek-coder-{variant.size}-base",
|
||||
"codellama": f"mlx-community/CodeLlama-{variant.size}-Instruct",
|
||||
"llama-3.2": f"mlx-community/Llama-3.2-{variant.size}-Instruct",
|
||||
"phi-4": f"mlx-community/phi-4",
|
||||
"gemma-2": f"mlx-community/gemma-2-{variant.size}-it",
|
||||
"starcoder2": f"mlx-community/starcoder2-{variant.size}",
|
||||
}
|
||||
# Convert GGUF quant name to MLX quant name
|
||||
mlx_quant = gguf_to_mlx_quant.get(quant.name, quant.name) if quant else None
|
||||
|
||||
base_repo = mlx_repo_map.get(model_id, "")
|
||||
if base_repo and quant:
|
||||
# Convert GGUF quant name to MLX quant name
|
||||
mlx_quant = gguf_to_mlx_quant.get(quant.name, quant.name)
|
||||
# Append quantization suffix
|
||||
return f"{base_repo}-{mlx_quant}"
|
||||
return base_repo
|
||||
# MLX quantized models are in mlx-community org
|
||||
# Repository naming varies by model - these are verified to exist on HF
|
||||
if model_id == "qwen2.5-coder":
|
||||
# Qwen: mlx-community/Qwen2.5-Coder-{Size}-Instruct-{quant}bit
|
||||
return f"mlx-community/Qwen2.5-Coder-{variant.size.capitalize()}-Instruct-{mlx_quant}"
|
||||
|
||||
elif model_id == "deepseek-coder":
|
||||
# DeepSeek: Very limited MLX support
|
||||
# 1.3b: Only base models exist (no quantized versions)
|
||||
# 6.7b: mlx-community/deepseek-coder-6.7b-base-4bit-mlx (base only)
|
||||
# mlx-community/deepseek-coder-6.7b-instruct-hf-4bit-mlx (instruct)
|
||||
if variant.size == "1.3b":
|
||||
# Only base model exists, no quantization
|
||||
return "mlx-community/deepseek-coder-1.3b-base-mlx"
|
||||
elif variant.size == "6.7b":
|
||||
# Use instruct variant (better for coding) with hf-{quant}bit-mlx suffix
|
||||
return f"mlx-community/deepseek-coder-6.7b-instruct-hf-{mlx_quant}-mlx"
|
||||
|
||||
elif model_id == "deepseek-coder-v2-lite":
|
||||
# DeepSeek Coder V2 Lite: Has good MLX support
|
||||
# mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx
|
||||
# mlx-community/DeepSeek-Coder-V2-Lite-Instruct-6bit
|
||||
# mlx-community/DeepSeek-Coder-V2-Lite-Instruct-8bit
|
||||
if mlx_quant == "4bit":
|
||||
return "mlx-community/DeepSeek-Coder-V2-Lite-Instruct-4bit-mlx"
|
||||
else:
|
||||
# 6bit and 8bit don't have -mlx suffix
|
||||
return f"mlx-community/DeepSeek-Coder-V2-Lite-Instruct-{mlx_quant}"
|
||||
|
||||
elif model_id == "codellama":
|
||||
# CodeLlama: mlx-community/CodeLlama-{size}-Instruct-hf-{quant}bit-mlx
|
||||
# Only Instruct variants have quantized versions
|
||||
return f"mlx-community/CodeLlama-{variant.size}-Instruct-hf-{mlx_quant}-mlx"
|
||||
|
||||
elif model_id == "llama-3.2":
|
||||
# Llama 3.2: mlx-community/Llama-3.2-{size}-Instruct-{quant}bit
|
||||
return f"mlx-community/Llama-3.2-{variant.size}-Instruct-{mlx_quant}"
|
||||
|
||||
elif model_id == "phi-4":
|
||||
# Phi-4: mlx-community/phi-4-{quant}bit
|
||||
return f"mlx-community/phi-4-{mlx_quant}"
|
||||
|
||||
elif model_id == "gemma-2":
|
||||
# Gemma 2: mlx-community/gemma-2-{size}-it-{quant}bit
|
||||
return f"mlx-community/gemma-2-{variant.size}-it-{mlx_quant}"
|
||||
|
||||
elif model_id == "starcoder2":
|
||||
# StarCoder2: mlx-community/starcoder2-{size}-{quant}bit
|
||||
return f"mlx-community/starcoder2-{variant.size}-{mlx_quant}"
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
def get_model_filename(model_id: str, variant: ModelVariant, quant: QuantizationConfig) -> str:
|
||||
|
||||
Reference in New Issue
Block a user