32049c766c
Extracted from registry.py (437 → 194 lines): - config/models/mlx_quant_sizes.json - MLX quantization VRAM sizes - config/models/gguf_quant_sizes.json - GGUF quantization VRAM sizes - config/models/model_metadata.json - Model metadata Registry now loads from JSON files instead of hardcoded data. All 35 tests pass.
37 lines
1.2 KiB
JSON
37 lines
1.2 KiB
JSON
{
  "_comment": "MLX quantization sizes (GB) based on mlx-community models. HARDCODED: These are verified to exist on HuggingFace mlx-community. Last verified: 2025-02-25. DO NOT make API calls on startup - use this hardcoded list.",
  "qwen2.5-coder": {
    "3b": {"3bit": 1.3, "4bit": 1.7, "6bit": 2.5, "8bit": 3.3},
    "7b": {"3bit": 3.1, "4bit": 4.1, "6bit": 6.1, "8bit": 8.1},
    "14b": {"3bit": 6.2, "4bit": 8.2, "6bit": 12.2, "8bit": 16.2}
  },
  "deepseek-coder": {
    "1.3b": {},
    "6.7b": {"4bit": 3.9}
  },
  "deepseek-coder-v2-lite": {
    "instruct": {"4bit": 4.5, "6bit": 6.5, "8bit": 8.5}
  },
  "codellama": {
    "7b": {"4bit": 4.1, "6bit": 6.1, "8bit": 8.1},
    "13b": {"4bit": 7.6, "6bit": 11.4, "8bit": 15.2}
  },
  "llama-3.2": {
    "1b": {"4bit": 0.6, "8bit": 1.2},
    "3b": {"4bit": 1.8, "6bit": 2.6, "8bit": 3.5}
  },
  "phi-4": {
    "4b": {"4bit": 2.4, "6bit": 3.6, "8bit": 4.8}
  },
  "gemma-2": {
    "2b": {"4bit": 1.2, "6bit": 1.8, "8bit": 2.4},
    "4b": {"4bit": 2.4, "6bit": 3.6, "8bit": 4.8},
    "9b": {"4bit": 5.3, "6bit": 7.9, "8bit": 10.5}
  },
  "starcoder2": {
    "3b": {"4bit": 1.8},
    "7b": {"4bit": 4.1},
    "15b": {"4bit": 8.8, "8bit": 17.6}
  }
}