32049c766c
Extracted from registry.py (437 → 194 lines): - config/models/mlx_quant_sizes.json - MLX quantization VRAM sizes - config/models/gguf_quant_sizes.json - GGUF quantization VRAM sizes - config/models/model_metadata.json - Model metadata Registry now loads from JSON files instead of hardcoded data. All 35 tests pass.
34 lines
976 B
JSON
{
  "_comment": "GGUF quantization sizes (GB) - accurate sizes",
  "qwen2.5-coder": {
    "3b": {"q4_k_m": 1.8, "q5_k_m": 2.2, "q6_k": 2.6},
    "7b": {"q4_k_m": 4.5, "q5_k_m": 5.2, "q6_k": 6.0},
    "14b": {"q4_k_m": 8.8, "q5_k_m": 10.5}
  },
  "deepseek-coder": {
    "1.3b": {"q4_k_m": 0.8, "q5_k_m": 1.0},
    "6.7b": {"q4_k_m": 4.2, "q5_k_m": 5.0}
  },
  "codellama": {
    "7b": {"q4_k_m": 4.5, "q5_k_m": 5.2},
    "13b": {"q4_k_m": 8.0, "q5_k_m": 9.5}
  },
  "llama-3.2": {
    "3b": {"q4_k_m": 1.9, "q5_k_m": 2.3, "q6_k": 2.7},
    "1b": {"q4_k_m": 0.7, "q5_k_m": 0.9}
  },
  "phi-4": {
    "4b": {"q4_k_m": 2.4, "q5_k_m": 2.9, "q6_k": 3.4}
  },
  "gemma-2": {
    "2b": {"q4_k_m": 1.5, "q5_k_m": 1.8},
    "4b": {"q4_k_m": 2.7, "q5_k_m": 3.2, "q6_k": 3.8},
    "9b": {"q4_k_m": 5.5, "q5_k_m": 6.5}
  },
  "starcoder2": {
    "3b": {"q4_k_m": 1.9, "q5_k_m": 2.3},
    "7b": {"q4_k_m": 4.5, "q5_k_m": 5.2, "q6_k": 6.1},
    "15b": {"q4_k_m": 9.2, "q5_k_m": 10.8}
  }
}