Files
sleepy 32049c766c refactor(models): extract hardcoded data to JSON configs
Extracted from registry.py (437 → 194 lines):
- config/models/mlx_quant_sizes.json - MLX quantization VRAM sizes
- config/models/gguf_quant_sizes.json - GGUF quantization VRAM sizes
- config/models/model_metadata.json - Model metadata

Registry now loads from JSON files instead of hardcoded data.
All 35 tests pass.
2026-02-25 13:20:29 +01:00

34 lines
976 B
JSON

{
"_comment": "Accurate GGUF quantization sizes in GB, keyed by model family, then parameter count, then quantization type",
"qwen2.5-coder": {
"3b": {"q4_k_m": 1.8, "q5_k_m": 2.2, "q6_k": 2.6},
"7b": {"q4_k_m": 4.5, "q5_k_m": 5.2, "q6_k": 6.0},
"14b": {"q4_k_m": 8.8, "q5_k_m": 10.5}
},
"deepseek-coder": {
"1.3b": {"q4_k_m": 0.8, "q5_k_m": 1.0},
"6.7b": {"q4_k_m": 4.2, "q5_k_m": 5.0}
},
"codellama": {
"7b": {"q4_k_m": 4.5, "q5_k_m": 5.2},
"13b": {"q4_k_m": 8.0, "q5_k_m": 9.5}
},
"llama-3.2": {
"3b": {"q4_k_m": 1.9, "q5_k_m": 2.3, "q6_k": 2.7},
"1b": {"q4_k_m": 0.7, "q5_k_m": 0.9}
},
"phi-4": {
"4b": {"q4_k_m": 2.4, "q5_k_m": 2.9, "q6_k": 3.4}
},
"gemma-2": {
"2b": {"q4_k_m": 1.5, "q5_k_m": 1.8},
"4b": {"q4_k_m": 2.7, "q5_k_m": 3.2, "q6_k": 3.8},
"9b": {"q4_k_m": 5.5, "q5_k_m": 6.5}
},
"starcoder2": {
"3b": {"q4_k_m": 1.9, "q5_k_m": 2.3},
"7b": {"q4_k_m": 4.5, "q5_k_m": 5.2, "q6_k": 6.1},
"15b": {"q4_k_m": 9.2, "q5_k_m": 10.8}
}
}