local_swarm/config/models/mlx_quant_sizes.json
sleepy 32049c766c refactor(models): extract hardcoded data to JSON configs
Extracted from registry.py (437 → 194 lines):
- config/models/mlx_quant_sizes.json - MLX quantization VRAM sizes
- config/models/gguf_quant_sizes.json - GGUF quantization VRAM sizes
- config/models/model_metadata.json - Model metadata

Registry now loads from JSON files instead of hardcoded data.
All 35 tests pass.
2026-02-25 13:20:29 +01:00

{
  "_comment": "MLX quantization sizes (GB) based on mlx-community models. HARDCODED: These are verified to exist on HuggingFace mlx-community. Last verified: 2025-02-25. DO NOT make API calls on startup - use this hardcoded list.",
  "qwen2.5-coder": {
    "3b": {"3bit": 1.3, "4bit": 1.7, "6bit": 2.5, "8bit": 3.3},
    "7b": {"3bit": 3.1, "4bit": 4.1, "6bit": 6.1, "8bit": 8.1},
    "14b": {"3bit": 6.2, "4bit": 8.2, "6bit": 12.2, "8bit": 16.2}
  },
  "deepseek-coder": {
    "1.3b": {},
    "6.7b": {"4bit": 3.9}
  },
  "deepseek-coder-v2-lite": {
    "instruct": {"4bit": 4.5, "6bit": 6.5, "8bit": 8.5}
  },
  "codellama": {
    "7b": {"4bit": 4.1, "6bit": 6.1, "8bit": 8.1},
    "13b": {"4bit": 7.6, "6bit": 11.4, "8bit": 15.2}
  },
  "llama-3.2": {
    "1b": {"4bit": 0.6, "8bit": 1.2},
    "3b": {"4bit": 1.8, "6bit": 2.6, "8bit": 3.5}
  },
  "phi-4": {
    "4b": {"4bit": 2.4, "6bit": 3.6, "8bit": 4.8}
  },
  "gemma-2": {
    "2b": {"4bit": 1.2, "6bit": 1.8, "8bit": 2.4},
    "4b": {"4bit": 2.4, "6bit": 3.6, "8bit": 4.8},
    "9b": {"4bit": 5.3, "6bit": 7.9, "8bit": 10.5}
  },
  "starcoder2": {
    "3b": {"4bit": 1.8},
    "7b": {"4bit": 4.1},
    "15b": {"4bit": 8.8, "8bit": 17.6}
  }
}
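
The commit message says the registry now loads this table from JSON instead of keeping it inline. Below is a minimal sketch of how registry.py might consume the file, assuming a Python 3.10+ project. The `CONFIG_DIR` path mirrors the file path shown above; the `load_mlx_quant_sizes` and `vram_gb` helper names are hypothetical, not the project's actual API.

```python
# Hypothetical sketch; only the JSON structure above is taken from the source.
import json
from pathlib import Path

# Assumed repo-relative location, per the file path shown above.
CONFIG_DIR = Path("local_swarm/config/models")


def load_mlx_quant_sizes() -> dict:
    """Load the MLX quantization VRAM table from JSON (no network calls)."""
    with open(CONFIG_DIR / "mlx_quant_sizes.json", encoding="utf-8") as f:
        data = json.load(f)
    data.pop("_comment", None)  # drop the embedded documentation key
    return data


def vram_gb(family: str, size: str, quant: str) -> float | None:
    """Return the estimated VRAM in GB, or None if the combination is unlisted."""
    table = load_mlx_quant_sizes()
    return table.get(family, {}).get(size, {}).get(quant)


if __name__ == "__main__":
    # e.g. qwen2.5-coder 7b at 4-bit -> 4.1 (GB), per the table above
    print(vram_gb("qwen2.5-coder", "7b", "4bit"))
```

Loading once at startup keeps the "DO NOT make API calls on startup" constraint from the `_comment` intact: the table is static data on disk, so a cold lookup is a plain dict access with no HuggingFace round trip.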