1acebbc6a2
Changes:
- selector.py: 486 → 329 lines (-32%)
- Extracted memory calculation functions to memory_calculator.py
- Extracted constants to selector_config.json
- Updated selector.py to load config and import from memory_calculator
- All 35 tests pass
22 lines
537 B
JSON
22 lines
537 B
JSON
{
  "_comment": "Model selection configuration constants",
  "constraints": {
    "min_instances": 1,
    "max_instances": 8,
    "optimal_max_instances": 5,
    "memory_overhead_factor": 0.95,
    "mlx_max_instances": 1
  },
  "context_options": {
    "16384": "16K tokens",
    "32768": "32K tokens (default)",
    "65536": "64K tokens",
    "131072": "128K tokens"
  },
  "offload_options": {
    "0.0": "No offload (default) - 100% GPU",
    "0.2": "20% offload - 80% GPU, 20% RAM",
    "0.5": "50% offload - 50% GPU, 50% RAM"
  }
}