dcca89d89a
- Fixed ChatMessage.tool_calls to be Optional with default None (excluded when empty) - Added logprobs field to ChatCompletionChoice (always included as null) - Added stats and system_fingerprint to ChatCompletionResponse - Fixed streaming response to use delta format (not message format) - Fixed non-streaming response to include logprobs: null - Updated tool instructions to include 'NO explanations' - Added pytest-asyncio markers to async tests - All 41 tests passing This fixes the 'Cannot read properties of undefined (reading content)' error in hollama and ensures compatibility with OpenAI clients.
68 lines
1.9 KiB
JSON
68 lines
1.9 KiB
JSON
{
  "_comment": "Base model metadata (without quantization-specific data)",
  "qwen2.5-coder": {
    "name": "Qwen 2.5 Coder",
    "description": "Alibaba's code-focused model, excellent for small sizes",
    "priority": 1,
    "max_context": 128000,
    "hf_repo": "Qwen/Qwen2.5-Coder",
    "variants": ["3b", "7b", "14b"]
  },
  "deepseek-coder": {
    "name": "DeepSeek Coder",
    "description": "DeepSeek's code model, good alternative",
    "priority": 2,
    "max_context": 16384,
    "hf_repo": "deepseek-ai/DeepSeek-Coder",
    "variants": ["1.3b", "6.7b"]
  },
  "deepseek-coder-v2-lite": {
    "name": "DeepSeek Coder V2 Lite",
    "description": "DeepSeek's V2 Lite model with better MLX support",
    "priority": 2,
    "max_context": 16384,
    "hf_repo": "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
    "variants": ["instruct"]
  },
  "codellama": {
    "name": "CodeLlama",
    "description": "Meta's code model",
    "priority": 3,
    "max_context": 16384,
    "hf_repo": "codellama/CodeLlama",
    "variants": ["7b", "13b"]
  },
  "llama-3.2": {
    "name": "Llama 3.2",
    "description": "Meta's latest general-purpose model with strong coding abilities",
    "priority": 4,
    "max_context": 128000,
    "hf_repo": "meta-llama/Llama-3.2",
    "variants": ["1b", "3b"]
  },
  "phi-4": {
    "name": "Phi-4",
    "description": "Microsoft's efficient small model with excellent coding performance",
    "priority": 5,
    "max_context": 16384,
    "hf_repo": "microsoft/Phi-4",
    "variants": ["4b"]
  },
  "gemma-2": {
    "name": "Gemma 2",
    "description": "Google's open model, good for coding tasks",
    "priority": 6,
    "max_context": 8192,
    "hf_repo": "google/gemma-2",
    "variants": ["2b", "4b", "9b"]
  },
  "starcoder2": {
    "name": "StarCoder2",
    "description": "BigCode's open code generation model",
    "priority": 7,
    "max_context": 8192,
    "hf_repo": "bigcode/starcoder2",
    "variants": ["3b", "7b", "15b"]
  }
}