Files
local_swarm/config/models/model_metadata.json
T
sleepy dcca89d89a fix: OpenAI API compatibility for hollama and other clients
- Fixed ChatMessage.tool_calls to be Optional with default None (excluded when empty)
- Added logprobs field to ChatCompletionChoice (always included as null)
- Added stats and system_fingerprint to ChatCompletionResponse
- Fixed streaming response to use delta format (not message format)
- Fixed non-streaming response to include logprobs: null
- Updated tool instructions to include 'NO explanations'
- Added pytest-asyncio markers to async tests
- All 41 tests passing

This fixes the "Cannot read properties of undefined (reading 'content')" error in hollama and ensures compatibility with OpenAI clients.
2026-02-25 19:39:05 +01:00

68 lines
1.9 KiB
JSON

{
"_comment": "Base model metadata (without quantization-specific data)",
"qwen2.5-coder": {
"name": "Qwen 2.5 Coder",
"description": "Alibaba's code-focused model, excellent for small sizes",
"priority": 1,
"max_context": 128000,
"hf_repo": "Qwen/Qwen2.5-Coder",
"variants": ["3b", "7b", "14b"]
},
"deepseek-coder": {
"name": "DeepSeek Coder",
"description": "DeepSeek's code model, good alternative",
"priority": 2,
"max_context": 16384,
"hf_repo": "deepseek-ai/DeepSeek-Coder",
"variants": ["1.3b", "6.7b"]
},
"deepseek-coder-v2-lite": {
"name": "DeepSeek Coder V2 Lite",
"description": "DeepSeek's V2 Lite model with better MLX support",
"priority": 2,
"max_context": 16384,
"hf_repo": "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
"variants": ["instruct"]
},
"codellama": {
"name": "CodeLlama",
"description": "Meta's code model",
"priority": 3,
"max_context": 16384,
"hf_repo": "codellama/CodeLlama",
"variants": ["7b", "13b"]
},
"llama-3.2": {
"name": "Llama 3.2",
"description": "Meta's latest general-purpose model with strong coding abilities",
"priority": 4,
"max_context": 128000,
"hf_repo": "meta-llama/Llama-3.2",
"variants": ["1b", "3b"]
},
"phi-4": {
"name": "Phi-4",
"description": "Microsoft's efficient small model with excellent coding performance",
"priority": 5,
"max_context": 16384,
"hf_repo": "microsoft/Phi-4",
"variants": ["4b"]
},
"gemma-2": {
"name": "Gemma 2",
"description": "Google's open model, good for coding tasks",
"priority": 6,
"max_context": 8192,
"hf_repo": "google/gemma-2",
"variants": ["2b", "4b", "9b"]
},
"starcoder2": {
"name": "StarCoder2",
"description": "BigCode's open code generation model",
"priority": 7,
"max_context": 8192,
"hf_repo": "bigcode/starcoder2",
"variants": ["3b", "7b", "15b"]
}
}