dcca89d89a
- Fixed ChatMessage.tool_calls to be Optional with default None (excluded when empty) - Added logprobs field to ChatCompletionChoice (always included as null) - Added stats and system_fingerprint to ChatCompletionResponse - Fixed streaming response to use delta format (not message format) - Fixed non-streaming response to include logprobs: null - Updated tool instructions to include 'NO explanations' - Added pytest-asyncio markers to async tests - All 41 tests passing This fixes the 'Cannot read properties of undefined (reading content)' error in hollama and ensures compatibility with OpenAI clients.
184 lines
6.0 KiB
Python
184 lines
6.0 KiB
Python
"""Integration test for tool execution in chat completions.
|
|
|
|
This test verifies that:
|
|
1. Tools are properly parsed from model output
|
|
2. Tools are executed and results fed back to model
|
|
3. The loop continues generating until final response
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import sys
|
|
import os
|
|
import pytest
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
|
|
|
|
from api.models import ChatMessage
|
|
from api.chat_handlers import handle_chat_completion, _sanitize_tools
|
|
from api.tool_parser import parse_tool_calls
|
|
from api.formatting import format_messages_with_tools
|
|
|
|
|
|
class MockSwarm:
|
|
"""Mock swarm manager for testing."""
|
|
|
|
async def generate(self, prompt, max_tokens, temperature, use_consensus):
|
|
"""Generate a mock response."""
|
|
# Return different responses based on prompt content
|
|
if "tool_result" in prompt.lower():
|
|
# Final response after tool execution
|
|
return MockResponse("Here's the result: The tool was executed successfully!")
|
|
else:
|
|
# First response with tool call
|
|
return MockResponse("TOOL: bash\nARGUMENTS: {\"command\": \"echo test\"}")
|
|
|
|
|
|
class MockResponse:
|
|
"""Mock generation result."""
|
|
|
|
def __init__(self, text):
|
|
self.selected_response = MockSelectedResponse(text)
|
|
|
|
|
|
class MockSelectedResponse:
|
|
"""Mock selected response."""
|
|
|
|
def __init__(self, text):
|
|
self.text = text
|
|
self.tokens_generated = 50
|
|
self.tokens_per_second = 10.0
|
|
|
|
|
|
class MockExecutor:
|
|
"""Mock tool executor."""
|
|
|
|
async def execute_tool(self, tool_name, tool_args, working_dir=None):
|
|
"""Execute a tool mock."""
|
|
return f"Mock result from {tool_name} with args {tool_args}"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_tool_execution_loop():
|
|
"""Test that tools are executed and loop continues."""
|
|
print("Testing tool execution loop...")
|
|
|
|
# Create a mock request
|
|
request = ChatMessage(
|
|
role="user",
|
|
content="Run echo test"
|
|
)
|
|
|
|
# Wrap in request object
|
|
from api.models import ChatCompletionRequest
|
|
req = ChatCompletionRequest(
|
|
model="test-model",
|
|
messages=[request],
|
|
tools=None,
|
|
max_tokens=1024,
|
|
temperature=0.7
|
|
)
|
|
|
|
# Create mock swarm
|
|
swarm = MockSwarm()
|
|
|
|
# We can't easily test the full handler without a real tool executor,
|
|
# so let's test the key parts
|
|
|
|
# Test 1: Verify tool parsing works
|
|
print(" Test 1: Tool parsing")
|
|
tool_text = 'TOOL: bash\nARGUMENTS: {"command": "echo test"}'
|
|
content, tool_calls = parse_tool_calls(tool_text)
|
|
|
|
assert tool_calls is not None, "Tool calls should be parsed"
|
|
assert len(tool_calls) == 1, "Should parse one tool call"
|
|
assert tool_calls[0]["function"]["name"] == "bash", "Tool name should be bash"
|
|
assert "echo test" in tool_calls[0]["function"]["arguments"], "Command should be in arguments"
|
|
print(" ✓ Tool parsing works correctly")
|
|
|
|
# Test 2: Verify tool instructions are loaded
|
|
print(" Test 2: Tool instructions")
|
|
instructions = format_messages_with_tools([request], None)
|
|
assert len(instructions) > 0, "Instructions should be generated"
|
|
assert "tool" in instructions.lower(), "Instructions should mention tools"
|
|
print(" ✓ Tool instructions are loaded")
|
|
|
|
# Test 3: Verify multiple tool calls can be parsed
|
|
print(" Test 3: Multiple tool calls")
|
|
multi_tool = '''TOOL: bash
|
|
ARGUMENTS: {"command": "ls"}
|
|
|
|
TOOL: write
|
|
ARGUMENTS: {"filePath": "test.txt", "content": "hello"}'''
|
|
content, tool_calls = parse_tool_calls(multi_tool)
|
|
assert tool_calls is not None, "Multiple tools should be parsed"
|
|
assert len(tool_calls) == 2, "Should parse two tool calls"
|
|
assert tool_calls[0]["function"]["name"] == "bash", "First tool should be bash"
|
|
assert tool_calls[1]["function"]["name"] == "write", "Second tool should be write"
|
|
print(" ✓ Multiple tool calls parsed correctly")
|
|
|
|
# Test 4: Verify tool sanitization
|
|
print(" Test 4: Tool sanitization")
|
|
# Create mock tool with invalid 'description' in properties
|
|
from api.models import Tool, FunctionDefinition
|
|
mock_tool = Tool(
|
|
type="function",
|
|
function=FunctionDefinition(
|
|
name="test_tool",
|
|
description="Test tool",
|
|
parameters={
|
|
"type": "object",
|
|
"properties": {
|
|
"description": "Invalid field",
|
|
"param1": {"type": "string"}
|
|
},
|
|
"required": ["description", "param1"]
|
|
}
|
|
)
|
|
)
|
|
sanitized = _sanitize_tools([mock_tool])
|
|
assert len(sanitized) == 1, "Should return one tool"
|
|
assert "description" not in sanitized[0].function.parameters.get("properties", {}), \
|
|
"Should remove invalid 'description' from properties"
|
|
print(" ✓ Tool sanitization removes invalid fields")
|
|
|
|
print("\n✅ All tool execution loop tests passed!")
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_no_tool_parsing():
|
|
"""Test that normal responses without tools work."""
|
|
print("\nTesting response without tools...")
|
|
|
|
# Test normal response
|
|
normal_text = "This is a normal response without any tool calls."
|
|
content, tool_calls = parse_tool_calls(normal_text)
|
|
|
|
assert tool_calls is None, "No tool calls should be found"
|
|
assert content == normal_text, "Content should be returned unchanged"
|
|
print(" ✓ Normal responses pass through without modification")
|
|
|
|
print("\n✅ No-tool parsing test passed!")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
async def run_tests():
|
|
try:
|
|
await test_tool_execution_loop()
|
|
await test_no_tool_parsing()
|
|
print("\n" + "=" * 60)
|
|
print("All integration tests passed!")
|
|
print("=" * 60)
|
|
except AssertionError as e:
|
|
print(f"\n❌ Test failed: {e}")
|
|
import traceback
|
|
traceback.print_exc()
|
|
sys.exit(1)
|
|
except Exception as e:
|
|
print(f"\n❌ Test error: {e}")
|
|
import traceback
|
|
traceback.print_exc()
|
|
sys.exit(1)
|
|
|
|
asyncio.run(run_tests())
|