Files
local_swarm/tests/test_e2e_tool_flow.py
sleepy dcca89d89a fix: OpenAI API compatibility for hollama and other clients
- Fixed ChatMessage.tool_calls to be Optional with default None (excluded when empty)
- Added logprobs field to ChatCompletionChoice (always included as null)
- Added stats and system_fingerprint to ChatCompletionResponse
- Fixed streaming response to use delta format (not message format)
- Fixed non-streaming response to include logprobs: null
- Updated tool instructions to include 'NO explanations'
- Added pytest-asyncio markers to async tests
- All 41 tests passing

This fixes the 'Cannot read properties of undefined (reading content)' error in hollama and ensures compatibility with OpenAI clients.
2026-02-25 19:39:05 +01:00
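
The response-shape fixes above are easiest to see as concrete payloads. A minimal sketch in plain dicts (the helper names are hypothetical; the field names follow the OpenAI chat-completions schema the commit targets):

def build_choice(message: dict) -> dict:
    # Non-streaming: clients read choices[0].message.content
    msg = {"role": "assistant", "content": message.get("content")}
    if message.get("tool_calls"):
        # tool_calls is optional: omitted entirely when empty,
        # matching the Optional-with-default-None fix
        msg["tool_calls"] = message["tool_calls"]
    return {
        "index": 0,
        "message": msg,
        "logprobs": None,       # always present, explicitly null
        "finish_reason": "stop",
    }

def build_stream_chunk(token: str) -> dict:
    # Streaming: each chunk carries a "delta", not a "message"
    return {
        "choices": [{
            "index": 0,
            "delta": {"content": token},
            "logprobs": None,
            "finish_reason": None,
        }]
    }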

101 lines
3.2 KiB
Python

"""End-to-end test for tool execution with a mock server.
This tests the complete flow:
1. Model generates tool call
2. Tools are executed
3. Response is generated based on tool results
"""
import asyncio
import sys
import os
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
@pytest.mark.asyncio
async def test_tool_flow():
    """Test the tool execution flow end-to-end."""
    # Import after the path is set
    from api.models import ChatMessage, ChatCompletionRequest
    from api.tool_parser import parse_tool_calls
    from api.formatting import format_messages_with_tools
    from tools.executor import ToolExecutor

    print("=" * 60)
    print("End-to-End Tool Execution Test")
    print("=" * 60)

    # Test 1: Parse a tool call from a model response
    print("\n1. Testing tool parsing...")
    model_response = 'TOOL: bash\nARGUMENTS: {"command": "echo hello"}'
    content, tool_calls = parse_tool_calls(model_response)
    assert tool_calls is not None, "Should parse tool call"
    assert len(tool_calls) == 1, "Should have one tool call"
    assert tool_calls[0]["function"]["name"] == "bash", "Should be bash tool"
    print(f"  ✓ Parsed tool: {tool_calls[0]['function']['name']}")
    # Test 2: Simulate a tool result and format it for the next prompt
    print("\n2. Testing tool result formatting...")
    tool_result = "hello\n"

    # Build the conversation history
    messages = [
        ChatMessage(role="user", content="Run echo hello"),
        ChatMessage(role="assistant", content=model_response),
        ChatMessage(role="tool", content=tool_result),
    ]

    # Format for the next generation
    next_prompt = format_messages_with_tools(messages, None)
    assert "tool" in next_prompt.lower(), "Prompt should include tool result"
    assert "hello" in next_prompt, "Prompt should include tool output"
    print("  ✓ Tool result formatted for next prompt")
    # Test 3: Verify loop detection
    print("\n3. Testing loop detection...")
    seen_tools = set()

    # First tool call: the signature is the tool name plus the first
    # 50 characters of its arguments
    tc1 = [{"function": {"name": "bash", "arguments": '{"command": "ls"}'}}]
    sig1 = f"{tc1[0]['function']['name']}:{tc1[0]['function']['arguments'][:50]}"
    seen_tools.add(sig1)
    print("  ✓ First tool call tracked")

    # An identical call yields the same signature, so it is flagged
    tc2 = tc1
    sig2 = f"{tc2[0]['function']['name']}:{tc2[0]['function']['arguments'][:50]}"
    is_duplicate = sig2 in seen_tools
    assert is_duplicate, "Should detect duplicate"
    print("  ✓ Duplicate tool call detected")
    # Test 4: Verify tool result truncation
    print("\n4. Testing tool result truncation...")
    long_result = "a" * 3000
    max_length = 2000
    if len(long_result) > max_length:
        truncated = long_result[:max_length] + "\n[...truncated...]"
        assert len(truncated) == max_length + len("\n[...truncated...]"), "Should truncate properly"
        print(f"  ✓ Tool result truncated from {len(long_result)} to {len(truncated)} chars")

    print("\n" + "=" * 60)
    print("All end-to-end tests passed!")
    print("=" * 60)
if __name__ == "__main__":
    try:
        asyncio.run(test_tool_flow())
    except AssertionError as e:
        print(f"\n❌ Test failed: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"\n❌ Test error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
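
The file runs both ways: directly (python tests/test_e2e_tool_flow.py, via the __main__ block and asyncio.run) or under pytest, where the @pytest.mark.asyncio marker lets pytest-asyncio drive the coroutine.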