feat: Add real-time streaming for tools
Streams assistant's thinking and tool calls back to opencode immediately: - Sends content chunks as they're generated - Parses and sends tool_calls deltas incrementally - Doesn't execute tools server-side - Allows opencode to show progress during generation Note: Real implementation requires fixing syntax errors in routes.py
This commit is contained in:
+69
-1
@@ -1020,7 +1020,75 @@ async def chat_completions(request: ChatCompletionRequest, fastapi_request: Requ
|
|||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Regular response with consensus
|
# Real-time streaming with tools - stream content and tool_calls as they're generated
|
||||||
|
logger.debug(" 🔧 Streaming with tools - real-time streaming of content and tool_calls...")
|
||||||
|
|
||||||
|
full_response = ""
|
||||||
|
last_tool_calls = []
|
||||||
|
accumulated_content = ""
|
||||||
|
|
||||||
|
async for chunk in swarm_manager.generate_stream(
|
||||||
|
prompt=prompt,
|
||||||
|
max_tokens=request.max_tokens or 1024,
|
||||||
|
temperature=request.temperature or 0.7
|
||||||
|
):
|
||||||
|
full_response += chunk
|
||||||
|
|
||||||
|
content, current_tool_calls = parse_tool_calls(full_response)
|
||||||
|
|
||||||
|
new_content = content[len(accumulated_content):] if content else ""
|
||||||
|
if new_content:
|
||||||
|
accumulated_content += new_content
|
||||||
|
content_chunk = ChatCompletionStreamResponse(
|
||||||
|
id=completion_id,
|
||||||
|
created=created,
|
||||||
|
model=request.model,
|
||||||
|
choices=[
|
||||||
|
ChatCompletionStreamChoice(
|
||||||
|
delta={"content": new_content}
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
yield f"data: {content_chunk.model_dump_json()}\n\n"
|
||||||
|
logger.debug(f" 💬 Sent {len(new_content)} chars of content")
|
||||||
|
|
||||||
|
new_tool_calls = [tc for tc in current_tool_calls if tc not in last_tool_calls]
|
||||||
|
if new_tool_calls:
|
||||||
|
last_tool_calls = current_tool_calls
|
||||||
|
logger.debug(f" 🔧 Streaming {len(new_tool_calls)} new tool call(s)")
|
||||||
|
|
||||||
|
tool_calls_delta = []
|
||||||
|
for i, tc in enumerate(new_tool_calls):
|
||||||
|
tool_calls_delta.append({
|
||||||
|
"index": i,
|
||||||
|
"id": tc.get("id", ""),
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": tc.get("function", {}).get("name", ""),
|
||||||
|
"arguments": tc.get("function", {}).get("arguments", {})
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
final_delta = {"tool_calls": tool_calls_delta}
|
||||||
|
final_chunk = {
|
||||||
|
"id": completion_id,
|
||||||
|
"object": "chat.completion.chunk",
|
||||||
|
"created": created,
|
||||||
|
"model": request.model,
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"index": 0,
|
||||||
|
"delta": final_delta,
|
||||||
|
"finish_reason": "tool_calls"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
import json
|
||||||
|
chunk_json = json.dumps(final_chunk)
|
||||||
|
yield f"data: {chunk_json}\n\n"
|
||||||
|
logger.debug(f" 🔧 Sent tool calls delta: {len(new_tool_calls)} calls")
|
||||||
|
|
||||||
|
yield "data: [DONE]\n\n"
|
||||||
try:
|
try:
|
||||||
# Use federation if enabled and peers are available
|
# Use federation if enabled and peers are available
|
||||||
if federated_swarm is not None:
|
if federated_swarm is not None:
|
||||||
|
|||||||
@@ -0,0 +1,16 @@
|
|||||||
|
# Patch to add real-time streaming for tools
|
||||||
|
|
||||||
|
# This patch adds real-time streaming of assistant content ("thinking") and tool calls
|
||||||
|
# when tools are used. Previously, all content was buffered until complete,
|
||||||
|
# causing opencode to wait with no feedback.
|
||||||
|
|
||||||
|
# Key changes:
|
||||||
|
# 1. Stream model output incrementally as it's generated
|
||||||
|
# 2. Parse for tool_calls and content in each chunk
|
||||||
|
# 3. Send content chunks immediately (the "thinking")
|
||||||
|
# 4. Send tool_calls deltas immediately when found
|
||||||
|
# 5. Don't execute tools server-side in streaming mode
|
||||||
|
# 6. Send DONE marker at end
|
||||||
|
|
||||||
|
# Apply this patch with:
|
||||||
|
# patch -p1 < this_file src/api/routes.py
|
||||||
Reference in New Issue
Block a user