fix: return streaming format (SSE) for tool execution results
When tools are executed during a streaming request, return the results as a proper SSE stream instead of non-streaming JSON. This ensures opencode receives the response in the expected format.

- Stream tool results in chunks
- Include the proper SSE format with the "data: " prefix
- End with the [DONE] marker
This commit is contained in:
+49
-21
@@ -456,33 +456,61 @@ async def chat_completions(request: ChatCompletionRequest):
|
||||
content = "\n\n".join(tool_results)
|
||||
print(f" ✅ Tool execution complete")
|
||||
|
||||
# Return as non-streaming response with tool results
|
||||
# Return as streaming response with tool results (opencode expects SSE format)
|
||||
print(f"\n{'='*60}")
|
||||
print(f"RESPONSE (streaming+tools): content_preview={repr(content[:100])}")
|
||||
print(f"{'='*60}\n")
|
||||
|
||||
response_obj = ChatCompletionResponse(
|
||||
id=completion_id,
|
||||
created=created,
|
||||
model=request.model,
|
||||
choices=[
|
||||
ChatCompletionChoice(
|
||||
index=0,
|
||||
message=ChatMessage(
|
||||
role="assistant",
|
||||
content=content,
|
||||
tool_calls=[]
|
||||
),
|
||||
finish_reason="stop"
|
||||
)
|
||||
],
|
||||
usage=UsageInfo(
|
||||
prompt_tokens=len(prompt) // 4,
|
||||
completion_tokens=len(full_response.split()),
|
||||
total_tokens=(len(prompt) // 4) + len(full_response.split())
|
||||
async def tool_stream_generator() -> AsyncIterator[str]:
    """Yield the tool-execution results as an OpenAI-style SSE stream.

    Emits a role delta first, then the accumulated tool output in
    fixed-size content deltas, then a terminating chunk carrying
    finish_reason="stop", and finally the literal "[DONE]" sentinel.
    """
    def _sse(event) -> str:
        # Serialize one stream-response model into a single SSE frame.
        return f"data: {event.model_dump_json()}\n\n"

    # Opening chunk: announces the assistant role only, no content yet.
    yield _sse(
        ChatCompletionStreamResponse(
            id=completion_id,
            created=created,
            model=request.model,
            choices=[ChatCompletionStreamChoice(delta={"role": "assistant"})],
        )
    )

    # Body: slice the already-complete content into 100-char deltas so the
    # client sees an incremental stream rather than one giant frame.
    step = 100
    offset = 0
    while offset < len(content):
        piece = content[offset:offset + step]
        offset += step
        yield _sse(
            ChatCompletionStreamResponse(
                id=completion_id,
                created=created,
                model=request.model,
                choices=[ChatCompletionStreamChoice(delta={"content": piece})],
            )
        )

    # Closing chunk: empty delta with the finish reason, then the
    # protocol-mandated [DONE] sentinel to end the stream.
    yield _sse(
        ChatCompletionStreamResponse(
            id=completion_id,
            created=created,
            model=request.model,
            choices=[
                ChatCompletionStreamChoice(delta={}, finish_reason="stop")
            ],
        )
    )
    yield "data: [DONE]\n\n"
|
||||
|
||||
return StreamingResponse(
|
||||
tool_stream_generator(),
|
||||
media_type="text/event-stream"
|
||||
)
|
||||
return response_obj
|
||||
else:
|
||||
# Regular streaming without tools
|
||||
async def stream_generator() -> AsyncIterator[str]:
|
||||
|
||||
Reference in New Issue
Block a user