fix: return streaming format (SSE) for tool execution results
When tools are executed during a streaming request, return the results as a proper SSE stream instead of non-streaming JSON. This ensures opencode receives the response in the expected format.

- Stream tool results in chunks
- Include the proper SSE format with the "data: " prefix
- End with the [DONE] marker
This commit is contained in:
+49
-21
@@ -456,33 +456,61 @@ async def chat_completions(request: ChatCompletionRequest):
|
||||
content = "\n\n".join(tool_results)
|
||||
print(f" ✅ Tool execution complete")
|
||||
|
||||
# Return as non-streaming response with tool results
|
||||
# Return as streaming response with tool results (opencode expects SSE format)
|
||||
print(f"\n{'='*60}")
|
||||
print(f"RESPONSE (streaming+tools): content_preview={repr(content[:100])}")
|
||||
print(f"{'='*60}\n")
|
||||
|
||||
response_obj = ChatCompletionResponse(
|
||||
id=completion_id,
|
||||
created=created,
|
||||
model=request.model,
|
||||
choices=[
|
||||
ChatCompletionChoice(
|
||||
index=0,
|
||||
message=ChatMessage(
|
||||
role="assistant",
|
||||
content=content,
|
||||
tool_calls=[]
|
||||
),
|
||||
finish_reason="stop"
|
||||
)
|
||||
],
|
||||
usage=UsageInfo(
|
||||
prompt_tokens=len(prompt) // 4,
|
||||
completion_tokens=len(full_response.split()),
|
||||
total_tokens=(len(prompt) // 4) + len(full_response.split())
|
||||
async def tool_stream_generator() -> AsyncIterator[str]:
    """Yield the tool-execution results as an OpenAI-style SSE stream.

    Emits a role delta first, then the accumulated tool output in
    fixed-size content deltas, then a terminating chunk carrying
    finish_reason="stop", and finally the literal "[DONE]" sentinel.
    """
    def _sse(event) -> str:
        # Serialize one stream-response model into a single SSE frame.
        return f"data: {event.model_dump_json()}\n\n"

    # Opening chunk: announces the assistant role only, no content yet.
    yield _sse(
        ChatCompletionStreamResponse(
            id=completion_id,
            created=created,
            model=request.model,
            choices=[ChatCompletionStreamChoice(delta={"role": "assistant"})],
        )
    )

    # Body: slice the already-complete content into 100-char deltas so the
    # client sees an incremental stream rather than one giant frame.
    step = 100
    offset = 0
    while offset < len(content):
        piece = content[offset:offset + step]
        offset += step
        yield _sse(
            ChatCompletionStreamResponse(
                id=completion_id,
                created=created,
                model=request.model,
                choices=[ChatCompletionStreamChoice(delta={"content": piece})],
            )
        )

    # Closing chunk: empty delta with the finish reason, then the
    # protocol-mandated [DONE] sentinel to end the stream.
    yield _sse(
        ChatCompletionStreamResponse(
            id=completion_id,
            created=created,
            model=request.model,
            choices=[
                ChatCompletionStreamChoice(delta={}, finish_reason="stop")
            ],
        )
    )
    yield "data: [DONE]\n\n"
|
||||
|
||||
return StreamingResponse(
|
||||
tool_stream_generator(),
|
||||
media_type="text/event-stream"
|
||||
)
|
||||
return response_obj
|
||||
else:
|
||||
# Regular streaming without tools
|
||||
async def stream_generator() -> AsyncIterator[str]:
|
||||
|
||||
Reference in New Issue
Block a user