server: /v1/responses (partial) (#18486)
* from previous PR * Make instruction(system) as first message * Convert [input_message] (text/image/file) * Rename convert_responses_to_chatcmpl(body) -> response_body * Initial tool call support * Erase instructions field from chatcmpl body * Feed reasoning texts to chat template * Use std::vector instead of opaque json array * Make output_item.added events consistent * Move `server_task_result_cmpl_partial::update` from header to source * Match ID of output_item.added and .done events * Add function_call only if there is no "fc_" prefix * Add function call output at non-streaming API * Test if ID is persistent * Add doc * Fix style - use trailing comma * Rewrite state management * catch up with upstream/master * Fix style - "type" is the first item of SSE data * Explicitly check "instructions" from response_body * Make lambdas static * Check if reasoning content exists * Add `oai_resp_id` to task_result_state(also initialized at ctor), server_task_result_cmpl_partial, and server_task_result_cmpl_final * Reject `input_file` since it is not supported by chatcmpl * Add "fc_" prefix to non-streaming function call id as coderabbit pointed out --------- Co-authored-by: openingnow <>
This commit is contained in:
@@ -2,7 +2,7 @@ aiohttp~=3.9.3
|
||||
pytest~=8.3.3
|
||||
huggingface_hub>=0.34.0,<1.0
|
||||
numpy~=1.26.4
|
||||
openai~=1.55.3
|
||||
openai~=2.14.0
|
||||
prometheus-client~=0.20.0
|
||||
requests~=2.32.3
|
||||
wget~=3.2
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
import pytest
|
||||
from openai import OpenAI
|
||||
from utils import *
|
||||
|
||||
server: ServerProcess
|
||||
|
||||
@pytest.fixture(autouse=True)
def create_server():
    """Recreate the server preset before every test so no state leaks between tests."""
    # `global` is required here: the fixture rebinds the module-level `server`.
    global server
    server = ServerPreset.tinyllama2()
|
||||
|
||||
def test_responses_with_openai_library():
    """Exercise the non-streaming /v1/responses endpoint via the official OpenAI client.

    Checks that:
    - the response ID uses the OpenAI-style "resp_" prefix,
    - the first output item has a non-null ID with the "msg_" prefix,
    - the generated text matches the tinyllama2 preset's expected output.
    """
    # NOTE: no `global` statement needed — `server` is only read, never rebound.
    server.start()
    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
    res = client.responses.create(
        model="gpt-4.1",
        input=[
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        max_output_tokens=8,
        temperature=0.8,
    )
    # IDs must follow the OpenAI Responses API prefix conventions.
    assert res.id.startswith("resp_")
    assert res.output[0].id is not None
    assert res.output[0].id.startswith("msg_")
    assert match_regex("(Suddenly)+", res.output_text)
|
||||
|
||||
def test_responses_stream_with_openai_library():
    """Exercise the streaming /v1/responses endpoint via the official OpenAI client.

    Consumes the SSE event stream and checks that:
    - the response ID ("resp_" prefix) is consistent across created/in_progress events,
    - the output item ID ("msg_" prefix) is consistent across item/content-part events,
    - the concatenated output_text deltas equal the final response's output_text.
    """
    # NOTE: no `global` statement needed — `server` is only read, never rebound.
    server.start()
    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
    stream = client.responses.create(
        model="gpt-4.1",
        input=[
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        max_output_tokens=8,
        temperature=0.8,
        stream=True,
    )

    gathered_text = ""
    resp_id = ""
    msg_id = ""
    # Deliberately sequential `if`s (not `elif`): "response.output_text.delta"
    # must match both the item-ID consistency check and the delta accumulation.
    for r in stream:
        if r.type == "response.created":
            assert r.response.id.startswith("resp_")
            resp_id = r.response.id
        if r.type == "response.in_progress":
            assert r.response.id == resp_id
        if r.type == "response.output_item.added":
            assert r.item.id is not None
            assert r.item.id.startswith("msg_")
            msg_id = r.item.id
        if (r.type == "response.content_part.added" or
            r.type == "response.output_text.delta" or
            r.type == "response.output_text.done" or
            r.type == "response.content_part.done"):
            # Every content-level event must reference the item announced earlier.
            assert r.item_id == msg_id
        if r.type == "response.output_item.done":
            assert r.item.id == msg_id

        if r.type == "response.output_text.delta":
            gathered_text += r.delta
        if r.type == "response.completed":
            assert r.response.id.startswith("resp_")
            assert r.response.output[0].id is not None
            assert r.response.output[0].id.startswith("msg_")
            # Streamed deltas must reassemble into the final output text.
            assert gathered_text == r.response.output_text
            assert match_regex("(Suddenly)+", r.response.output_text)
|
||||
Reference in New Issue
Block a user