server: /v1/responses (partial) (#18486)
* from previous PR * Make instruction(system) as first message * Convert [input_message] (text/image/file) * Rename convert_responses_to_chatcmpl(body) -> response_body * Initial tool call support * Erase instructions field from chatcmpl body * Feed reasoning texts to chat template * Use std::vector instead of opaque json array * Make output_item.added events consistent * Move `server_task_result_cmpl_partial::update` from header to source * Match ID of output_item.added and .done events * Add function_call only if there is no "fc_" prefix * Add function call output at non-streaming API * Test if ID is persistent * Add doc * Fix style - use trailing comma * Rewrite state management * catch up with upstream/master * Fix style - "type" is the first item of SSE data * Explicitly check "instructions" from response_body * Make lambdas static * Check if reasoning content exists * Add `oai_resp_id` to task_result_state(also initialized at ctor), server_task_result_cmpl_partial, and server_task_result_cmpl_final * Reject `input_file` since it is not supported by chatcmpl * Add "fc_" prefix to non-streaming function call id as coderabbit pointed out --------- Co-authored-by: openingnow <>
This commit is contained in:
@@ -2,7 +2,7 @@ aiohttp~=3.9.3
|
||||
pytest~=8.3.3
|
||||
huggingface_hub>=0.34.0,<1.0
|
||||
numpy~=1.26.4
|
||||
openai~=1.55.3
|
||||
openai~=2.14.0
|
||||
prometheus-client~=0.20.0
|
||||
requests~=2.32.3
|
||||
wget~=3.2
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
import pytest
|
||||
from openai import OpenAI
|
||||
from utils import *
|
||||
|
||||
server: ServerProcess
|
||||
|
||||
@pytest.fixture(autouse=True)
def create_server():
    """Recreate the server preset before every test so no state leaks between tests."""
    # `global` is required here: the fixture rebinds the module-level `server`.
    global server
    server = ServerPreset.tinyllama2()
|
||||
|
||||
def test_responses_with_openai_library():
    """Exercise the non-streaming /v1/responses endpoint via the official OpenAI client.

    Checks that:
    - the response ID uses the OpenAI-style "resp_" prefix,
    - the first output item has a non-null ID with the "msg_" prefix,
    - the generated text matches the tinyllama2 preset's expected output.
    """
    # NOTE: no `global` statement needed — `server` is only read, never rebound.
    server.start()
    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
    res = client.responses.create(
        model="gpt-4.1",
        input=[
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        max_output_tokens=8,
        temperature=0.8,
    )
    # IDs must follow the OpenAI Responses API prefix conventions.
    assert res.id.startswith("resp_")
    assert res.output[0].id is not None
    assert res.output[0].id.startswith("msg_")
    assert match_regex("(Suddenly)+", res.output_text)
|
||||
|
||||
def test_responses_stream_with_openai_library():
    """Exercise the streaming /v1/responses endpoint via the official OpenAI client.

    Consumes the SSE event stream and checks that:
    - the response ID ("resp_" prefix) is consistent across created/in_progress events,
    - the output item ID ("msg_" prefix) is consistent across item/content-part events,
    - the concatenated output_text deltas equal the final response's output_text.
    """
    # NOTE: no `global` statement needed — `server` is only read, never rebound.
    server.start()
    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
    stream = client.responses.create(
        model="gpt-4.1",
        input=[
            {"role": "system", "content": "Book"},
            {"role": "user", "content": "What is the best book"},
        ],
        max_output_tokens=8,
        temperature=0.8,
        stream=True,
    )

    gathered_text = ""
    resp_id = ""
    msg_id = ""
    # Deliberately sequential `if`s (not `elif`): "response.output_text.delta"
    # must match both the item-ID consistency check and the delta accumulation.
    for r in stream:
        if r.type == "response.created":
            assert r.response.id.startswith("resp_")
            resp_id = r.response.id
        if r.type == "response.in_progress":
            assert r.response.id == resp_id
        if r.type == "response.output_item.added":
            assert r.item.id is not None
            assert r.item.id.startswith("msg_")
            msg_id = r.item.id
        if (r.type == "response.content_part.added" or
            r.type == "response.output_text.delta" or
            r.type == "response.output_text.done" or
            r.type == "response.content_part.done"):
            # Every content-level event must reference the item announced earlier.
            assert r.item_id == msg_id
        if r.type == "response.output_item.done":
            assert r.item.id == msg_id

        if r.type == "response.output_text.delta":
            gathered_text += r.delta
        if r.type == "response.completed":
            assert r.response.id.startswith("resp_")
            assert r.response.output[0].id is not None
            assert r.response.output[0].id.startswith("msg_")
            # Streamed deltas must reassemble into the final output text.
            assert gathered_text == r.response.output_text
            assert match_regex("(Suddenly)+", r.response.output_text)
|
||||
Reference in New Issue
Block a user