server: support OAI /v1/audio/transcriptions API (#21863)

* server: support OAI /v1/audio/transcriptions API

* address autoreview comments

* correct default response_format value
This commit is contained in:
Xuan-Son Nguyen
2026-04-14 11:09:52 +02:00
committed by GitHub
parent e21cdc11a0
commit e489a5ca0e
9 changed files with 194 additions and 38 deletions
+27
View File
@@ -725,6 +725,8 @@ json server_task_result_cmpl_final::to_json() {
return stream ? to_json_oaicompat_chat_stream() : to_json_oaicompat_chat();
case TASK_RESPONSE_TYPE_OAI_RESP:
return stream ? to_json_oaicompat_resp_stream() : to_json_oaicompat_resp();
case TASK_RESPONSE_TYPE_OAI_ASR:
return to_json_oaicompat_asr();
case TASK_RESPONSE_TYPE_ANTHROPIC:
return stream ? to_json_anthropic_stream() : to_json_anthropic();
default:
@@ -1102,6 +1104,21 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
return server_sent_events;
}
// Builds the closing event for an OAI-compatible audio transcription (ASR)
// response.
//
// The returned object has:
//   - "type":  always "transcript.text.done"
//   - "text":  the value of `content`
//   - "usage": a token-accounting object ("type": "tokens") with the prompt
//              token count, the decoded token count, their sum, and the
//              cached prompt token count nested under "input_tokens_details".
//
// Keys are inserted in the same order in which they are listed above.
json server_task_result_cmpl_final::to_json_oaicompat_asr() {
    // Nested details object holding the cached prompt token count.
    // NOTE(review): the OpenAI transcription usage object appears to spell this
    // key "input_token_details" (singular "token") - confirm against the API
    // reference before relying on the current "input_tokens_details" spelling.
    json cached_details = json::object();
    cached_details["cached_tokens"] = n_prompt_tokens_cache;

    // Token accounting for the whole request.
    json usage_info = json::object();
    usage_info["type"]                 = "tokens";
    usage_info["input_tokens"]         = n_prompt_tokens;
    usage_info["output_tokens"]        = n_decoded;
    usage_info["total_tokens"]         = n_decoded + n_prompt_tokens;
    usage_info["input_tokens_details"] = cached_details;

    // Top-level "done" event.
    json done_event = json::object();
    done_event["type"]  = "transcript.text.done";
    done_event["text"]  = content;
    done_event["usage"] = usage_info;
    return done_event;
}
json server_task_result_cmpl_final::to_json_anthropic() {
std::string stop_reason = "max_tokens";
if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
@@ -1400,6 +1417,8 @@ json server_task_result_cmpl_partial::to_json() {
return to_json_oaicompat_chat();
case TASK_RESPONSE_TYPE_OAI_RESP:
return to_json_oaicompat_resp();
case TASK_RESPONSE_TYPE_OAI_ASR:
return to_json_oaicompat_asr();
case TASK_RESPONSE_TYPE_ANTHROPIC:
return to_json_anthropic();
default:
@@ -1650,6 +1669,14 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
return events;
}
// Builds an intermediate event for an OAI-compatible audio transcription (ASR)
// response.
//
// The returned object has exactly two keys, inserted in this order:
//   - "type":  always "transcript.text.delta"
//   - "delta": the value of `content`
json server_task_result_cmpl_partial::to_json_oaicompat_asr() {
    json delta_event = json::object();
    delta_event["type"]  = "transcript.text.delta";
    delta_event["delta"] = content;
    return delta_event;
}
json server_task_result_cmpl_partial::to_json_anthropic() {
json events = json::array();
bool first = (n_decoded == 1);