server: support OAI /v1/audio/transcriptions API (#21863)

* server: support OAI /v1/audio/transcriptions API

* address autoreview comments

* correct default response_format value
This commit is contained in:
Xuan-Son Nguyen
2026-04-14 11:09:52 +02:00
committed by GitHub
parent e21cdc11a0
commit e489a5ca0e
9 changed files with 194 additions and 38 deletions
+54
View File
@@ -1433,6 +1433,60 @@ json convert_responses_to_chatcmpl(const json & response_body) {
return chatcmpl_body;
}
/**
 * Convert an OAI-style /v1/audio/transcriptions request into a chat completion request body.
 *
 * @param inp_body  request fields; when they come from multipart form-data every value is a string,
 *                  but already-typed JSON values (bool / number) are accepted as well
 * @param in_files  uploaded files keyed by form field name; the audio must be stored under "file"
 * @param out_files cleared first, then filled with the single input audio buffer
 * @return a copy of inp_body with "messages" set to one user message and with
 *         "stream", "max_tokens" and "temperature" converted to typed JSON values
 * @throws std::invalid_argument if no "file" entry exists, if "response_format" is not "json",
 *         or if "max_tokens" / "temperature" cannot be parsed as a number
 */
json convert_transcriptions_to_chatcmpl(
        const json & inp_body,
        const std::map<std::string, raw_buffer> & in_files,
        std::vector<raw_buffer> & out_files) {
    // TODO @ngxson : this function may need to be improved in the future

    // Strictly parse an integer field: accepts a JSON integer or a string that is entirely a number.
    // Signed parsing keeps negative inputs negative instead of wrapping them to a huge unsigned value.
    const auto parse_integer = [](const std::string & key, const json & val) -> long long {
        if (val.is_number_integer()) {
            return val.get<long long>();
        }
        if (val.is_string()) {
            const std::string str = val.get<std::string>();
            try {
                size_t pos = 0;
                const long long parsed = std::stoll(str, &pos);
                if (pos == str.size()) {
                    return parsed;
                }
            } catch (const std::exception &) {
                // not a number or out of range: reported by the throw below
            }
        }
        throw std::invalid_argument("Invalid value for '" + key + "': expected an integer");
    };

    // Strictly parse a floating-point field: accepts a JSON number or a string that is entirely a number.
    const auto parse_float = [](const std::string & key, const json & val) -> float {
        if (val.is_number()) {
            return val.get<float>();
        }
        if (val.is_string()) {
            const std::string str = val.get<std::string>();
            try {
                size_t pos = 0;
                const float parsed = std::stof(str, &pos);
                if (pos == str.size()) {
                    return parsed;
                }
            } catch (const std::exception &) {
                // not a number or out of range: reported by the throw below
            }
        }
        throw std::invalid_argument("Invalid value for '" + key + "': expected a number");
    };

    // handle input files
    // note: out_files is cleared even when the lookup below fails
    out_files.clear();
    const auto it = in_files.find("file");
    if (it == in_files.end()) {
        throw std::invalid_argument("No input file found for transcription");
    }
    out_files.push_back(it->second);

    // handle input data
    std::string prompt          = json_value(inp_body, "prompt", std::string());
    std::string language        = json_value(inp_body, "language", std::string());
    std::string response_format = json_value(inp_body, "response_format", std::string("json"));
    if (response_format != "json") {
        throw std::invalid_argument("Only 'json' response_format is supported for transcription");
    }
    if (prompt.empty()) {
        prompt = "Transcribe audio to text";
    }
    if (!language.empty()) {
        prompt += string_format(" (language: %s)", language.c_str());
    }
    // NOTE(review): presumably the marker is the placeholder later replaced by the audio
    // stored in out_files - confirm against the multimodal tokenizer
    prompt += mtmd_default_marker();

    json chatcmpl_body = inp_body; // copy all fields
    chatcmpl_body["messages"] = json::array({
        {
            {"role",    "user"},
            {"content", prompt},
        },
    });

    // because input from form-data, everything is string, we need to correct the types here
    bool stream = false;
    if (inp_body.contains("stream")) {
        const json & val = inp_body.at("stream");
        if (val.is_boolean()) {
            // honor an already-typed boolean instead of silently treating it as false
            stream = val.get<bool>();
        } else if (val.is_string()) {
            stream = val.get<std::string>() == "true";
        }
    }
    chatcmpl_body["stream"] = stream;

    if (inp_body.contains("max_tokens")) {
        chatcmpl_body["max_tokens"] = parse_integer("max_tokens", inp_body.at("max_tokens"));
    }
    if (inp_body.contains("temperature")) {
        chatcmpl_body["temperature"] = parse_float("temperature", inp_body.at("temperature"));
    }
    return chatcmpl_body;
}
json convert_anthropic_to_oai(const json & body) {
json oai_body;