server: support OAI /v1/audio/transcriptions API (#21863)

* server: support OAI /v1/audio/transcriptions API

* address autoreview comments

* correct default response_format value
This commit is contained in:
Xuan-Son Nguyen
2026-04-14 11:09:52 +02:00
committed by GitHub
parent e21cdc11a0
commit e489a5ca0e
9 changed files with 194 additions and 38 deletions
+41 -37
View File
@@ -145,6 +145,7 @@ int main(int argc, char ** argv) {
routes.post_completions_oai = models_routes->proxy_post;
routes.post_chat_completions = models_routes->proxy_post;
routes.post_responses_oai = models_routes->proxy_post;
routes.post_transcriptions_oai = models_routes->proxy_post;
routes.post_anthropic_messages = models_routes->proxy_post;
routes.post_anthropic_count_tokens = models_routes->proxy_post;
routes.post_infill = models_routes->proxy_post;
@@ -160,48 +161,51 @@ int main(int argc, char ** argv) {
routes.post_slots = models_routes->proxy_post;
// custom routes for router
routes.get_props = models_routes->get_router_props;
routes.get_models = models_routes->get_router_models;
ctx_http.post("/models/load", ex_wrapper(models_routes->post_router_models_load));
ctx_http.post("/models/unload", ex_wrapper(models_routes->post_router_models_unload));
routes.get_props = models_routes->get_router_props;
routes.get_models = models_routes->get_router_models;
ctx_http.post("/models/load", ex_wrapper(models_routes->post_router_models_load));
ctx_http.post("/models/unload", ex_wrapper(models_routes->post_router_models_unload));
}
ctx_http.get ("/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check)
ctx_http.get ("/v1/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check)
ctx_http.get ("/metrics", ex_wrapper(routes.get_metrics));
ctx_http.get ("/props", ex_wrapper(routes.get_props));
ctx_http.post("/props", ex_wrapper(routes.post_props));
ctx_http.post("/api/show", ex_wrapper(routes.get_api_show));
ctx_http.get ("/models", ex_wrapper(routes.get_models)); // public endpoint (no API key check)
ctx_http.get ("/v1/models", ex_wrapper(routes.get_models)); // public endpoint (no API key check)
ctx_http.get ("/api/tags", ex_wrapper(routes.get_models)); // ollama specific endpoint. public endpoint (no API key check)
ctx_http.post("/completion", ex_wrapper(routes.post_completions)); // legacy
ctx_http.post("/completions", ex_wrapper(routes.post_completions));
ctx_http.post("/v1/completions", ex_wrapper(routes.post_completions_oai));
ctx_http.post("/chat/completions", ex_wrapper(routes.post_chat_completions));
ctx_http.post("/v1/chat/completions", ex_wrapper(routes.post_chat_completions));
ctx_http.post("/api/chat", ex_wrapper(routes.post_chat_completions)); // ollama specific endpoint
ctx_http.post("/v1/responses", ex_wrapper(routes.post_responses_oai));
ctx_http.post("/responses", ex_wrapper(routes.post_responses_oai));
ctx_http.post("/v1/messages", ex_wrapper(routes.post_anthropic_messages)); // anthropic messages API
ctx_http.get ("/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check)
ctx_http.get ("/v1/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check)
ctx_http.get ("/metrics", ex_wrapper(routes.get_metrics));
ctx_http.get ("/props", ex_wrapper(routes.get_props));
ctx_http.post("/props", ex_wrapper(routes.post_props));
ctx_http.post("/api/show", ex_wrapper(routes.get_api_show));
ctx_http.get ("/models", ex_wrapper(routes.get_models)); // public endpoint (no API key check)
ctx_http.get ("/v1/models", ex_wrapper(routes.get_models)); // public endpoint (no API key check)
ctx_http.get ("/api/tags", ex_wrapper(routes.get_models)); // ollama specific endpoint. public endpoint (no API key check)
ctx_http.post("/completion", ex_wrapper(routes.post_completions)); // legacy
ctx_http.post("/completions", ex_wrapper(routes.post_completions));
ctx_http.post("/v1/completions", ex_wrapper(routes.post_completions_oai));
ctx_http.post("/chat/completions", ex_wrapper(routes.post_chat_completions));
ctx_http.post("/v1/chat/completions", ex_wrapper(routes.post_chat_completions));
ctx_http.post("/api/chat", ex_wrapper(routes.post_chat_completions)); // ollama specific endpoint
ctx_http.post("/v1/responses", ex_wrapper(routes.post_responses_oai));
ctx_http.post("/responses", ex_wrapper(routes.post_responses_oai));
ctx_http.post("/v1/audio/transcriptions", ex_wrapper(routes.post_transcriptions_oai));
ctx_http.post("/audio/transcriptions", ex_wrapper(routes.post_transcriptions_oai));
ctx_http.post("/v1/messages", ex_wrapper(routes.post_anthropic_messages)); // anthropic messages API
ctx_http.post("/v1/messages/count_tokens", ex_wrapper(routes.post_anthropic_count_tokens)); // anthropic token counting
ctx_http.post("/infill", ex_wrapper(routes.post_infill));
ctx_http.post("/embedding", ex_wrapper(routes.post_embeddings)); // legacy
ctx_http.post("/embeddings", ex_wrapper(routes.post_embeddings));
ctx_http.post("/v1/embeddings", ex_wrapper(routes.post_embeddings_oai));
ctx_http.post("/rerank", ex_wrapper(routes.post_rerank));
ctx_http.post("/reranking", ex_wrapper(routes.post_rerank));
ctx_http.post("/v1/rerank", ex_wrapper(routes.post_rerank));
ctx_http.post("/v1/reranking", ex_wrapper(routes.post_rerank));
ctx_http.post("/tokenize", ex_wrapper(routes.post_tokenize));
ctx_http.post("/detokenize", ex_wrapper(routes.post_detokenize));
ctx_http.post("/apply-template", ex_wrapper(routes.post_apply_template));
ctx_http.post("/infill", ex_wrapper(routes.post_infill));
ctx_http.post("/embedding", ex_wrapper(routes.post_embeddings)); // legacy
ctx_http.post("/embeddings", ex_wrapper(routes.post_embeddings));
ctx_http.post("/v1/embeddings", ex_wrapper(routes.post_embeddings_oai));
ctx_http.post("/rerank", ex_wrapper(routes.post_rerank));
ctx_http.post("/reranking", ex_wrapper(routes.post_rerank));
ctx_http.post("/v1/rerank", ex_wrapper(routes.post_rerank));
ctx_http.post("/v1/reranking", ex_wrapper(routes.post_rerank));
ctx_http.post("/tokenize", ex_wrapper(routes.post_tokenize));
ctx_http.post("/detokenize", ex_wrapper(routes.post_detokenize));
ctx_http.post("/apply-template", ex_wrapper(routes.post_apply_template));
// LoRA adapters hotswap
ctx_http.get ("/lora-adapters", ex_wrapper(routes.get_lora_adapters));
ctx_http.post("/lora-adapters", ex_wrapper(routes.post_lora_adapters));
ctx_http.get ("/lora-adapters", ex_wrapper(routes.get_lora_adapters));
ctx_http.post("/lora-adapters", ex_wrapper(routes.post_lora_adapters));
// Save & load slots
ctx_http.get ("/slots", ex_wrapper(routes.get_slots));
ctx_http.post("/slots/:id_slot", ex_wrapper(routes.post_slots));
ctx_http.get ("/slots", ex_wrapper(routes.get_slots));
ctx_http.post("/slots/:id_slot", ex_wrapper(routes.post_slots));
// CORS proxy (EXPERIMENTAL, only used by the Web UI for MCP)
if (params.webui_mcp_proxy) {
SRV_WRN("%s", "-----------------\n");