server: Enable transcriptions API for LFM2-Audio (#22000)
This commit is contained in:
+21
-5
@@ -544,6 +544,26 @@ bool common_chat_templates_was_explicit(const struct common_chat_templates * tmp
|
|||||||
return tmpls->has_explicit_template;
|
return tmpls->has_explicit_template;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LFM2 format detection: template uses <|tool_list_start|>[...]<|tool_list_end|> around the tool list
|
||||||
|
// and <|tool_call_start|>[...]<|tool_call_end|> around each tool call
|
||||||
|
static bool is_lfm2_template(const std::string & src) {
|
||||||
|
return src.find("<|tool_list_start|>") != std::string::npos &&
|
||||||
|
src.find("<|tool_list_end|>") != std::string::npos;
|
||||||
|
}
|
||||||
|
|
||||||
|
common_chat_prompt_preset common_chat_get_asr_prompt(const common_chat_templates * chat_templates) {
|
||||||
|
common_chat_prompt_preset asr_preset;
|
||||||
|
asr_preset.system = "";
|
||||||
|
asr_preset.user = "Transcribe audio to text";
|
||||||
|
|
||||||
|
if (chat_templates && chat_templates->template_default && is_lfm2_template(chat_templates->template_default->source())) {
|
||||||
|
asr_preset.system = "Perform ASR.";
|
||||||
|
asr_preset.user = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
return asr_preset;
|
||||||
|
}
|
||||||
|
|
||||||
std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant) {
|
std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant) {
|
||||||
if (!variant.empty()) {
|
if (!variant.empty()) {
|
||||||
if (variant == "tool_use") {
|
if (variant == "tool_use") {
|
||||||
@@ -2053,10 +2073,7 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
|
|||||||
return common_chat_params_init_kimi_k2(tmpl, params);
|
return common_chat_params_init_kimi_k2(tmpl, params);
|
||||||
}
|
}
|
||||||
|
|
||||||
// LFM2 format detection: template uses <|tool_list_start|>[...]<|tool_list_end|> around the tool list
|
if (is_lfm2_template(src)) {
|
||||||
// and <|tool_call_start|>[...]<|tool_call_end|> around each tool call
|
|
||||||
if (src.find("<|tool_list_start|>") != std::string::npos &&
|
|
||||||
src.find("<|tool_list_end|>") != std::string::npos) {
|
|
||||||
LOG_DBG("Using specialized template: LFM2\n");
|
LOG_DBG("Using specialized template: LFM2\n");
|
||||||
return common_chat_params_init_lfm2(tmpl, params);
|
return common_chat_params_init_lfm2(tmpl, params);
|
||||||
}
|
}
|
||||||
@@ -2365,4 +2382,3 @@ std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_tem
|
|||||||
GGML_ASSERT(chat_templates->template_default != nullptr);
|
GGML_ASSERT(chat_templates->template_default != nullptr);
|
||||||
return chat_templates->template_default->caps.to_map();
|
return chat_templates->template_default->caps.to_map();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -274,3 +274,11 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
|
|||||||
const common_chat_template & tmpl,
|
const common_chat_template & tmpl,
|
||||||
const std::string & src,
|
const std::string & src,
|
||||||
autoparser::generation_params & params);
|
autoparser::generation_params & params);
|
||||||
|
|
||||||
|
// specialized per-task preset
|
||||||
|
struct common_chat_prompt_preset {
|
||||||
|
std::string system;
|
||||||
|
std::string user;
|
||||||
|
};
|
||||||
|
|
||||||
|
common_chat_prompt_preset common_chat_get_asr_prompt(const common_chat_templates * chat_templates);
|
||||||
|
|||||||
@@ -535,6 +535,7 @@ json server_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
|
|||||||
|
|
||||||
json convert_transcriptions_to_chatcmpl(
|
json convert_transcriptions_to_chatcmpl(
|
||||||
const json & inp_body,
|
const json & inp_body,
|
||||||
|
const common_chat_templates * tmpls,
|
||||||
const std::map<std::string, raw_buffer> & in_files,
|
const std::map<std::string, raw_buffer> & in_files,
|
||||||
std::vector<raw_buffer> & out_files) {
|
std::vector<raw_buffer> & out_files) {
|
||||||
// TODO @ngxson : this function may need to be improved in the future
|
// TODO @ngxson : this function may need to be improved in the future
|
||||||
@@ -554,21 +555,23 @@ json convert_transcriptions_to_chatcmpl(
|
|||||||
if (response_format != "json") {
|
if (response_format != "json") {
|
||||||
throw std::invalid_argument("Only 'json' response_format is supported for transcription");
|
throw std::invalid_argument("Only 'json' response_format is supported for transcription");
|
||||||
}
|
}
|
||||||
|
const common_chat_prompt_preset preset = common_chat_get_asr_prompt(tmpls);
|
||||||
if (prompt.empty()) {
|
if (prompt.empty()) {
|
||||||
prompt = "Transcribe audio to text";
|
prompt = preset.user;
|
||||||
}
|
}
|
||||||
if (!language.empty()) {
|
if (!language.empty()) {
|
||||||
prompt += string_format(" (language: %s)", language.c_str());
|
prompt += string_format(" (language: %s)", language.c_str());
|
||||||
}
|
}
|
||||||
prompt += get_media_marker();
|
prompt += get_media_marker();
|
||||||
|
|
||||||
|
json messages = json::array();
|
||||||
|
if (!preset.system.empty()) {
|
||||||
|
messages.push_back({{"role", "system"}, {"content", preset.system}});
|
||||||
|
}
|
||||||
|
messages.push_back({{"role", "user"}, {"content", prompt}});
|
||||||
|
|
||||||
json chatcmpl_body = inp_body; // copy all fields
|
json chatcmpl_body = inp_body; // copy all fields
|
||||||
chatcmpl_body["messages"] = json::array({
|
chatcmpl_body["messages"] = messages;
|
||||||
{
|
|
||||||
{"role", "user"},
|
|
||||||
{"content", prompt},
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
// because input from form-data, everything is string, we need to correct the types here
|
// because input from form-data, everything is string, we need to correct the types here
|
||||||
std::string stream = json_value(inp_body, "stream", std::string("false"));
|
std::string stream = json_value(inp_body, "stream", std::string("false"));
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ json server_chat_convert_anthropic_to_oai(const json & body);
|
|||||||
// convert OpenAI transcriptions API format to OpenAI Chat Completions API format
|
// convert OpenAI transcriptions API format to OpenAI Chat Completions API format
|
||||||
json convert_transcriptions_to_chatcmpl(
|
json convert_transcriptions_to_chatcmpl(
|
||||||
const json & body,
|
const json & body,
|
||||||
|
const common_chat_templates * tmpls,
|
||||||
const std::map<std::string, raw_buffer> & in_files,
|
const std::map<std::string, raw_buffer> & in_files,
|
||||||
std::vector<raw_buffer> & out_files);
|
std::vector<raw_buffer> & out_files);
|
||||||
|
|
||||||
|
|||||||
@@ -3807,6 +3807,7 @@ void server_routes::init_routes() {
|
|||||||
std::vector<raw_buffer> files;
|
std::vector<raw_buffer> files;
|
||||||
json body = convert_transcriptions_to_chatcmpl(
|
json body = convert_transcriptions_to_chatcmpl(
|
||||||
json::parse(req.body),
|
json::parse(req.body),
|
||||||
|
meta->chat_params.tmpls.get(),
|
||||||
req.files,
|
req.files,
|
||||||
files);
|
files);
|
||||||
SRV_DBG("%s\n", "Request converted: OpenAI Transcriptions -> OpenAI Chat Completions");
|
SRV_DBG("%s\n", "Request converted: OpenAI Transcriptions -> OpenAI Chat Completions");
|
||||||
|
|||||||
Reference in New Issue
Block a user