model : add ASR support for LFM2-Audio-1.5B (conformer) (#18106)

* ASR with LFM2-Audio-1.5B

* Set rope_theta

* Fix comment

* Remove rope_theta setting

* Address PR feedback

* rename functions to conformer

* remove some redundant ggml_cont

* fix missing tensor

* add prefix "a." for conv tensors

* remove redundant reshape

* clean up

* add test model

---------

Co-authored-by: Tarek Dakhran <tarek@liquid.ai>
This commit is contained in:
Xuan-Son Nguyen
2025-12-19 00:18:01 +01:00
committed by GitHub
parent f9ec8858ed
commit 8ea958d4d9
17 changed files with 669 additions and 29 deletions
+19
View File
@@ -309,9 +309,24 @@ int main(int argc, char ** argv) {
if (g_is_interrupted) return 130;
auto eval_system_prompt_if_present = [&] {
if (params.system_prompt.empty()) {
return 0;
}
common_chat_msg msg;
msg.role = "system";
msg.content = params.system_prompt;
return eval_message(ctx, msg);
};
LOG_WRN("WARN: This is an experimental CLI for testing multimodal capability.\n");
LOG_WRN(" For normal use cases, please use the standard llama-cli\n");
if (eval_system_prompt_if_present()) {
return 1;
}
if (is_single_turn) {
g_is_generating = true;
if (params.prompt.find(mtmd_default_marker()) == std::string::npos) {
@@ -321,6 +336,7 @@ int main(int argc, char ** argv) {
params.prompt = mtmd_default_marker() + params.prompt;
}
}
common_chat_msg msg;
msg.role = "user";
msg.content = params.prompt;
@@ -369,6 +385,9 @@ int main(int argc, char ** argv) {
ctx.n_past = 0;
ctx.chat_history.clear();
llama_memory_clear(llama_get_memory(ctx.lctx), true);
if (eval_system_prompt_if_present()) {
return 1;
}
LOG("Chat history cleared\n\n");
continue;
}