model : add ASR support for LFM2-Audio-1.5B (conformer) (#18106)

* ASR with LFM2-Audio-1.5B

* Set rope_theta

* Fix comment

* Remove rope_theta setting

* Address PR feedback

* rename functions to conformer

* remove some redundant ggml_cont

* fix missing tensor

* add prefix "a." for conv tensors

* remove redundant reshape

* clean up

* add test model

---------

Co-authored-by: Tarek Dakhran <tarek@liquid.ai>
This commit is contained in:
Xuan-Son Nguyen
2025-12-19 00:18:01 +01:00
committed by GitHub
parent f9ec8858ed
commit 8ea958d4d9
17 changed files with 669 additions and 29 deletions
+17
View File
@@ -138,6 +138,21 @@
#define TN_TOK_BOI "v.boi"
#define TN_TOK_EOI "v.eoi"
// (conformer) lfm2
#define TN_PRE_ENCODE_OUT "a.pre_encode.out.%s"
#define TN_FFN_NORM "%s.blk.%d.ffn_norm.%s"
#define TN_FFN_NORM_1 "%s.blk.%d.ffn_norm_1.%s"
#define TN_FFN_UP_1 "%s.blk.%d.ffn_up_1.%s"
#define TN_FFN_DOWN_1 "%s.blk.%d.ffn_down_1.%s"
#define TN_POS_BIAS_U "%s.blk.%d.pos_bias_u"
#define TN_POS_BIAS_V "%s.blk.%d.pos_bias_v"
#define TN_NORM_CONV "%s.blk.%d.norm_conv.%s"
#define TN_LINEAR_POS "%s.blk.%d.linear_pos.%s"
#define TN_CONV_DW "%s.blk.%d.conv_dw.%s"
#define TN_CONV_NORM "%s.blk.%d.conv_norm.%s"
#define TN_CONV_PW1 "%s.blk.%d.conv_pw1.%s"
#define TN_CONV_PW2 "%s.blk.%d.conv_pw2.%s"
// align x to upper multiple of n
#define CLIP_ALIGN(x, n) ((((x) + (n) - 1) / (n)) * (n))
@@ -170,6 +185,7 @@ enum projector_type {
PROJECTOR_TYPE_LIGHTONOCR,
PROJECTOR_TYPE_COGVLM,
PROJECTOR_TYPE_JANUS_PRO,
PROJECTOR_TYPE_LFM2A,
PROJECTOR_TYPE_GLM4V,
PROJECTOR_TYPE_UNKNOWN,
};
@@ -198,6 +214,7 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
{ PROJECTOR_TYPE_LIGHTONOCR,"lightonocr"},
{ PROJECTOR_TYPE_COGVLM, "cogvlm"},
{ PROJECTOR_TYPE_JANUS_PRO, "janus_pro"},
{ PROJECTOR_TYPE_LFM2A, "lfm2a"},
{ PROJECTOR_TYPE_GLM4V, "glm4v"},
};