model : mtmd : make input norm optional in LFM2-VL (#18594)
Upcoming LFM2-VL releases will have a configurable input norm. See https://github.com/huggingface/transformers/pull/43087 for details.
This commit is contained in:
@@ -1552,6 +1552,14 @@ struct clip_model_loader {
|
|||||||
model.projection = get_tensor(TN_MM_PROJECTOR);
|
model.projection = get_tensor(TN_MM_PROJECTOR);
|
||||||
} break;
|
} break;
|
||||||
case PROJECTOR_TYPE_LFM2:
|
case PROJECTOR_TYPE_LFM2:
|
||||||
|
{
|
||||||
|
model.mm_input_norm_w = get_tensor(TN_MM_INP_NORM, false);
|
||||||
|
model.mm_input_norm_b = get_tensor(TN_MM_INP_NORM_B, false);
|
||||||
|
model.mm_1_w = get_tensor(string_format(TN_LLAVA_PROJ, 1, "weight"));
|
||||||
|
model.mm_1_b = get_tensor(string_format(TN_LLAVA_PROJ, 1, "bias"));
|
||||||
|
model.mm_2_w = get_tensor(string_format(TN_LLAVA_PROJ, 2, "weight"));
|
||||||
|
model.mm_2_b = get_tensor(string_format(TN_LLAVA_PROJ, 2, "bias"));
|
||||||
|
} break;
|
||||||
case PROJECTOR_TYPE_KIMIVL:
|
case PROJECTOR_TYPE_KIMIVL:
|
||||||
{
|
{
|
||||||
model.mm_input_norm_w = get_tensor(TN_MM_INP_NORM);
|
model.mm_input_norm_w = get_tensor(TN_MM_INP_NORM);
|
||||||
|
|||||||
@@ -50,10 +50,15 @@ ggml_cgraph * clip_graph_siglip::build() {
|
|||||||
const int scale_factor = model.hparams.n_merge;
|
const int scale_factor = model.hparams.n_merge;
|
||||||
cur = build_patch_merge_permute(cur, scale_factor);
|
cur = build_patch_merge_permute(cur, scale_factor);
|
||||||
|
|
||||||
// projection
|
// projection, in LFM2-VL input norm is optional
|
||||||
cur = ggml_norm(ctx0, cur, 1e-5); // default nn.LayerNorm
|
if (model.mm_input_norm_w) {
|
||||||
cur = ggml_mul(ctx0, cur, model.mm_input_norm_w);
|
cur = ggml_norm(ctx0, cur, 1e-5); // default nn.LayerNorm
|
||||||
cur = ggml_add(ctx0, cur, model.mm_input_norm_b);
|
cur = ggml_mul(ctx0, cur, model.mm_input_norm_w);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (model.mm_input_norm_b) {
|
||||||
|
cur = ggml_add(ctx0, cur, model.mm_input_norm_b);
|
||||||
|
}
|
||||||
|
|
||||||
cur = build_ffn(cur,
|
cur = build_ffn(cur,
|
||||||
model.mm_1_w, model.mm_1_b,
|
model.mm_1_w, model.mm_1_b,
|
||||||
|
|||||||
Reference in New Issue
Block a user