mtmd: add clip_graph::build_mm() (#20751)

* clip: add build_mm()

* apply to all models

* add TODO for bias overload
This commit is contained in:
Xuan-Son Nguyen
2026-03-19 13:11:39 +01:00
committed by GitHub
parent cd708db0cc
commit 1e64534570
15 changed files with 75 additions and 66 deletions
+4 -4
View File
@@ -22,7 +22,7 @@ ggml_cgraph * clip_graph_llama4::build() {
ggml_tensor * kernel = ggml_reshape_4d(ctx0, model.patch_embeddings_0,
patch_size, patch_size, 3, n_embd);
inp = ggml_im2col(ctx0, kernel, inp, patch_size, patch_size, 0, 0, 1, 1, true, inp->type);
-inp = ggml_mul_mat(ctx0, model.patch_embeddings_0, inp);
+inp = build_mm(model.patch_embeddings_0, inp);
inp = ggml_reshape_2d(ctx0, inp, n_embd, n_patches);
cb(inp, "patch_conv", -1);
}
@@ -78,15 +78,15 @@ ggml_cgraph * clip_graph_llama4::build() {
// based on Llama4VisionMLP2 (always uses GELU activation, no bias)
{
-cur = ggml_mul_mat(ctx0, model.mm_model_mlp_1_w, cur);
+cur = build_mm(model.mm_model_mlp_1_w, cur);
cur = ggml_gelu(ctx0, cur);
-cur = ggml_mul_mat(ctx0, model.mm_model_mlp_2_w, cur);
+cur = build_mm(model.mm_model_mlp_2_w, cur);
cur = ggml_gelu(ctx0, cur);
cb(cur, "adapter_mlp", -1);
}
// Llama4MultiModalProjector
-cur = ggml_mul_mat(ctx0, model.mm_model_proj, cur);
+cur = build_mm(model.mm_model_proj, cur);
cb(cur, "projected", -1);
// build the graph