mtmd: add clip_graph::build_mm() (#20751)

* clip: add build_mm()
* apply to all models
* add TODO for bias overload
2026-03-19 13:11:39 +01:00
parent cd708db0cc
commit 1e64534570
15 changed files with 75 additions and 66 deletions
@@ -22,7 +22,7 @@ ggml_cgraph * clip_graph_llama4::build() {
        ggml_tensor * kernel = ggml_reshape_4d(ctx0, model.patch_embeddings_0,
                                                patch_size, patch_size, 3, n_embd);
        inp = ggml_im2col(ctx0, kernel, inp, patch_size, patch_size, 0, 0, 1, 1, true, inp->type);
-        inp = ggml_mul_mat(ctx0, model.patch_embeddings_0, inp);
+        inp = build_mm(model.patch_embeddings_0, inp);
        inp = ggml_reshape_2d(ctx0, inp, n_embd, n_patches);
        cb(inp, "patch_conv", -1);
    }
@@ -78,15 +78,15 @@ ggml_cgraph * clip_graph_llama4::build() {

    // based on Llama4VisionMLP2 (always uses GELU activation, no bias)
    {
-        cur = ggml_mul_mat(ctx0, model.mm_model_mlp_1_w, cur);
+        cur = build_mm(model.mm_model_mlp_1_w, cur);
        cur = ggml_gelu(ctx0, cur);
-        cur = ggml_mul_mat(ctx0, model.mm_model_mlp_2_w, cur);
+        cur = build_mm(model.mm_model_mlp_2_w, cur);
        cur = ggml_gelu(ctx0, cur);
        cb(cur, "adapter_mlp", -1);
    }

    // Llama4MultiModalProjector
-    cur = ggml_mul_mat(ctx0, model.mm_model_proj, cur);
+    cur = build_mm(model.mm_model_proj, cur);
    cb(cur, "projected", -1);

    // build the graph