From 0f1bb602dd52d3c0c07ac29c8898f2c58c3fa9b9 Mon Sep 17 00:00:00 2001 From: ynankani Date: Mon, 27 Apr 2026 07:58:48 +0000 Subject: [PATCH] model : remove duplicate wo_s scale after build_attn (Qwen3, LLaMA) (#22421) Signed-off-by: Yash Nankani --- src/models/llama.cpp | 3 --- src/models/qwen3.cpp | 3 --- src/models/qwen3moe.cpp | 3 --- 3 files changed, 9 deletions(-) diff --git a/src/models/llama.cpp b/src/models/llama.cpp index ddaa6c40f..8d478dc67 100644 --- a/src/models/llama.cpp +++ b/src/models/llama.cpp @@ -72,9 +72,6 @@ llm_build_llama::llm_build_llama(const llama_model & model, const llm_gra cur = build_attn(inp_attn, model.layers[il].wo, model.layers[il].wo_b, model.layers[il].wo_s, Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il); - if (model.layers[il].wo_s) { - cur = ggml_mul(ctx0, cur, model.layers[il].wo_s); - } cb(cur, "attn_out", il); } if (il == n_layer - 1 && inp_out_ids) { diff --git a/src/models/qwen3.cpp b/src/models/qwen3.cpp index e6f1fc81d..883dd5f9a 100644 --- a/src/models/qwen3.cpp +++ b/src/models/qwen3.cpp @@ -58,9 +58,6 @@ llm_build_qwen3::llm_build_qwen3(const llama_model & model, const llm_graph_para cur = build_attn(inp_attn, model.layers[il].wo, model.layers[il].wo_b, model.layers[il].wo_s, Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); - if (model.layers[il].wo_s) { - cur = ggml_mul(ctx0, cur, model.layers[il].wo_s); - } } if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); diff --git a/src/models/qwen3moe.cpp b/src/models/qwen3moe.cpp index dc554b5b3..16bedba99 100644 --- a/src/models/qwen3moe.cpp +++ b/src/models/qwen3moe.cpp @@ -58,9 +58,6 @@ llm_build_qwen3moe::llm_build_qwen3moe(const llama_model & model, const llm_grap cur = build_attn(inp_attn, model.layers[il].wo, model.layers[il].wo_b, model.layers[il].wo_s, Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); - if (model.layers[il].wo_s) { - cur = ggml_mul(ctx0, cur, model.layers[il].wo_s); - } } if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids);