model : support NVFP4 tensors for Gemma4 (#21971)

* support NVFP4 tensors for Gemma4

* add wo_s to build_attn

* add wo_s to build_attn

* fix glm4
This commit is contained in:
Sigbjørn Skjæret
2026-04-16 16:51:47 +02:00
committed by GitHub
parent b572d1ecd6
commit f772f6e434
105 changed files with 149 additions and 148 deletions
+1 -2
View File
@@ -1,6 +1,5 @@
#include "models.h"
llm_build_qwen::llm_build_qwen(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
const int64_t n_embd_head = hparams.n_embd_head_v();
@@ -56,7 +55,7 @@ llm_build_qwen::llm_build_qwen(const llama_model & model, const llm_graph_params
cb(Vcur, "Vcur", il);
cur = build_attn(inp_attn,
model.layers[il].wo, NULL,
model.layers[il].wo, NULL, model.layers[il].wo_s,
Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
}
if (il == n_layer - 1 && inp_out_ids) {