model : support NVFP4 tensors for Gemma4 (#21971)

* support nvfp4 tensors for Gemma4

* add wo_s to build_attn

* add wo_s to build_attn

* fix glm4
This commit is contained in:
Sigbjørn Skjæret
2026-04-16 16:51:47 +02:00
committed by GitHub
parent b572d1ecd6
commit f772f6e434
105 changed files with 149 additions and 148 deletions
+1 -1
View File
@@ -179,7 +179,7 @@ ggml_tensor * llm_build_qwen35moe ::build_layer_attn(
const float kq_scale = hparams.f_attention_scale == 0.0f ? 1.0f / sqrtf(float(n_embd_head)) : hparams.f_attention_scale;
cur = build_attn(inp,
-nullptr, nullptr,
+nullptr, nullptr, nullptr,
Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
cb(cur, "attn_pregate", il);