llama: dynamic head_dim and n_rot for SWA (#20301)

* llama: dynamic head_dim and n_rot for SWA

* also add gguf_writer wrappers

* fix build

* build_rope_shift arg reorder
Author: Xuan-Son Nguyen
Date: 2026-03-09 22:22:39 +01:00
Committed by: GitHub
Parent: 23fbfcb1ad
Commit: 59db9a357d
112 changed files with 419 additions and 346 deletions
@@ -100,8 +100,8 @@ ggml_tensor * llm_build_qwen3next::build_layer_attn(
         ggml_tensor * cur,
         ggml_tensor * inp_pos,
         int il) {
-    const int64_t n_embd_head = hparams.n_embd_head_v;
-    GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
+    const int64_t n_embd_head = hparams.n_embd_head_v();
+    GGML_ASSERT(n_embd_head == hparams.n_embd_head_k());
 
     // Order: joint QG projection, QG split, Q norm, KV projection, K norm, RoPE, attention
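A minimal sketch of the field-to-accessor pattern shown in the hunk above. The struct name, the *_base fields, the swa_layers flag vector, and the per-layer overloads are all assumptions made for illustration; they are not the actual llama.cpp implementation.

#include <cassert>
#include <cstdint>
#include <vector>

// Sketch only: turning plain head_dim/n_rot fields into accessors so
// sliding-window-attention (SWA) layers can report different dimensions
// than full-attention layers. All names here are hypothetical.
struct hparams_sketch {
    uint32_t n_embd_head_k_base = 128;   // head dim for K on regular layers
    uint32_t n_embd_head_v_base = 128;   // head dim for V on regular layers
    uint32_t n_rot_base         = 128;   // rotary dims on regular layers

    // Hypothetical per-layer overrides for SWA layers.
    uint32_t n_embd_head_swa = 64;
    uint32_t n_rot_swa       = 32;
    std::vector<bool> swa_layers;        // swa_layers[il] == true -> SWA layer

    // Zero-argument accessors return the regular-layer values, so call
    // sites like the ones in the diff change only by the added "()".
    uint32_t n_embd_head_k() const { return n_embd_head_k_base; }
    uint32_t n_embd_head_v() const { return n_embd_head_v_base; }

    // Per-layer overloads pick the SWA value where the flag is set.
    uint32_t n_embd_head_k(int il) const {
        return swa_layers[il] ? n_embd_head_swa : n_embd_head_k_base;
    }
    uint32_t n_rot(int il) const {
        return swa_layers[il] ? n_rot_swa : n_rot_base;
    }
};

int main() {
    hparams_sketch hp;
    hp.swa_layers = {false, true, false};

    // Mirrors the call sites in the diff: accessor instead of field.
    const int64_t n_embd_head = hp.n_embd_head_v();
    assert(n_embd_head == hp.n_embd_head_k());

    assert(hp.n_embd_head_k(1) == 64); // SWA layer uses the smaller head dim
    assert(hp.n_rot(0) == 128);        // regular layer keeps full rotation
    return 0;
}

One appeal of the accessor form, visible in the hunk above, is that existing callers need only gain a pair of parentheses, which keeps per-file deltas as small as the two changed lines shown here.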