llama: dynamic head_dim and n_rot for SWA (#20301)

* llama: dynamic head_dim and n_rot for SWA

* also add gguf_writer wrappers

* fix build

* build_rope_shift arg reorder
This commit is contained in:
Xuan-Son Nguyen
2026-03-09 22:22:39 +01:00
committed by GitHub
parent 23fbfcb1ad
commit 59db9a357d
112 changed files with 419 additions and 346 deletions
+4 -4
View File
@@ -4,9 +4,9 @@
llm_build_qwen35moe::llm_build_qwen35moe(const llama_model & model, const llm_graph_params & params) :
llm_build_delta_net_base(params), model(model) {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_head = hparams.n_embd_head_v();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k());
int sections[4];
std::copy(std::begin(hparams.rope_sections), std::begin(hparams.rope_sections) + 4, sections);
@@ -117,8 +117,8 @@ ggml_tensor * llm_build_qwen35moe ::build_layer_attn(
ggml_tensor * inp_pos,
int * sections,
int il) {
const int64_t n_embd_head = hparams.n_embd_head_v;
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
const int64_t n_embd_head = hparams.n_embd_head_v();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k());
// Order: joint QG projection, QG split, Q norm, KV projection, K norm, RoPE, attention