llama: dynamic head_dim and n_rot for SWA (#20301)
* llama: dynamic head_dim and n_rot for SWA
* also add gguf_writer wrappers
* fix build
* build_rope_shift arg reorder
This commit is contained in:
@@ -4,9 +4,9 @@
|
||||
|
||||
llm_build_qwen35moe::llm_build_qwen35moe(const llama_model & model, const llm_graph_params & params) :
|
||||
llm_build_delta_net_base(params), model(model) {
|
||||
const int64_t n_embd_head = hparams.n_embd_head_v;
|
||||
const int64_t n_embd_head = hparams.n_embd_head_v();
|
||||
|
||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k());
|
||||
|
||||
int sections[4];
|
||||
std::copy(std::begin(hparams.rope_sections), std::begin(hparams.rope_sections) + 4, sections);
|
||||
@@ -117,8 +117,8 @@ ggml_tensor * llm_build_qwen35moe ::build_layer_attn(
|
||||
ggml_tensor * inp_pos,
|
||||
int * sections,
|
||||
int il) {
|
||||
const int64_t n_embd_head = hparams.n_embd_head_v;
|
||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
||||
const int64_t n_embd_head = hparams.n_embd_head_v();
|
||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k());
|
||||
|
||||
// Order: joint QG projection, QG split, Q norm, KV projection, K norm, RoPE, attention
|
||||
|
||||
|
||||
Reference in New Issue
Block a user