llama: dynamic head_dim and n_rot for SWA (#20301)
* llama: dynamic head_dim and n_rot for SWA
* also add gguf_writer wrappers
* fix build
* build_rope_shift arg reorder
This commit is contained in:
@@ -4,10 +4,10 @@ llm_build_qwen3vl::llm_build_qwen3vl(const llama_model & model, const llm_graph_
|
||||
const size_t n_deepstack_layers = hparams.n_deepstack_layers;
|
||||
|
||||
const int64_t n_embd = hparams.n_embd;
|
||||
const int64_t n_embd_head = hparams.n_embd_head_v;
|
||||
const int64_t n_embd_head = hparams.n_embd_head_v();
|
||||
|
||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
|
||||
GGML_ASSERT(n_embd_head == hparams.n_rot);
|
||||
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k());
|
||||
GGML_ASSERT(n_embd_head == n_rot);
|
||||
|
||||
ggml_tensor * cur;
|
||||
ggml_tensor * inpL;
|
||||
|
||||
Reference in New Issue
Block a user