models : deduplicate delta-net graphs for Qwen family (#19597)

* models : add llm_build_delta_net_base

* cont : keep qwen35 and qwen35moe graphs intact

* cont : add comments
This commit is contained in:
Georgi Gerganov
2026-02-16 14:35:04 +02:00
committed by GitHub
parent d5dfc33027
commit cc45f2ada6
16 changed files with 428 additions and 392 deletions
+3 -2
View File
@@ -1,10 +1,11 @@
#include "ggml.h"
#include "models.h"
#include "llama-memory-recurrent.h"
#define CHUNK_SIZE 64
llm_build_qwen35moe::llm_build_qwen35moe(const llama_model & model, const llm_graph_params & params) :
-llm_graph_context_mamba(params), model(model) {
+llm_graph_context(params), model(model) {
const int64_t n_embd_head = hparams.n_embd_head_v;
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);