models : dedup Kimi Linear delta net implementation (#19668)
* models : add llm_build_delta_net_base * cont : keep qwen35 and qwen35moe graphs intact * cont : add comments [no ci] * add kimi linear to delta-net-base * removed unnecessary ggml_cont from g_exp_t * removed ggml_cont from g_diff_exp_t. moved ggml_cont for o to kimi-linear.cpp * removed unnecessary diag mask * cont : simplify * cont : avoid graph splits * scale q after mul instead of beginning * scale q after mul instead of beginning * identical ppl * cont : fix scale and decay mask * minor : remove TODO --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
@@ -306,8 +306,6 @@ ggml_tensor * llm_build_qwen3next::build_layer_attn_linear(
|
||||
|
||||
ggml_tensor * beta = ggml_sigmoid(ctx0, b);
|
||||
|
||||
beta = ggml_reshape_4d(ctx0, beta, num_v_heads, 1, n_seq_tokens, n_seqs);
|
||||
|
||||
// Reshape a to merge head dimensions: [batch, seq_len, num_k_heads, num_v_heads/num_k_heads] -> [batch, seq_len, num_v_heads]
|
||||
ggml_tensor * alpha = ggml_cont_3d(ctx0, a, num_v_heads, n_seq_tokens, n_seqs);
|
||||
|
||||
@@ -318,6 +316,9 @@ ggml_tensor * llm_build_qwen3next::build_layer_attn_linear(
|
||||
ggml_tensor * gate = ggml_mul(ctx0, alpha_softplus, model.layers[il].ssm_a); // -A_log.exp() * softplus
|
||||
cb(gate, "gate", il);
|
||||
|
||||
beta = ggml_reshape_4d(ctx0, beta, 1, num_v_heads, n_seq_tokens, n_seqs);
|
||||
gate = ggml_reshape_4d(ctx0, gate, 1, num_v_heads, n_seq_tokens, n_seqs);
|
||||
|
||||
// Get convolution states from cache
|
||||
ggml_tensor * conv_states_all = mctx_cur->get_r_l(il);
|
||||
ggml_tensor * ssm_states_all = mctx_cur->get_s_l(il);
|
||||
|
||||
Reference in New Issue
Block a user