models : fix the attn_factor for mistral3 graphs + improve consistency (#17945)
* models : fix the attn_factor for mistral3 graphs * cont : rework attn_factor correction logic * cont : make deepseek2 consistent * cont : add TODO * cont : special-case DSv2 * cont : revert Mistral 3 Large changes * cont : fix DS2 to use the original attn_factor * cont : minor comments
This commit is contained in:
+1
-1
@@ -574,7 +574,7 @@ llm_graph_context::llm_graph_context(const llm_graph_params & params) :
|
||||
freq_base (cparams.rope_freq_base),
|
||||
freq_scale (cparams.rope_freq_scale),
|
||||
ext_factor (cparams.yarn_ext_factor),
|
||||
attn_factor (cparams.yarn_attn_factor),
|
||||
attn_factor (llama_hparams::yarn_attn_factor_adjust(cparams.yarn_attn_factor, cparams.rope_freq_scale, cparams.yarn_ext_factor)),
|
||||
beta_fast (cparams.yarn_beta_fast),
|
||||
beta_slow (cparams.yarn_beta_slow),
|
||||
norm_eps (hparams.f_norm_eps),
|
||||
|
||||
Reference in New Issue
Block a user