cuda : fix "V is K view" check for non-unified KV cache (#19145)

This commit is contained in:
Georgi Gerganov
2026-01-28 09:15:27 +02:00
committed by GitHub
parent 2eee6c866c
commit 631cbfcc7a
3 changed files with 5 additions and 4 deletions
+3 -2
View File
@@ -1295,9 +1295,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
).set_env("LLAMA_ARG_CACHE_RAM").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI}));
add_opt(common_arg(
{"-kvu", "--kv-unified"},
{"-no-kvu", "--no-kv-unified"},
"use single unified KV buffer shared across all sequences (default: enabled if number of slots is auto)",
[](common_params & params) {
params.kv_unified = true;
[](common_params & params, bool value) {
params.kv_unified = value;
}
).set_env("LLAMA_ARG_KV_UNIFIED").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_PERPLEXITY, LLAMA_EXAMPLE_BATCHED}));
add_opt(common_arg(