tests: enable kv_unified to prevent cuda oom error on rtx 2060 (#20645)
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
This commit is contained in:
@@ -89,6 +89,7 @@ struct test_context {
 cparams.n_batch = 512;
 cparams.samplers = configs.data();
 cparams.n_samplers = configs.size();
+cparams.kv_unified = true;
 
 // If n_seq_max is not specified, calculate it from configs
 if (n_seq_max < 0) {
Reference in New Issue
Block a user