CUDA: fix FA kernel selection logic (#21271)

This commit is contained in:
Johannes Gäßler
2026-04-01 21:28:19 +02:00
committed by GitHub
parent 6de97b9d3e
commit 86221cf6da
+7
View File
@@ -340,7 +340,14 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
case 128:
case 112:
case 256:
if (V->ne[0] != K->ne[0]) {
return BEST_FATTN_KERNEL_NONE;
}
break;
case 512:
if (V->ne[0] != K->ne[0]) {
return BEST_FATTN_KERNEL_NONE;
}
if (!gqa_opt_applies) {
return BEST_FATTN_KERNEL_NONE;
}