add fast mat-vec kernels for i-quants (#22344)

This commit is contained in:
Rithik Sharma
2026-04-27 08:25:45 -07:00
committed by GitHub
parent 4414c04b9a
commit 665abc6097
3 changed files with 543 additions and 0 deletions
+11
View File
@@ -1391,6 +1391,17 @@ static webgpu_encoded_op ggml_webgpu_mul_mat(webgpu_context & ctx,
case GGML_TYPE_Q2_K:
use_fast = true;
break;
case GGML_TYPE_IQ1_S:
case GGML_TYPE_IQ1_M:
case GGML_TYPE_IQ2_XXS:
case GGML_TYPE_IQ2_XS:
case GGML_TYPE_IQ2_S:
case GGML_TYPE_IQ3_XXS:
case GGML_TYPE_IQ3_S:
case GGML_TYPE_IQ4_NL:
case GGML_TYPE_IQ4_XS:
use_fast = is_vec;
break;
default:
break;
}