ggml-cuda: Repost of 21896: Blackwell native NVFP4 support (#22196)

This commit is contained in:
Michael Wand
2026-04-28 15:47:42 -07:00
committed by GitHub
parent 7b8443ac78
commit fc2b0053ff
8 changed files with 318 additions and 130 deletions
+2 -2
View File
@@ -3815,7 +3815,7 @@ struct test_mul_mat : public test_case {
double max_nmse_err(ggml_backend_t backend) override {
// for blackwell we quantize activations to mxfp4 instead of q8_1 so we add higher tolerance
if (type_a == GGML_TYPE_MXFP4 && backend_has_feature(backend, "BLACKWELL_NATIVE_FP4")) {
if ((type_a == GGML_TYPE_MXFP4 || type_a == GGML_TYPE_NVFP4) && backend_has_feature(backend, "BLACKWELL_NATIVE_FP4")) {
return 2e-2;
}
return max_nmse_err();
@@ -3951,7 +3951,7 @@ struct test_mul_mat_id : public test_case {
double max_nmse_err(ggml_backend_t backend) override {
// for blackwell we quantize activations to mxfp4 instead of q8_1 so we add higher tolerance
if (type_a == GGML_TYPE_MXFP4 && backend_has_feature(backend, "BLACKWELL_NATIVE_FP4")) {
if ((type_a == GGML_TYPE_MXFP4 || type_a == GGML_TYPE_NVFP4) && backend_has_feature(backend, "BLACKWELL_NATIVE_FP4")) {
return 2e-2;
}
return max_nmse_err();