ggml-cuda: Repost of 21896: Blackwell native NVFP4 support (#22196)
This commit is contained in:
@@ -3815,7 +3815,7 @@ struct test_mul_mat : public test_case {
|
||||
|
||||
double max_nmse_err(ggml_backend_t backend) override {
|
||||
// for blackwell we quantize activations to mxfp4 instead of q8_1 so we add higher tolerance
|
||||
if (type_a == GGML_TYPE_MXFP4 && backend_has_feature(backend, "BLACKWELL_NATIVE_FP4")) {
|
||||
if ((type_a == GGML_TYPE_MXFP4 || type_a == GGML_TYPE_NVFP4) && backend_has_feature(backend, "BLACKWELL_NATIVE_FP4")) {
|
||||
return 2e-2;
|
||||
}
|
||||
return max_nmse_err();
|
||||
@@ -3951,7 +3951,7 @@ struct test_mul_mat_id : public test_case {
|
||||
|
||||
double max_nmse_err(ggml_backend_t backend) override {
|
||||
// for blackwell we quantize activations to mxfp4 instead of q8_1 so we add higher tolerance
|
||||
if (type_a == GGML_TYPE_MXFP4 && backend_has_feature(backend, "BLACKWELL_NATIVE_FP4")) {
|
||||
if ((type_a == GGML_TYPE_MXFP4 || type_a == GGML_TYPE_NVFP4) && backend_has_feature(backend, "BLACKWELL_NATIVE_FP4")) {
|
||||
return 2e-2;
|
||||
}
|
||||
return max_nmse_err();
|
||||
|
||||
Reference in New Issue
Block a user