fix vulkan ggml_acc only works in 3d but not 4d (#19426)

* fix vulkan ggml_acc only works in 3d but not 4d

* removed clamp in test_acc_block

* use the correct stride and its test case

* cuda : fix "supports op" condition

* change src0 to src1 in ggml_vk_acc. Update acc.comp with jeffbolznv\'s suggestion except to keep the boundary check

* version without boundary check

* revert back to boundary check version

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
ymcki
2026-02-13 20:31:37 +08:00
committed by GitHub
parent b2ecc0cdb4
commit 0e21991472
4 changed files with 60 additions and 23 deletions
+41 -8
View File
@@ -5847,26 +5847,46 @@ struct test_acc : public test_case {
const ggml_type type;
const std::array<int64_t, 4> ne_a;
const std::array<int64_t, 4> ne_b;
const int64_t stride_dim;
std::string vars() override {
return VARS_TO_STR3(type, ne_a, ne_b);
return VARS_TO_STR4(type, ne_a, ne_b, stride_dim);
}
test_acc(ggml_type type = GGML_TYPE_F32,
std::array<int64_t, 4> ne_a = {256, 17, 1, 1},
std::array<int64_t, 4> ne_b = {256, 16, 1, 1})
: type(type), ne_a(ne_a), ne_b(ne_b) {}
std::array<int64_t, 4> ne_a = {256, 17, 2, 3},
std::array<int64_t, 4> ne_b = {256, 16, 2, 3},
uint64_t stride_dim = -1)
: type(type), ne_a(ne_a), ne_b(ne_b), stride_dim(stride_dim) {}
ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
ggml_set_param(a);
ggml_set_name(a, "a");
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne_b.data());
ggml_set_param(b);
ggml_tensor * b;
if (stride_dim == 1 || stride_dim == 2 || stride_dim == 3) {
// Create a larger tensor and take a view at a non-zero offset.
// This tests that the backend correctly handles b's data offset
std::array<int64_t, 4> ne_b_pad = {ne_b[0], ne_b[1], ne_b[2], ne_b[3]};
ne_b_pad[stride_dim] += 1;
ggml_tensor * b_pad = ggml_new_tensor(ctx, type, 4, ne_b_pad.data());
ggml_set_param(b_pad);
ggml_set_name(b_pad, "b_pad");
// View that skips the first row, so b has a non-zero byte offset
b = ggml_view_4d(ctx, b_pad,
ne_b[0], ne_b[1], ne_b[2], ne_b[3],
b_pad->nb[1], b_pad->nb[2], b_pad->nb[3],
b_pad->nb[1]);
} else {
b = ggml_new_tensor(ctx, type, 4, ne_b.data());
ggml_set_param(b);
}
ggml_set_name(b, "b");
ggml_tensor * out = ggml_acc(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], b->nb[1]);
// When ne_b[0] < ne_a[0], a->nb[1] != b->nb[1], so the stride
// parameters to ggml_acc don't match b's natural stride.
ggml_tensor * out = ggml_acc(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], 0);
ggml_set_name(out, "out");
return out;
@@ -8170,7 +8190,12 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, {9, 9, 1280, 1}));
test_cases.emplace_back(new test_group_norm_mul_add(GGML_TYPE_F32, {64, 64, 320, 1}));
test_cases.emplace_back(new test_group_norm_mul_add(GGML_TYPE_F32, {9, 9, 1280, 1}));
test_cases.emplace_back(new test_acc());
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 1, 1}, {256, 16, 1, 1}, -1));
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {256, 16, 2, 3}, -1));
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {128, 16, 2, 3}, -1));
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {256, 16, 2, 3}, 1));
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {128, 16, 2, 3}, 2));
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {64, 16, 2, 3}, 3));
test_cases.emplace_back(new test_pad());
test_cases.emplace_back(new test_pad(GGML_TYPE_F32, {33, 17, 2, 1}, 4, 3, true)); // circular
test_cases.emplace_back(new test_pad_ext());
@@ -8605,6 +8630,14 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 512, 1)); // prefill
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 1, 1)); // generate
// acc
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 1, 1}, {256, 16, 1, 1}, -1));
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {256, 16, 2, 3}, -1));
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {128, 16, 2, 3}, -1));
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {256, 16, 2, 3}, 1));
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {128, 16, 2, 3}, 2));
test_cases.emplace_back(new test_acc(GGML_TYPE_F32, {256, 17, 2, 3}, {64, 16, 2, 3}, 3));
return test_cases;
}