ggml : add circular tiling support to pad, for Vulkan, CUDA, and CPU (used for making seamless textures) (#16985)

* Feat: Added vulkan circular tiling support

* Feat: Added cpu circular

* Feat: Added cuda kernels

* Added tests

* Added tests

* Removed non-pad operations

* Removed unneeded changes

* removed backend non pad tests

* Update test-backend-ops.cpp

* Fixed comment on pad test

* removed trailing whitespace

* Removed unneeded test in test-backend-ops

* Removed the deleted test from calls

* Update ggml/src/ggml-vulkan/vulkan-shaders/pad.comp

Co-authored-by: Ruben Ortlam <picard12@live.de>

* Fixed alignment

* Formatting

Co-authored-by: Aman Gupta <amangupta052@gmail.com>

* Format pad

* Format

* Clang format

* format

* format

* don't change so much stuff

* clang format and update to bool

* fix duplicates

* don't need to fix the padding

* make circular bool

* duplicate again

* rename vulkan to wrap around

* Don't need indent

* moved to const expr

* removed unneeded extra line break

* More readable method calls

* Minor wording changes

* Added final newline

* Update ggml/include/ggml.h

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Update ggml/include/ggml.h

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Added circular pad ext tests

* Gate non circular pad devices

* Cleaned gating of non-circular pad devices

---------

Co-authored-by: Phylliida <phylliidadev@gmail.com>
Co-authored-by: Ruben Ortlam <picard12@live.de>
Co-authored-by: Aman Gupta <amangupta052@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Phylliida Dev
2025-12-06 06:07:02 -08:00
committed by GitHub
parent f334b79494
commit 09c7c50e64
11 changed files with 214 additions and 60 deletions
+20 -10
View File
@@ -5604,21 +5604,24 @@ struct test_pad : public test_case {
const std::array<int64_t, 4> ne_a;
const int pad_0;
const int pad_1;
const bool circular;
std::string vars() override {
return VARS_TO_STR4(type, ne_a, pad_0, pad_1);
return VARS_TO_STR5(type, ne_a, pad_0, pad_1, circular);
}
test_pad(ggml_type type = GGML_TYPE_F32,
std::array<int64_t, 4> ne_a = {512, 512, 1, 1},
int pad_0 = 1, int pad_1 = 1)
: type(type), ne_a(ne_a), pad_0(pad_0), pad_1(pad_1) {}
int pad_0 = 1, int pad_1 = 1, bool circular = false)
: type(type), ne_a(ne_a), pad_0(pad_0), pad_1(pad_1), circular(circular) {}
ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
ggml_set_name(a, "a");
ggml_tensor * out = ggml_pad(ctx, a, pad_0, pad_1, 0, 0);
ggml_tensor * out = circular
? ggml_pad_circular(ctx, a, pad_0, pad_1, 0, 0)
: ggml_pad(ctx, a, pad_0, pad_1, 0, 0);
ggml_set_name(out, "out");
return out;
@@ -5638,17 +5641,19 @@ struct test_pad_ext : public test_case {
const int lp3;
const int rp3;
const bool v;
const bool circular;
std::string vars() override {
return VARS_TO_STR11(type, ne_a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3, v);
return VARS_TO_STR12(type, ne_a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3, v, circular);
}
test_pad_ext(ggml_type type = GGML_TYPE_F32,
std::array<int64_t, 4> ne_a = {512, 512, 3, 1},
int lp0 = 1, int rp0 = 1, int lp1 = 1, int rp1 = 1,
int lp2 = 1, int rp2 = 1, int lp3 = 1, int rp3 = 1,
bool v = false)
: type(type), ne_a(ne_a), lp0(lp0), rp0(rp0), lp1(lp1), rp1(rp1), lp2(lp2), rp2(rp2), lp3(lp3), rp3(rp3), v(v) {}
bool v = false, bool circular = false)
: type(type), ne_a(ne_a), lp0(lp0), rp0(rp0), lp1(lp1), rp1(rp1), lp2(lp2), rp2(rp2), lp3(lp3), rp3(rp3),
v(v), circular(circular) {}
ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
@@ -5659,7 +5664,9 @@ struct test_pad_ext : public test_case {
ggml_set_name(a, "view of a");
}
ggml_tensor * out = ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
ggml_tensor * out = circular
? ggml_pad_ext_circular(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3)
: ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
ggml_set_name(out, "out");
return out;
@@ -7782,6 +7789,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
test_cases.emplace_back(new test_group_norm_mul_add(GGML_TYPE_F32, {9, 9, 1280, 1}));
test_cases.emplace_back(new test_acc());
test_cases.emplace_back(new test_pad());
test_cases.emplace_back(new test_pad(GGML_TYPE_F32, {33, 17, 2, 1}, 4, 3, true)); // circular
test_cases.emplace_back(new test_pad_ext());
test_cases.emplace_back(new test_pad_reflect_1d());
test_cases.emplace_back(new test_pad_reflect_1d(GGML_TYPE_F32, {3000, 384, 4, 1}));
@@ -7829,8 +7837,10 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 300, 64, 4, 4 }));
for (bool v : {false, true}) {
test_cases.emplace_back(new test_pad_ext(GGML_TYPE_F32, {512, 512, 1, 1}, 0, 1, 0, 1, 0, 0, 0, 0, v));
test_cases.emplace_back(new test_pad_ext(GGML_TYPE_F32, {11, 22, 33, 44}, 1, 2, 3, 4, 5, 6, 7, 8, v));
for (bool circular : {false, true}) {
test_cases.emplace_back(new test_pad_ext(GGML_TYPE_F32, {512, 512, 1, 1}, 0, 1, 0, 1, 0, 0, 0, 0, v, circular));
test_cases.emplace_back(new test_pad_ext(GGML_TYPE_F32, {11, 22, 33, 44}, 1, 2, 3, 4, 5, 6, 7, 8, v, circular));
}
}
for (int hsk : { 40, 64, 72, 80, 96, 128, 192, 256, 576 }) {