ggml : add circular tiling support to pad, for Vulkan, CUDA, and CPU (used for making seamless textures) (#16985)

* Feat: Added vulkan circular tiling support * Feat: Added cpu circular * Feat: Added cuda kernels * Added tests * Added tests * Removed non-pad operations * Removed unneeded changes * removed backend non pad tests * Update test-backend-ops.cpp * Fixed comment on pad test * removed trailing whitespace * Removed unneeded test in test-backend-ops * Removed removed test from calls * Update ggml/src/ggml-vulkan/vulkan-shaders/pad.comp Co-authored-by: Ruben Ortlam <picard12@live.de> * Fixed alignment * Formatting Co-authored-by: Aman Gupta <amangupta052@gmail.com> * Format pad * Format * Clang format * format * format * don't change so much stuff * clang format and update to bool * fix duplicates * don't need to fix the padding * make circular bool * duplicate again * rename vulkan to wrap around * Don't need indent * moved to const expr * removed unneeded extra line break * More readable method calls * Minor wording changes * Added final newline * Update ggml/include/ggml.h Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update ggml/include/ggml.h Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Added circular pad ext tests * Gate non circular pad devices * Cleaned gating of non-circular pad devices --------- Co-authored-by: Phylliida <phylliidadev@gmail.com> Co-authored-by: Ruben Ortlam <picard12@live.de> Co-authored-by: Aman Gupta <amangupta052@gmail.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2025-12-06 06:07:02 -08:00
parent f334b79494
commit 09c7c50e64
11 changed files with 214 additions and 60 deletions
@@ -5604,21 +5604,24 @@ struct test_pad : public test_case {
    const std::array<int64_t, 4> ne_a;
    const int pad_0;
    const int pad_1;
+    const bool circular;

    std::string vars() override {
-        return VARS_TO_STR4(type, ne_a, pad_0, pad_1);
+        return VARS_TO_STR5(type, ne_a, pad_0, pad_1, circular);
    }

    test_pad(ggml_type type = GGML_TYPE_F32,
            std::array<int64_t, 4> ne_a = {512, 512, 1, 1},
-            int pad_0 = 1, int pad_1 = 1)
-        : type(type), ne_a(ne_a), pad_0(pad_0), pad_1(pad_1)  {}
+            int pad_0 = 1, int pad_1 = 1, bool circular = false)
+        : type(type), ne_a(ne_a), pad_0(pad_0), pad_1(pad_1), circular(circular) {}

    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
        ggml_set_name(a, "a");

-        ggml_tensor * out = ggml_pad(ctx, a, pad_0, pad_1, 0, 0);
+        ggml_tensor * out = circular
+            ? ggml_pad_circular(ctx, a, pad_0, pad_1, 0, 0)
+            : ggml_pad(ctx, a, pad_0, pad_1, 0, 0);
        ggml_set_name(out, "out");

        return out;
@@ -5638,17 +5641,19 @@ struct test_pad_ext : public test_case {
    const int lp3;
    const int rp3;
    const bool v;
+    const bool circular;

    std::string vars() override {
-        return VARS_TO_STR11(type, ne_a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3, v);
+        return VARS_TO_STR12(type, ne_a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3, v, circular);
    }

    test_pad_ext(ggml_type type = GGML_TYPE_F32,
            std::array<int64_t, 4> ne_a = {512, 512, 3, 1},
            int lp0 = 1, int rp0 = 1, int lp1 = 1, int rp1 = 1,
            int lp2 = 1, int rp2 = 1, int lp3 = 1, int rp3 = 1,
-            bool v = false)
-        : type(type), ne_a(ne_a), lp0(lp0), rp0(rp0), lp1(lp1), rp1(rp1), lp2(lp2), rp2(rp2), lp3(lp3), rp3(rp3), v(v) {}
+            bool v = false, bool circular = false)
+        : type(type), ne_a(ne_a), lp0(lp0), rp0(rp0), lp1(lp1), rp1(rp1), lp2(lp2), rp2(rp2), lp3(lp3), rp3(rp3),
+          v(v), circular(circular) {}

    ggml_tensor * build_graph(ggml_context * ctx) override {
        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne_a.data());
@@ -5659,7 +5664,9 @@ struct test_pad_ext : public test_case {
            ggml_set_name(a, "view of a");
        }

-        ggml_tensor * out = ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
+        ggml_tensor * out = circular
+            ? ggml_pad_ext_circular(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3)
+            : ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
        ggml_set_name(out, "out");

        return out;
@@ -7782,6 +7789,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
    test_cases.emplace_back(new test_group_norm_mul_add(GGML_TYPE_F32, {9, 9, 1280, 1}));
    test_cases.emplace_back(new test_acc());
    test_cases.emplace_back(new test_pad());
+    test_cases.emplace_back(new test_pad(GGML_TYPE_F32, {33, 17, 2, 1}, 4, 3, true)); // circular
    test_cases.emplace_back(new test_pad_ext());
    test_cases.emplace_back(new test_pad_reflect_1d());
    test_cases.emplace_back(new test_pad_reflect_1d(GGML_TYPE_F32, {3000, 384, 4, 1}));
@@ -7829,8 +7837,10 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
    test_cases.emplace_back(new test_solve_tri(GGML_TYPE_F32, { 64, 64, 4, 4 }, { 300, 64, 4, 4 }));

    for (bool v : {false, true}) {
-        test_cases.emplace_back(new test_pad_ext(GGML_TYPE_F32, {512, 512, 1, 1}, 0, 1, 0, 1, 0, 0, 0, 0, v));
-        test_cases.emplace_back(new test_pad_ext(GGML_TYPE_F32, {11, 22, 33, 44}, 1, 2, 3, 4, 5, 6, 7, 8, v));
+        for (bool circular : {false, true}) {
+            test_cases.emplace_back(new test_pad_ext(GGML_TYPE_F32, {512, 512, 1, 1}, 0, 1, 0, 1, 0, 0, 0, 0, v, circular));
+            test_cases.emplace_back(new test_pad_ext(GGML_TYPE_F32, {11, 22, 33, 44}, 1, 2, 3, 4, 5, 6, 7, 8, v, circular));
+        }
    }

    for (int hsk : { 40, 64, 72, 80, 96, 128, 192, 256, 576 }) {