CUDA: fuse relu + sqr (#22249)
This commit is contained in:
@@ -3522,6 +3522,40 @@ struct test_add_rms_norm : public test_case {
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_UNARY(RELU) + GGML_OP_SQR (fused operation)
|
||||
struct test_relu_sqr : public test_case {
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne;
|
||||
|
||||
std::string op_desc(ggml_tensor * t) override {
|
||||
GGML_UNUSED(t);
|
||||
return "RELU_SQR";
|
||||
}
|
||||
|
||||
bool run_whole_graph() override { return true; }
|
||||
|
||||
std::string vars() override {
|
||||
return VARS_TO_STR2(type, ne);
|
||||
}
|
||||
|
||||
test_relu_sqr(ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = {128, 2, 2, 2})
|
||||
: type(type), ne(ne) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_set_name(a, "a");
|
||||
|
||||
ggml_tensor * r = ggml_relu(ctx, a);
|
||||
ggml_set_name(r, "relu");
|
||||
|
||||
ggml_tensor * out = ggml_sqr(ctx, r);
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_SSM_CONV
|
||||
struct test_ssm_conv : public test_case {
|
||||
const ggml_type type;
|
||||
@@ -7311,6 +7345,12 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
}
|
||||
}
|
||||
|
||||
// fused relu + sqr (squared ReLU)
|
||||
for (ggml_type type : {GGML_TYPE_F16, GGML_TYPE_F32}) {
|
||||
test_cases.emplace_back(new test_relu_sqr(type, { 128, 2, 2, 2 }));
|
||||
test_cases.emplace_back(new test_relu_sqr(type, { 5, 7, 11, 13 }));
|
||||
}
|
||||
|
||||
// glu ops
|
||||
for (ggml_type type : {GGML_TYPE_F16, GGML_TYPE_F32}) {
|
||||
for (int v : {0, 1}) {
|
||||
|
||||
Reference in New Issue
Block a user