ggml : add ggml_top_k (#17365)
* ggml : add ggml_top_k
* cont : add ggml_argsort_top_k
* metal : add top_k support
* ggml : cleanup
* tests : add virtual err() function for test_case
* ggml : add comments
This commit is contained in:
+127
-8
@@ -39,6 +39,7 @@
|
||||
#include <string_view>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
|
||||
static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
|
||||
size_t nels = ggml_nelements(tensor);
|
||||
@@ -269,6 +270,34 @@ static double nmse(const float * a, const float * b, size_t n) {
|
||||
return mse_a_b / mse_a_0;
|
||||
}
|
||||
|
||||
// difference between 2 integer multisets (Jaccard-like distance over element counts):
// 0 - identical multisets, 1 - no overlap
// computed as sum over all values of |count_a(v) - count_b(v)|, normalized by 2*n
static double jdst(const int32_t * a, const int32_t * b, size_t n) {
    // empty inputs are identical by definition; also avoids a 0/0 (NaN) below
    if (n == 0) {
        return 0.0;
    }

    std::unordered_map<int32_t, size_t> set_a;
    std::unordered_map<int32_t, size_t> set_b;

    for (size_t i = 0; i < n; ++i) {
        set_a[a[i]]++;
        set_b[b[i]]++;
    }

    size_t diff = 0;

    // values present in a: accumulate the count mismatch vs b
    for (const auto & p : set_a) {
        const int64_t na = p.second;
        const int64_t nb = set_b.find(p.first) != set_b.end() ? set_b.at(p.first) : 0;

        diff += std::abs(na - nb);
    }

    // values present only in b were not accounted for above
    for (const auto & p : set_b) {
        if (set_a.find(p.first) == set_a.end()) {
            diff += p.second;
        }
    }

    return (double) diff / (2*n);
}
|
||||
|
||||
// maximum absolute asymmetry between a and b
|
||||
// asymmetry: (a - b) / (a + b)
|
||||
// This is more stable than relative error if one of the values fluctuates towards zero.
|
||||
@@ -1051,6 +1080,14 @@ struct test_case {
|
||||
return 1e-4;
|
||||
}
|
||||
|
||||
// maximum tolerated error for this test case; defaults to the NMSE threshold.
// override together with err() to use a different error metric (see test_top_k)
virtual double max_err() {
    return max_nmse_err();
}
|
||||
|
||||
// error between the 2 backends' results for one tensor; NMSE by default.
// test cases may override this when NMSE is not meaningful for their output
// (e.g. when the output is a set of indices)
virtual double err(const float * a, const float * b, size_t n) {
    return nmse(a, b, n);
}
|
||||
|
||||
// step size used when estimating gradients numerically
// (presumably a finite-difference epsilon — confirm against the gradient-check code)
virtual float grad_eps() {
    return 1e-1f;
}
|
||||
@@ -1257,16 +1294,16 @@ struct test_case {
|
||||
// compare
|
||||
// state shared with the per-tensor comparison callback
struct callback_userdata {
    bool ok;            // overall comparison result so far
    double max_err;     // error threshold (NMSE-based default)
    test_case * tc;     // the test case being evaluated (provides err()/max_err())
    ggml_backend_t backend1;
    ggml_backend_t backend2;
};
|
||||
|
||||
callback_userdata ud {
|
||||
true,
|
||||
max_nmse_err(),
|
||||
this,
|
||||
backend1,
|
||||
backend2
|
||||
backend2,
|
||||
};
|
||||
|
||||
auto callback = [](int index, ggml_tensor * t1, ggml_tensor * t2, void * user_data) -> bool {
|
||||
@@ -1314,9 +1351,9 @@ struct test_case {
|
||||
}
|
||||
}
|
||||
|
||||
double err = nmse(f1.data(), f2.data(), f1.size());
|
||||
if (err > ud->max_err) {
|
||||
printf("[%s] NMSE = %.9f > %.9f ", ggml_op_desc(t1), err, ud->max_err);
|
||||
double err = ud->tc->err(f1.data(), f2.data(), f1.size());
|
||||
if (err > ud->tc->max_err()) {
|
||||
printf("[%s] ERR = %.9f > %.9f ", ggml_op_desc(t1), err, ud->tc->max_err());
|
||||
//for (int i = 0; i < (int) f1.size(); i++) {
|
||||
// printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
|
||||
//}
|
||||
@@ -4943,7 +4980,71 @@ struct test_argsort : public test_case {
|
||||
}
|
||||
};
|
||||
|
||||
struct test_topk_moe: public test_case {
|
||||
// GGML_OP_TOP_K
|
||||
struct test_top_k : public test_case {
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne;
|
||||
const int k;
|
||||
|
||||
std::string vars() override {
|
||||
return VARS_TO_STR3(type, ne, k);
|
||||
}
|
||||
|
||||
test_top_k(ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = {16, 10, 10, 10},
|
||||
int k = 4)
|
||||
: type(type), ne(ne), k(k) {}
|
||||
|
||||
double max_err() override {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
double err(const float * a, const float * b, size_t n) override {
|
||||
std::vector<int32_t> ia(n);
|
||||
std::vector<int32_t> ib(n);
|
||||
|
||||
double diff = 0.0f;
|
||||
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
ia[i] = (int32_t) a[i];
|
||||
ib[i] = (int32_t) b[i];
|
||||
|
||||
// penalize the result if the data is not integer valued
|
||||
diff += std::fabs(a[i] - ia[i]);
|
||||
diff += std::fabs(b[i] - ib[i]);
|
||||
}
|
||||
|
||||
return diff + jdst(ia.data(), ib.data(), n);
|
||||
}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_set_name(a, "a");
|
||||
|
||||
ggml_tensor * out = ggml_top_k(ctx, a, k);
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
void initialize_tensors(ggml_context * ctx) override {
|
||||
std::random_device rd;
|
||||
std::default_random_engine rng(rd());
|
||||
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
||||
// initialize with unique values to avoid ties
|
||||
for (int64_t r = 0; r < ggml_nrows(t); r++) {
|
||||
std::vector<float> data(t->ne[0]);
|
||||
for (int i = 0; i < t->ne[0]; i++) {
|
||||
data[i] = i;
|
||||
}
|
||||
std::shuffle(data.begin(), data.end(), rng);
|
||||
ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(float));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct test_topk_moe : public test_case {
|
||||
const std::array<int64_t, 4> ne;
|
||||
const int n_expert_used;
|
||||
const bool with_norm;
|
||||
@@ -4976,7 +5077,7 @@ struct test_topk_moe: public test_case {
|
||||
|
||||
ggml_tensor * logits = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne.data());
|
||||
ggml_tensor * probs = delayed_softmax ? logits : ggml_soft_max(ctx, logits);
|
||||
ggml_tensor * selected_experts = ggml_top_k(ctx, probs, n_expert_used); // [n_expert_used, n_tokens]
|
||||
ggml_tensor * selected_experts = ggml_argsort_top_k(ctx, probs, n_expert_used); // [n_expert_used, n_tokens]
|
||||
|
||||
ggml_tensor * out = ggml_get_rows(ctx, ggml_reshape_3d(ctx, probs, 1, n_expert, n_tokens), selected_experts); // [1, n_expert_used, n_tokens]
|
||||
|
||||
@@ -7534,6 +7635,23 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {2, 8, 8192, 1}, order)); // bailingmoe2 (group selection)
|
||||
}
|
||||
|
||||
for (int k : {1, 2, 3, 7, 15}) {
|
||||
test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {16, 10, 10, 10}, k));
|
||||
test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {60, 10, 10, 10}, k));
|
||||
test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {1023, 2, 1, 3}, k));
|
||||
test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {1024, 2, 1, 3}, k));
|
||||
test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {1025, 2, 1, 3}, k));
|
||||
test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {16384, 1, 1, 1}, k));
|
||||
test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {2047, 2, 1, 3}, k));
|
||||
test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {2048, 2, 1, 3}, k));
|
||||
test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {2049, 2, 1, 3}, k));
|
||||
}
|
||||
|
||||
// exhaustive top_k tests
|
||||
//for (int i = 1; i < 9999; ++i) {
|
||||
// test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {i, 2, 1, 3}, rand() % i + 1));
|
||||
//}
|
||||
|
||||
for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR, GGML_SCALE_MODE_BICUBIC}) {
|
||||
test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode));
|
||||
test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode, true));
|
||||
@@ -7914,6 +8032,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
||||
}
|
||||
|
||||
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {65000, 16, 1, 1}));
|
||||
test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {65000, 16, 1, 1}, 40));
|
||||
|
||||
return test_cases;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user