model: support GLM-OCR (#19677)
* model: support GLM-OCR * Update convert_hf_to_gguf.py Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> --------- Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
+11
-3
@@ -342,9 +342,17 @@ ggml_tensor * clip_graph::build_vit(
|
||||
/* nb2 */ cur->nb[1],
|
||||
/* offset */ ggml_row_size(cur->type, 2 * n_embd));
|
||||
|
||||
// TODO: q/k norm requires row size == n_embd, while here it's d_head
|
||||
// we can add support in the future if needed
|
||||
GGML_ASSERT(layer.q_norm == nullptr && layer.k_norm == nullptr);
|
||||
if (layer.q_norm) {
|
||||
GGML_ASSERT(layer.q_norm->ne[0] == Qcur->ne[0]);
|
||||
Qcur = build_norm(Qcur, layer.q_norm, NULL, norm_t, eps, il);
|
||||
cb(Qcur, "Qcur_norm", il);
|
||||
}
|
||||
|
||||
if (layer.k_norm) {
|
||||
GGML_ASSERT(layer.k_norm->ne[0] == Kcur->ne[0]);
|
||||
Kcur = build_norm(Kcur, layer.k_norm, NULL, norm_t, eps, il);
|
||||
cb(Kcur, "Kcur_norm", il);
|
||||
}
|
||||
|
||||
} else {
|
||||
// separate q, k, v
|
||||
|
||||
+16
-14
@@ -2,7 +2,6 @@
|
||||
|
||||
ggml_cgraph * clip_graph_glm4v::build() {
|
||||
GGML_ASSERT(model.patch_bias != nullptr);
|
||||
GGML_ASSERT(model.position_embeddings != nullptr);
|
||||
GGML_ASSERT(model.class_embedding == nullptr);
|
||||
|
||||
const int batch_size = 1;
|
||||
@@ -45,19 +44,22 @@ ggml_cgraph * clip_graph_glm4v::build() {
|
||||
// pos-conv norm
|
||||
inp = build_norm(inp, model.norm_embd_w, model.norm_embd_b, norm_t, eps, -1);
|
||||
|
||||
// calculate absolute position embedding and apply
|
||||
ggml_tensor * learned_pos_embd = resize_position_embeddings(GGML_SCALE_MODE_BICUBIC);
|
||||
learned_pos_embd = ggml_cont_4d(
|
||||
ctx0, learned_pos_embd,
|
||||
n_embd * 2, n_patches_x / 2, n_patches_y, batch_size);
|
||||
learned_pos_embd = ggml_reshape_4d(
|
||||
ctx0, learned_pos_embd,
|
||||
n_embd * 2, n_patches_x / 2, 2, batch_size * (n_patches_y / 2));
|
||||
learned_pos_embd = ggml_permute(ctx0, learned_pos_embd, 0, 2, 1, 3);
|
||||
learned_pos_embd = ggml_cont_3d(
|
||||
ctx0, learned_pos_embd,
|
||||
n_embd, n_patches_x * n_patches_y, batch_size);
|
||||
cb(learned_pos_embd, "learned_pos_embd", -1);
|
||||
ggml_tensor * learned_pos_embd = nullptr;
|
||||
// Note: GLM-OCR does not have learned position embeddings
|
||||
if (model.position_embeddings != nullptr) {
|
||||
learned_pos_embd = resize_position_embeddings(GGML_SCALE_MODE_BICUBIC);
|
||||
learned_pos_embd = ggml_cont_4d(
|
||||
ctx0, learned_pos_embd,
|
||||
n_embd * 2, n_patches_x / 2, n_patches_y, batch_size);
|
||||
learned_pos_embd = ggml_reshape_4d(
|
||||
ctx0, learned_pos_embd,
|
||||
n_embd * 2, n_patches_x / 2, 2, batch_size * (n_patches_y / 2));
|
||||
learned_pos_embd = ggml_permute(ctx0, learned_pos_embd, 0, 2, 1, 3);
|
||||
learned_pos_embd = ggml_cont_3d(
|
||||
ctx0, learned_pos_embd,
|
||||
n_embd, n_patches_x * n_patches_y, batch_size);
|
||||
cb(learned_pos_embd, "learned_pos_embd", -1);
|
||||
}
|
||||
|
||||
auto add_pos = [&](ggml_tensor * cur, const clip_layer &) {
|
||||
return ggml_rope_multi(
|
||||
|
||||
Reference in New Issue
Block a user