model: support GLM4V vision encoder (#18042)
* convert ok * no deepstack * less new tensors * cgraph ok * add mrope for text model * faster patch merger * add GGML_ROPE_TYPE_MRNORM * add support for metal * move glm4v do dedicated graph * convert: add norm_embd * clip: add debugging fn * working correctly * fix style * use bicubic * fix mrope metal * improve cpu * convert to neox ordering on conversion * revert backend changes * force stop if using old weight * support moe variant * fix conversion * fix convert (2) * Update tools/mtmd/clip-graph.h Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * process mrope_section on TextModel base class * resolve conflict merge --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
@@ -56,3 +56,8 @@ struct clip_graph_whisper_enc : clip_graph {
|
||||
clip_graph_whisper_enc(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
|
||||
ggml_cgraph * build() override;
|
||||
};
|
||||
|
||||
struct clip_graph_glm4v : clip_graph {
|
||||
clip_graph_glm4v(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
|
||||
ggml_cgraph * build() override;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user