model: Add PaddleOCR-VL model support (#18825)

* support PaddleOCR-VL

* clip: update PaddleOCR model loader parameters to prevent OOM during warmup

* [update] add paddleocr vl text model instead of ernie4.5

* [update] restore change of minicpmv

* [update] format

* [update] format

* [update] positions and patch merge permute

* [update] mtmd_decode_use_mrope for paddleocr

* [update] image min/max pixels

* [update] remove set_limit_image_tokens

* upate: preprocess without padding

* clean up

* Update convert_hf_to_gguf.py

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Update convert_hf_to_gguf.py

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
megemini
2026-02-20 00:05:25 +08:00
committed by GitHub
parent abb9f3c42b
commit 237958db33
16 changed files with 327 additions and 0 deletions
+5
View File
@@ -325,6 +325,10 @@ struct mtmd_context {
img_beg = "<|begin_of_image|>";
img_end = "<|end_of_image|>";
} else if (proj == PROJECTOR_TYPE_PADDLEOCR) {
// <|IMAGE_START|> ... (image embeddings) ... <|IMAGE_END|>
img_beg = "<|IMAGE_START|>";
img_end = "<|IMAGE_END|>";
}
}
@@ -890,6 +894,7 @@ bool mtmd_decode_use_mrope(mtmd_context * ctx) {
case PROJECTOR_TYPE_QWEN25VL:
case PROJECTOR_TYPE_QWEN3VL:
case PROJECTOR_TYPE_GLM4V:
case PROJECTOR_TYPE_PADDLEOCR:
return true;
default:
return false;