mtmd: Add dynamic high-resolution image preprocessing for InternVL model (#20847)

* added support for internvl's dynamic high-resolution (Qianfan-OCR needed)
* add min/max dynamic patch to gguf meta
* clean up
* simplified handling min/max dynamic patch
* reuse llava_uhd logic for slice images
* provide default values for older models
* flake8
* prevent writing 0 value to gguf
* remove duplicated resolution candidates with a better algorithm
* fix indentation
* format
* add protection from divide by zero
* change to 0 to be safe

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
2026-03-22 20:06:30 -04:00
parent d3ac030a5d
commit ec2b787ebe
7 changed files with 84 additions and 4 deletions
@@ -4273,6 +4273,16 @@ class Qwen25OmniModel(Qwen2VLVisionModel):

@ModelBase.register("InternVisionModel")
 class InternVisionModel(MmprojModel):
+
+    min_dynamic_tiles: int = 0
+    max_dynamic_tiles: int = 0
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        assert self.hparams_vision is not None
+        self.min_dynamic_tiles = self.global_config.get("min_dynamic_patch", 0)
+        self.max_dynamic_tiles = self.global_config.get("max_dynamic_patch", 0)
+
    def set_gguf_parameters(self):
        assert self.hparams_vision is not None
        if isinstance(self.hparams_vision['image_size'], list):
@@ -4295,6 +4305,11 @@ class InternVisionModel(MmprojModel):
        downsample_ratio = self.global_config.get("downsample_ratio")
        assert downsample_ratio is not None
        self.gguf_writer.add_vision_projector_scale_factor(int(1.0 / downsample_ratio))
+        # older models may not have min/max_dynamic_patch in config
+        if self.min_dynamic_tiles > 0:
+            self.gguf_writer.add_vision_preproc_min_tiles(self.min_dynamic_tiles)
+        if self.max_dynamic_tiles > 0:
+            self.gguf_writer.add_vision_preproc_max_tiles(self.max_dynamic_tiles)

    def tensor_force_quant(self, name, new_name, bid, n_dims):
        if ".position_embd." in new_name: