ModelCloud · Qubitium · May 13, 2026 · May 13, 2026 · May 13, 2026 · May 13, 2026
diff --git a/README.md b/README.md
@@ -21,6 +21,7 @@
 
 ## Latest News
 
+* 05/13/2026 7.1.0-dev `main`: ✨ Added `minicpmv_4_6` model support
 * 05/07/2026 7.1.0-dev `main`: ✨ Added `GLM-4.5V`, `GLM-4.6V`, `Zamba` and `Zamba2` model support
 * 04/29/2026 7.1.0-dev `main`: ✨ Added PoolSideAI `Laguna` model support for fused Laguna MoE checkpoints. Added `ERNIE 4.5 VL MoE`, `Ling-2.6-flash` and NVIDIA `Nemotron 3 Nano Omni` model support.
 * 04/28/2026 [7.0.0](https://github.com/ModelCloud/GPTQModel/releases/tag/v7.0.0): 🚀 Added Huawei Ascend NPU support through native torch kernels for GPTQ, AWQ, ParoQuant, GGUF, QQQ, and EXL3. Added `internvl_chat`, `gemma3n`, `GLM-OCR`, `GLM-ASR`, and `falcon_mamba` model support.
@@ -243,23 +244,23 @@ Selected public references where teams or companies explicitly mention GPT-QMode
 <img src=https://github.com/user-attachments/assets/c1b89394-f8f6-44e5-9949-bef15a124723 width="51%"> <img src=https://github.com/user-attachments/assets/23901236-10c5-4435-ac2f-06cf2e097f1e width="47%">
 
 ## Model Support  
-| Model                    |   |                                 |   |                  |   |                   |   |                         |   |
-|--------------------------|---|---------------------------------|---|------------------|---|-------------------|---|-------------------------|---|
-| Apertus                  | ✅ | EXAONE 3/4                      | ✅ | Dots1            | ✅ | Mistral3          | ✅ | Qwen 2/3/3.5 (Next/MoE) | ✅ |
-| Baichuan                 | ✅ | Falcon (H1 / Mamba)             | ✅ | InternLM 1/2/2.5 | ✅ | Mixtral           | ✅ | Qwen 2/2.5/3 VL         | ✅ |
-| Bloom                    | ✅ | FastVLM                         | ✅ | Kimi K2          | ✅ | MobileLLM         | ✅ | Qwen 2.5/3 Omni         | ✅ |
-| ChatGLM                  | ✅ | Gemma 1-4 / 3n                  | ✅ | Klear            | ✅ | MOSS              | ✅ | RefinedWeb              | ✅ |
-| CodeGen                  | ✅ | GPTBigCode                      | ✅ | LING/RING        | ✅ | MPT               | ✅ | StableLM                | ✅ |
-| Cohere 1-2               | ✅ | GPT-Neo / NeoX                  | ✅ | Llama 1-3.3      | ✅ | Nemotron H / Omni | ✅ | StarCoder2              | ✅ |
-| DBRX Converted           | ✅ | GPT-2                           | ✅ | Llama 3.2 VL     | ✅ | Nemotron Ultra    | ✅ | TeleChat2               | ✅ |
-| Deci                     | ✅ | GPT-J                           | ✅ | Llama 4          | ✅ | OPT               | ✅ | Trinity                 | ✅ |
-| DeepSeek-V2/V3/R1        | ✅ | GPT-OSS                         | ✅ | LongCat Flash    | ✅ | OLMo2 / LLaDA2    | ✅ | Yi                      | ✅ |
-| DeepSeek-V2-Lite         | ✅ | Granite / Granite MoE           | ✅ | LongLLaMA        | ✅ | Ovis 1.6/2        | ✅ | Seed-OSS                | ✅ |
-| Dream                    | ✅ | GRIN-MoE                        | ✅ | Instella         | ✅ | Phi 1-4           | ✅ | Voxtral                 | ✅ |
-| ERNIE 4.5 / MoE / VL MoE | ✅ | GLM 4/4V/4.5V/4.6V/5/5.1/OCR/ASR | ✅ | GLM4 MoE / Lite / 4.5V MoE | ✅ | MiniCPM 3/O/V     | ✅ | PanGu-α                 | ✅ |
-| XVERSE                   | ✅ | Brumby                          | ✅ | Hymba            | ✅ | Mistral           | ✅ | Qwen 1/2/3/3.5          | ✅ |
-| MiniMax M2               | ✅ | AfMoE                           | ✅ | Bailing-MoE      | ✅ | LFM2-MoE          | ✅ | Marin                   | ✅ |
-| InternVL Chat            | ✅ | Laguna                          | ✅ | Zamba / Zamba2   | ✅ |                   |   |                         |   |
+| Model                    |   |                                 |   |                  |   |                     |   |                         |   |
+|--------------------------|---|---------------------------------|---|------------------|---|---------------------|---|-------------------------|---|
+| Apertus                  | ✅ | EXAONE 3/4                      | ✅ | Dots1            | ✅ | Mistral3            | ✅ | Qwen 2/3/3.5 (Next/MoE) | ✅ |
+| Baichuan                 | ✅ | Falcon (H1 / Mamba)             | ✅ | InternLM 1/2/2.5 | ✅ | Mixtral             | ✅ | Qwen 2/2.5/3 VL         | ✅ |
+| Bloom                    | ✅ | FastVLM                         | ✅ | Kimi K2          | ✅ | MobileLLM           | ✅ | Qwen 2.5/3 Omni         | ✅ |
+| ChatGLM                  | ✅ | Gemma 1-4 / 3n                  | ✅ | Klear            | ✅ | MOSS                | ✅ | RefinedWeb              | ✅ |
+| CodeGen                  | ✅ | GPTBigCode                      | ✅ | LING/RING        | ✅ | MPT                 | ✅ | StableLM                | ✅ |
+| Cohere 1-2               | ✅ | GPT-Neo / NeoX                  | ✅ | Llama 1-3.3      | ✅ | Nemotron H / Omni   | ✅ | StarCoder2              | ✅ |
+| DBRX Converted           | ✅ | GPT-2                           | ✅ | Llama 3.2 VL     | ✅ | Nemotron Ultra      | ✅ | TeleChat2               | ✅ |
+| Deci                     | ✅ | GPT-J                           | ✅ | Llama 4          | ✅ | OPT                 | ✅ | Trinity                 | ✅ |
+| DeepSeek-V2/V3/R1        | ✅ | GPT-OSS                         | ✅ | LongCat Flash    | ✅ | OLMo2 / LLaDA2      | ✅ | Yi                      | ✅ |
+| DeepSeek-V2-Lite         | ✅ | Granite / Granite MoE           | ✅ | LongLLaMA        | ✅ | Ovis 1.6/2          | ✅ | Seed-OSS                | ✅ |
+| Dream                    | ✅ | GRIN-MoE                        | ✅ | Instella         | ✅ | Phi 1-4             | ✅ | Voxtral                 | ✅ |
+| ERNIE 4.5 / MoE / VL MoE | ✅ | GLM 4/4V/4.5V/4.6V/5/5.1/OCR/ASR | ✅ | GLM4 MoE / Lite / 4.5V MoE | ✅ | MiniCPM 3/O/V/V 4.6 | ✅ | PanGu-α                 | ✅ |
+| XVERSE                   | ✅ | Brumby                          | ✅ | Hymba            | ✅ | Mistral             | ✅ | Qwen 1/2/3/3.5          | ✅ |
+| MiniMax M2               | ✅ | AfMoE                           | ✅ | Bailing-MoE      | ✅ | LFM2-MoE            | ✅ | Marin                   | ✅ |
+| InternVL Chat            | ✅ | Laguna                          | ✅ | Zamba / Zamba2   | ✅ |                     |   |                         |   |
 
 Prism Bonsai GGUF checkpoints are supported for inference only through GPT-QModel's native GGUF path and internal GGUF runtime. Bonsai checkpoints load through the normal model path or repo argument and do not require the external `gguf` package. Prism model quantization is not included.
 

diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py
@@ -128,7 +128,8 @@
 from .definitions.minicpm import MiniCPMGPTQ  # noqa: E402
 from .definitions.minicpm3 import MiniCpm3QModel  # noqa: E402
 from .definitions.minicpm_o import MiniCPMOQModel  # noqa: E402
-from .definitions.minicpm_v import MiniCPMVQModel  # noqa: E402
+from .definitions.minicpmv import MiniCPMVQModel  # noqa: E402
+from .definitions.minicpmv_4_6 import MiniCPMV4_6QModel  # noqa: E402
 from .definitions.minimax_m2 import MiniMaxM2GPTQ  # noqa: E402
 from .definitions.mistral3 import Mistral3GPTQ
 from .definitions.mixtral import MixtralQModel  # noqa: E402
@@ -246,6 +247,7 @@
     "minicpm3": MiniCpm3QModel,
     "minicpmo": MiniCPMOQModel,
     "minicpmv": MiniCPMVQModel,
+    "minicpmv4_6": MiniCPMV4_6QModel,
     "minimax": MiniMaxM2GPTQ,
     "minimax_m2": MiniMaxM2GPTQ,
     "qwen2_moe": Qwen2MoeQModel,

diff --git a/gptqmodel/models/definitions/__init__.py b/gptqmodel/models/definitions/__init__.py
@@ -48,7 +48,8 @@
 from .mimo import MimoQModel
 from .minicpm3 import MiniCpm3QModel
 from .minicpm_o import MiniCPMOQModel
-from .minicpm_v import MiniCPMVQModel
+from .minicpmv import MiniCPMVQModel
+from .minicpmv_4_6 import MiniCPMV4_6QModel
 from .minimax_m2 import MiniMaxM2GPTQ
 from .mixtral import MixtralQModel
 from .mllama import MLlamaQModel

diff --git a/gptqmodel/models/definitions/minicpm_v.py → gptqmodel/models/definitions/minicpmv.py b/gptqmodel/models/definitions/minicpm_v.py → gptqmodel/models/definitions/minicpmv.py
diff --git a/gptqmodel/models/definitions/minicpmv_4_6.py b/gptqmodel/models/definitions/minicpmv_4_6.py
@@ -0,0 +1,134 @@
+# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
+# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+
+from typing import Dict
+
+from transformers import AutoModelForImageTextToText, AutoProcessor, ProcessorMixin
+
+from ...utils.calibration import batched
+from ...utils.model import MODALITY, move_to, nested_move_to
+from ...utils.offload import offload_to_disk
+from .._const import CPU
+from ..base import BaseQModel
+
+
+class MiniCPMV4_6QModel(BaseQModel):
+    loader = AutoModelForImageTextToText
+
+    pre_lm_head_norm_module = "model.language_model.norm"
+    rotary_embedding = "model.language_model.rotary_emb"
+
+    module_tree = [
+        "model",
+        "language_model",
+        "layers",
+        "#",
+        {
+            "input_layernorm": ("input_layernorm:!",),
+            "self_attn": ("q_norm:!", "q_proj:0", "k_norm:!", "k_proj:0", "v_proj:0", "o_proj:1"),
+            "linear_attn": (
+                "norm:!",
+                "conv1d:!",
+                "in_proj_qkv:0",
+                "in_proj_z:1",
+                "in_proj_b:!:1",
+                "in_proj_a:!:1",
+                "out_proj:2",
+            ),
+            "post_attention_layernorm": ("post_attention_layernorm:!",),
+            "mlp": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+        }
+    ]
+
+    modality = [MODALITY.TEXT, MODALITY.IMAGE_TO_TEXT]
+    require_load_processor = True
+    require_trust_remote_code = False
+    layer_modules_strict = False
+
+    def pre_quantize_generate_hook_start(self):
+        language_model = self.model.model.language_model
+        self.shell_module_materialize(language_model.embed_tokens, self.quantize_config.device)
+        self.shell_module_materialize(language_model.rotary_emb, self.quantize_config.device)
+        self.shell_module_materialize(self.model.model.vision_tower, self.quantize_config.device)
+        self.shell_module_materialize(self.model.model.merger, self.quantize_config.device)
+
+    def pre_quantize_generate_hook_end(self):
+        language_model = self.model.model.language_model
+        if self.quantize_config.offload_to_disk:
+            offload_to_disk(
+                model=language_model,
+                module=language_model.embed_tokens,
+                disk_path=self.quantize_config.offload_to_disk_path,
+            )
+            offload_to_disk(
+                model=language_model,
+                module=language_model.rotary_emb,
+                disk_path=self.quantize_config.offload_to_disk_path,
+            )
+            offload_to_disk(
+                model=self.model,
+                module=self.model.model.vision_tower,
+                disk_path=self.quantize_config.offload_to_disk_path,
+            )
+            offload_to_disk(
+                model=self.model,
+                module=self.model.model.merger,
+                disk_path=self.quantize_config.offload_to_disk_path,
+            )
+            return
+
+        language_model.embed_tokens = move_to(language_model.embed_tokens, device=CPU)
+        language_model.rotary_emb = move_to(language_model.rotary_emb, device=CPU)
+        self.model.model.vision_tower = move_to(self.model.model.vision_tower, device=CPU)
+        self.model.model.merger = move_to(self.model.model.merger, device=CPU)
+
+    def preprocess_dataset(self, sample: Dict) -> Dict:
+        return sample
+
+    def load_processor(self) -> ProcessorMixin:
+        return AutoProcessor.from_pretrained(self.model_local_path, trust_remote_code=False)
+
+    @classmethod
+    def prepare_inputs_for_conversations(
+        cls,
+        processor: ProcessorMixin,
+        conversations: list[dict] | list[list[dict]],
+    ):
+        if conversations and isinstance(conversations[0], dict):
+            conversations = [conversations]
+
+        downsample_mode = "16x"  # Use `downsample_mode="4x"` for finer detail
+
+        inputs = processor.apply_chat_template(
+            conversations, tokenize=True, add_generation_prompt=True,
+            return_dict=True, return_tensors="pt",
+            downsample_mode=downsample_mode,
+            max_slice_nums=36,
+        )
+        return inputs
+
+    def prepare_dataset(self, calibration_dataset, batch_size: int = 1, **kwargs):
+        processor = self.load_processor()
+        calib_data = []
+        for batch in batched(calibration_dataset, batch_size, process_func=self.preprocess_dataset):
+            calib_data.append(
+                self.prepare_inputs_for_conversations(
+                    processor,
+                    batch,
+                )
+            )
+        del processor
+        return calib_data
+
+    def move_input_capture_example(self, example, data_device):
+        for key, value in example.items():
+            example[key] = nested_move_to(value, device=data_device)
+
+        return self.finalize_input_capture_example(example)
+
+    def run_input_capture(self, example, use_cache: bool, data_device):
+        return self.model.generate(
+            **example,
+        )
diff --git a/gptqmodel/utils/hf.py b/gptqmodel/utils/hf.py
@@ -1213,7 +1213,7 @@ def encoder_init_compat(self, encoder_config):
                 encoder_cls.__init__ = encoder_init_compat
                 encoder_cls._gptqmodel_meta_dpr_patch = True
 
-        if config.model_type == "minicpmv" or config.model_type == "minicpmo":
+        if config.model_type in {"minicpmv", "minicpmv4_6", "minicpmo"}:
             vision_model_cls = getattr(
                 remote_module,
                 "SiglipVisionTransformer",

diff --git a/tests/models/ovis/image_to_test_dataset.py b/tests/models/ovis/image_to_test_dataset.py
@@ -7,7 +7,8 @@
 from gptqmodel.models.definitions.ernie4_5_vl_moe import Ernie4_5_VLMoeQModel
 from gptqmodel.models.definitions.internvl_chat import InternVLChatQModel
 from gptqmodel.models.definitions.minicpm_o import MiniCPMOQModel
-from gptqmodel.models.definitions.minicpm_v import MiniCPMVQModel
+from gptqmodel.models.definitions.minicpmv import MiniCPMVQModel
+from gptqmodel.models.definitions.minicpmv_4_6 import MiniCPMV4_6QModel
 from gptqmodel.models.definitions.ovis import OvisQModel
 from gptqmodel.models.definitions.ovis2 import Ovis2QModel
 from gptqmodel.models.definitions.qwen3_vl import Qwen3_VLQModel
@@ -98,6 +99,7 @@ def get_calib_dataset(model):
         or isinstance(model, Qwen3_VLQModel)
         or isinstance(model, MiniCPMOQModel)
         or isinstance(model, MiniCPMVQModel)
+        or isinstance(model, MiniCPMV4_6QModel)
         or isinstance(model, InternVLChatQModel)
         or isinstance(model, Ernie4_5_VLMoeQModel)
     ):

diff --git a/tests/models/test_minicpm_v_4_5.py → tests/models/test_minicpmv_4_5.py b/tests/models/test_minicpm_v_4_5.py → tests/models/test_minicpmv_4_5.py
@@ -14,7 +14,7 @@ class TestMiniCPMV4_5(ModelTest):
     TRUST_REMOTE_CODE = True
     EVAL_BATCH_SIZE = 1
 
-    def test_minicpm_v_4_5(self):
+    def test_minicpmv_4_5(self):
         # Evalution does not support minicpmv, and will throw an error during execution:
         # E TypeError: MiniCPMV.forward() missing 1 required positional argument: 'data
         with self.model_compat_test_context():

diff --git a/tests/models/test_minicpmv_4_6.py b/tests/models/test_minicpmv_4_6.py
@@ -0,0 +1,60 @@
+# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
+# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+
+import os.path
+
+from model_test import ModelTest
+
+
+class TestMiniCPMV4_6(ModelTest):
+    NATIVE_MODEL_ID = "openbmb/MiniCPM-V-4.6"  # model id passed to quantModel()
+    TRUST_REMOTE_CODE = True
+    EVAL_BATCH_SIZE = 1
+
+    def test_minicpmv_4_6(self):
+        # Evaluation does not support minicpmv, and will throw an error during execution:
+        # E TypeError: MiniCPMV.forward() missing 1 required positional argument: 'data'
+        with self.model_compat_test_context():
+            model, tokenizer, processor = self.quantModel(
+                self.NATIVE_MODEL_ID,
+                trust_remote_code=self.TRUST_REMOTE_CODE,
+                dtype=self.TORCH_DTYPE,
+                batch_size=1,
+                call_perform_post_quant_validation=False,
+            )
+
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image",
+                        "image": os.path.join(os.path.dirname(os.path.abspath(__file__)), "ovis/10016.jpg"),
+                    },
+                    {"type": "text", "text": "Describe this image."},
+                ],
+            }
+        ]
+
+        downsample_mode = "16x"  # Use `downsample_mode="4x"` for finer detail
+
+        inputs = processor.apply_chat_template(
+            messages, tokenize=True, add_generation_prompt=True,
+            return_dict=True, return_tensors="pt",
+            downsample_mode=downsample_mode,
+            max_slice_nums=36,
+        ).to(model.device)
+
+        generated_ids = model.generate(**inputs, downsample_mode=downsample_mode, max_new_tokens=512)
+
+        generated_ids_trimmed = [
+            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+        output_text = processor.batch_decode(
+            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+        )[0]
+        print(f'Output:\n{output_text}')
+
+        self.assertIn("snow", output_text.lower())