ModelCloud · Qubitium · May 15, 2026
diff --git a/README.md b/README.md
@@ -21,6 +21,7 @@
 
 ## Latest News
 
+* 05/15/2026 7.1.0-dev `main`: ✨ Added `mimo_v2` model support
 * 05/13/2026 7.1.0-dev `main`: ✨ Added `minicpmv_4_6` and `DeepSeek V4` model support
 * 05/07/2026 7.1.0-dev `main`: ✨ Added `GLM-4.5V`, `GLM-4.6V`, `Zamba` and `Zamba2` model support
 * 04/29/2026 7.1.0-dev `main`: ✨ Added PoolSideAI `Laguna` model support for fused Laguna MoE checkpoints. Added `ERNIE 4.5 VL MoE`, `Ling-2.6-flash` and NVIDIA `Nemotron 3 Nano Omni` model support.
@@ -260,7 +261,7 @@ Selected public references where teams or companies explicitly mention GPT-QMode
 | ERNIE 4.5 / MoE / VL MoE | ✅ | GLM 4/4V/4.5V/4.6V/5/5.1/OCR/ASR | ✅ | GLM4 MoE / Lite / 4.5V MoE | ✅ | MiniCPM 3/O/V/V 4_6 | ✅ | PanGu-α                 | ✅ |
 | XVERSE                   | ✅ | Brumby                          | ✅ | Hymba            | ✅ | Mistral             | ✅ | Qwen 1/2/3/3.5          | ✅ |
 | MiniMax M2               | ✅ | AfMoE                           | ✅ | Bailing-MoE      | ✅ | LFM2-MoE            | ✅ | Marin                   | ✅ |
-| InternVL Chat            | ✅ | Laguna                          | ✅ | Zamba / Zamba2   | ✅ |                     |   |                         |   |
+| InternVL Chat            | ✅ | Laguna                          | ✅ | Mimo / Mimo V2   | ✅ | Zamba / Zamba2      | ✅ |                         |   |
 
 Prism Bonsai GGUF checkpoints are supported for inference only through GPT-QModel's native GGUF path and internal GGUF runtime. Bonsai checkpoints load through the normal model path or repo argument and do not require the external `gguf` package. Prism model quantization is not included.
 

diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py
@@ -128,6 +128,7 @@
 from .definitions.llava_qwen2 import LlavaQwen2QModel  # noqa: E402
 from .definitions.longcat_flash import LongCatFlashQModel  # noqa: E402
 from .definitions.mimo import MimoQModel  # noqa: E402
+from .definitions.mimo_v2 import MimoV2QModel  # noqa: E402
 from .definitions.minicpm import MiniCPMGPTQ  # noqa: E402
 from .definitions.minicpm3 import MiniCpm3QModel  # noqa: E402
 from .definitions.minicpm_o import MiniCPMOQModel  # noqa: E402
@@ -285,6 +286,7 @@
     "telechat": TeleChat2QModel,
     "instella": InstellaQModel,
     "mimo": MimoQModel,
+    "mimo_v2": MimoV2QModel,
     "falcon_h1": FalconH1QModel,
     "zamba": ZambaQModel,
     "zamba2": Zamba2QModel,

diff --git a/gptqmodel/models/definitions/__init__.py b/gptqmodel/models/definitions/__init__.py
@@ -52,6 +52,7 @@
 from .minicpmv import MiniCPMVQModel
 from .minicpmv_4_6 import MiniCPMV4_6QModel
 from .minimax_m2 import MiniMaxM2GPTQ
+from .mimo_v2 import MimoV2QModel
 from .mixtral import MixtralQModel
 from .mllama import MLlamaQModel
 from .mobilellm import MobileLLMQModel

diff --git a/gptqmodel/models/definitions/mimo_v2.py b/gptqmodel/models/definitions/mimo_v2.py
@@ -0,0 +1,136 @@
+# SPDX-FileCopyrightText: 2026 ModelCloud.ai
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import os
+
+from safetensors import safe_open
+from torch import nn
+
+from gptqmodel.models.moe_lifecycle import GateUpDownMoELifecycleHooks
+
+from ..base import BaseQModel
+from ...utils.torch import CPU
+
+
+class MimoV2QModel(BaseQModel):
+    """MiMo V2 model definition; MiMo V2 uses repository-defined configuration/modeling classes."""
+    require_trust_remote_code = True
+
+    dynamic_expert_index = "n_routed_experts"  # presumably the config field holding the routed-expert count
+
+    pre_lm_head_norm_module = "model.norm"
+    rotary_embedding = "model.rotary_emb"
+
+    awq_scale_optimize_shape_dependent_modules = ["self_attn.o_proj"]
+
+    moe_lifecycle_hooks = GateUpDownMoELifecycleHooks()
+
+    # MiMo V2 supports both split q/k/v and fused qkv checkpoints, and individual
+    # layers can be dense MLP or routed MoE according to config.moe_layer_freq.
+    layer_modules_strict = False
+
+    module_tree = [
+        "model",
+        "layers",
+        "#",
+        {
+            "input_layernorm": ("input_layernorm:!",),
+            "self_attn": ("qkv_proj:0", "q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"),
+            "post_attention_layernorm": ("post_attention_layernorm:!",),
+            "mlp:moe:?": {
+                "": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+                "gate": ("gate:!",),  # NOTE(review): ":!" appears to keep the module out of the quantized set - confirm
+                "experts": {
+                    "#": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+                },
+            },
+        },
+    ]
+
+    @staticmethod
+    def _checkpoint_has_tensor(model_local_path: str, tensor_name: str) -> bool:
+        """Report whether the local checkpoint lists ``tensor_name``; ``True`` when that cannot be checked."""
+        if not model_local_path:
+            return True
+        # Check the shard index first: membership is tested against its "weight_map".
+        shard_index = os.path.join(model_local_path, "model.safetensors.index.json")
+        if os.path.isfile(shard_index):
+            with open(shard_index, encoding="utf-8") as index_file:
+                return tensor_name in json.load(index_file).get("weight_map", {})
+
+        # Otherwise look the name up among the keys of a single-file checkpoint.
+        single_file = os.path.join(model_local_path, "model.safetensors")
+        if os.path.isfile(single_file):
+            with safe_open(single_file, framework="pt", device="cpu") as reader:
+                return tensor_name in reader.keys()
+        return True
+
+    @staticmethod
+    def _drop_visual_merger_biases_if_checkpoint_omits_them(model, model_local_path: str) -> None:
+        """Unregister ``visual.merger`` biases whose weight is in the checkpoint but whose bias is not."""
+        visual = getattr(model, "visual", None)
+        merger = getattr(visual, "merger", None)
+        if not isinstance(merger, nn.Module):
+            return
+
+        for module_name, module in merger.named_modules():
+            # Only a registered bias parameter is droppable; register_parameter() raises KeyError otherwise.
+            if module._parameters.get("bias") is None:
+                continue
+
+            prefix = f"visual.merger.{module_name}" if module_name else "visual.merger"
+            weight_name = f"{prefix}.weight"
+            bias_name = f"{prefix}.bias"
+            if MimoV2QModel._checkpoint_has_tensor(model_local_path, bias_name):
+                continue
+            if not MimoV2QModel._checkpoint_has_tensor(model_local_path, weight_name):
+                continue
+
+            # MiMo V2.5 Base visual merger checkpoints include weights but omit
+            # default biases; align the shell so offload-backed save skips them.
+            module.register_parameter("bias", None)
+
+    @staticmethod
+    def _drop_parameter_if_checkpoint_omits_it(model, model_local_path: str, tensor_name: str) -> None:
+        """Unregister the parameter at dotted path ``tensor_name`` when the checkpoint does not store it."""
+        if MimoV2QModel._checkpoint_has_tensor(model_local_path, tensor_name):
+            return
+
+        # Walk the dotted owner path one attribute at a time; give up on the first missing hop.
+        owner_path, _, param_name = tensor_name.rpartition(".")
+        owner = model
+        for attr in owner_path.split("."):
+            owner = getattr(owner, attr, None)
+            if owner is None:
+                return
+
+        if isinstance(owner, nn.Module) and param_name in owner._parameters:
+            owner.register_parameter(param_name, None)
+
+    @staticmethod
+    def _drop_checkpoint_omitted_audio_tensors(model, model_local_path: str) -> None:
+        """Drop the audio input embedding weight when the checkpoint does not store it."""
+        # Remote MiMo marks it load-missing-ignored and feeds inputs_embeds, so no trained weight exists.
+        MimoV2QModel._drop_parameter_if_checkpoint_omits_it(
+            model,
+            model_local_path,
+            "audio_encoder.input_local_transformer.embed_tokens.weight",
+        )
+
+    def after_model_load(self, model, load_quantized_model=False):
+        model = super().after_model_load(model, load_quantized_model=load_quantized_model)
+        self._drop_visual_merger_biases_if_checkpoint_omits_them(model, self.model_local_path)
+        self._drop_checkpoint_omitted_audio_tensors(model, self.model_local_path)
+        return model  # shell parameters the checkpoint omits have been unregistered above
+
+    def pre_quantize_generate_hook_start(self):
+        """Rebuild `rotary_emb`/`swa_rotary_emb` on CPU; raise TypeError for an unexpected rotary class."""
+        model = self.model.model
+        rotary_emb_cls = type(model.rotary_emb)
+        if "MiMoV2RotaryEmbedding" not in rotary_emb_cls.__name__:  # explicit raise survives `python -O`
+            raise TypeError(f"expected a MiMoV2RotaryEmbedding, got {rotary_emb_cls.__name__}")
+        config = model.rotary_emb.config
+        # `_build_nonpersistent_buffer_template()` cannot infer `is_swa`, so both embeddings are rebuilt manually.
+        model.rotary_emb = rotary_emb_cls(config=config, is_swa=False, device=CPU)
+        model.swa_rotary_emb = rotary_emb_cls(config=config, is_swa=True, device=CPU)
diff --git a/tests/models/test_mimo_v2.py b/tests/models/test_mimo_v2.py
@@ -0,0 +1,26 @@
+# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
+# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+from gptqmodel.quantization.config import MoEConfig, ExpertsRoutingOverride
+from model_test import ModelTest
+
+
+class TestMimo(ModelTest):
+    NATIVE_MODEL_ID = "/monster/data/model/MiMo-V2.5-Base-BF16"
+    EVAL_TASKS_SLOW = {
+        "arc_challenge": {
+            "chat_template": True,
+            "acc": {"value": 0.2739, "floor_pct": 0.2},
+            "acc_norm": {"value": 0.3055, "floor_pct": 0.2},
+        },
+    }
+    EVAL_TASKS_FAST = ModelTest.derive_fast_eval_tasks(EVAL_TASKS_SLOW)
+    TRUST_REMOTE_CODE = True
+    USE_FLASH_ATTN = False
+    EVAL_BATCH_SIZE = 6
+    MOE_CONFIG = MoEConfig(routing=ExpertsRoutingOverride(num_experts_per_tok="all"))
+    MODEL_COMPAT_FAST_LAYER_POSITION = "first"
+
+    def test_mimo(self):
+        self.quantize_and_evaluate()
diff --git a/tests/test_mimo_v2_support.py b/tests/test_mimo_v2_support.py
@@ -0,0 +1,169 @@
+import json
+from types import SimpleNamespace
+
+from torch import nn
+
+from gptqmodel.models import auto
+from gptqmodel.models.definitions.mimo_v2 import MimoV2QModel
+
+
+_LOCAL_MIMO_V2_5_BASE_MODELING_SIGNATURE = {
+    "architectures": ["MiMoV2ForCausalLM"],
+    "attention_projection_layout": "fused_qkv",
+    "hidden_size": 4096,
+    "intermediate_size": 16384,
+    "model_type": "mimo_v2",
+    "moe_intermediate_size": 2048,
+    "n_routed_experts": 256,
+    "num_attention_heads": 64,
+    "num_experts_per_tok": 8,
+    "num_hidden_layers": 48,
+    "num_key_value_heads": 4,
+}
+
+
+class _FakeVisualMerger(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.ln_q = nn.LayerNorm(8)
+        self.mlp = nn.Sequential(
+            nn.Linear(8, 8),
+            nn.GELU(),
+            nn.Linear(8, 4),
+        )
+
+
+class _FakeAudioEncoder(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.input_local_transformer = nn.Module()
+        self.input_local_transformer.embed_tokens = nn.Embedding(16, 8)
+
+
+def test_mimo_v2_model_type_selects_definition(monkeypatch):
+    fake_config = SimpleNamespace(model_type="mimo_v2")
+
+    monkeypatch.setattr(auto, "resolve_trust_remote_code", lambda path, trust_remote_code=False: trust_remote_code)
+    monkeypatch.setattr(auto.AutoConfig, "from_pretrained", lambda *args, **kwargs: fake_config)
+
+    assert auto.check_and_get_model_definition("/monster/data/model/MiMo-V2.5-Base") is MimoV2QModel
+
+
+def test_mimo_v2_5_base_local_modeling_signature_snapshot():
+    assert _LOCAL_MIMO_V2_5_BASE_MODELING_SIGNATURE == {
+        "architectures": ["MiMoV2ForCausalLM"],
+        "attention_projection_layout": "fused_qkv",
+        "hidden_size": 4096,
+        "intermediate_size": 16384,
+        "model_type": "mimo_v2",
+        "moe_intermediate_size": 2048,
+        "n_routed_experts": 256,
+        "num_attention_heads": 64,
+        "num_experts_per_tok": 8,
+        "num_hidden_layers": 48,
+        "num_key_value_heads": 4,
+    }
+
+
+def test_mimo_v2_module_tree_expands_fused_attention_dense_mlp_and_moe_paths():
+    layer_modules = MimoV2QModel.simple_layer_modules(
+        model_config=SimpleNamespace(n_routed_experts=4),
+        quantize_config=SimpleNamespace(dynamic=None),
+    )
+    flat_modules = {name for block in layer_modules for name in block}
+
+    assert MimoV2QModel.require_trust_remote_code is True
+    assert MimoV2QModel.layer_modules_strict is False
+    assert MimoV2QModel.pre_lm_head_norm_module == "model.norm"
+    assert MimoV2QModel.rotary_embedding == "model.rotary_emb"
+    assert "self_attn.qkv_proj" in flat_modules
+    assert "self_attn.q_proj" in flat_modules
+    assert "self_attn.k_proj" in flat_modules
+    assert "self_attn.v_proj" in flat_modules
+    assert "self_attn.o_proj" in flat_modules
+    assert "mlp.gate_proj" in flat_modules
+    assert "mlp.up_proj" in flat_modules
+    assert "mlp.down_proj" in flat_modules
+    assert "mlp.experts.0.gate_proj" in flat_modules
+    assert "mlp.experts.0.up_proj" in flat_modules
+    assert "mlp.experts.0.down_proj" in flat_modules
+    assert "mlp.gate" not in flat_modules
+
+
+def test_mimo_v2_drops_visual_merger_biases_when_checkpoint_omits_them(tmp_path):
+    model = SimpleNamespace(
+        visual=SimpleNamespace(
+            merger=_FakeVisualMerger()
+        )
+    )
+    index = {
+        "metadata": {},
+        "weight_map": {
+            "visual.merger.ln_q.weight": "model.safetensors",
+            "visual.merger.mlp.0.weight": "model.safetensors",
+            "visual.merger.mlp.2.weight": "model.safetensors",
+        },
+    }
+    (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8")
+
+    MimoV2QModel._drop_visual_merger_biases_if_checkpoint_omits_them(model, str(tmp_path))
+
+    assert model.visual.merger.ln_q.bias is None
+    assert model.visual.merger.mlp[0].bias is None
+    assert model.visual.merger.mlp[2].bias is None
+
+
+def test_mimo_v2_keeps_visual_merger_biases_when_checkpoint_has_them(tmp_path):
+    model = SimpleNamespace(
+        visual=SimpleNamespace(
+            merger=_FakeVisualMerger()
+        )
+    )
+    index = {
+        "metadata": {},
+        "weight_map": {
+            "visual.merger.ln_q.weight": "model.safetensors",
+            "visual.merger.ln_q.bias": "model.safetensors",
+            "visual.merger.mlp.0.weight": "model.safetensors",
+            "visual.merger.mlp.0.bias": "model.safetensors",
+            "visual.merger.mlp.2.weight": "model.safetensors",
+            "visual.merger.mlp.2.bias": "model.safetensors",
+        },
+    }
+    (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8")
+
+    MimoV2QModel._drop_visual_merger_biases_if_checkpoint_omits_them(model, str(tmp_path))
+
+    assert model.visual.merger.ln_q.bias is not None
+    assert model.visual.merger.mlp[0].bias is not None
+    assert model.visual.merger.mlp[2].bias is not None
+
+
+def test_mimo_v2_drops_audio_input_embedding_when_checkpoint_omits_it(tmp_path):
+    model = SimpleNamespace(audio_encoder=_FakeAudioEncoder())
+    index = {
+        "metadata": {},
+        "weight_map": {
+            "audio_encoder.input_local_transformer.layers.0.input_layernorm.weight": "model.safetensors",
+        },
+    }
+    (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8")
+
+    MimoV2QModel._drop_checkpoint_omitted_audio_tensors(model, str(tmp_path))
+
+    assert model.audio_encoder.input_local_transformer.embed_tokens.weight is None
+
+
+def test_mimo_v2_keeps_audio_input_embedding_when_checkpoint_has_it(tmp_path):
+    model = SimpleNamespace(audio_encoder=_FakeAudioEncoder())
+    index = {
+        "metadata": {},
+        "weight_map": {
+            "audio_encoder.input_local_transformer.embed_tokens.weight": "model.safetensors",
+        },
+    }
+    (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8")
+
+    MimoV2QModel._drop_checkpoint_omitted_audio_tensors(model, str(tmp_path))
+
+    assert model.audio_encoder.input_local_transformer.embed_tokens.weight is not None