From d9e91e770cb1fcd2a0ce1f134eda4f866181a722 Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud <zx@modelcloud.ai>
Date: Fri, 15 May 2026 09:01:49 +0800
Subject: [PATCH 1/3] support mimo_v2

Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
---
 gptqmodel/models/auto.py                 |   2 +
 gptqmodel/models/definitions/__init__.py |   1 +
 gptqmodel/models/definitions/mimo_v2.py  |  86 +++++++++++++++++
 tests/models/test_mimo_v2.py             |  28 ++++++
 tests/test_mimo_v2_support.py            | 115 +++++++++++++++++++++++
 5 files changed, 232 insertions(+)
 create mode 100644 gptqmodel/models/definitions/mimo_v2.py
 create mode 100644 tests/models/test_mimo_v2.py
 create mode 100644 tests/test_mimo_v2_support.py

diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py
index f6c590378..748ad69fc 100644
--- a/gptqmodel/models/auto.py
+++ b/gptqmodel/models/auto.py
@@ -125,6 +125,7 @@
 from .definitions.llava_qwen2 import LlavaQwen2QModel  # noqa: E402
 from .definitions.longcat_flash import LongCatFlashQModel  # noqa: E402
 from .definitions.mimo import MimoQModel  # noqa: E402
+from .definitions.mimo_v2 import MimoV2QModel  # noqa: E402
 from .definitions.minicpm import MiniCPMGPTQ  # noqa: E402
 from .definitions.minicpm3 import MiniCpm3QModel  # noqa: E402
 from .definitions.minicpm_o import MiniCPMOQModel  # noqa: E402
@@ -280,6 +281,7 @@
     "telechat": TeleChat2QModel,
     "instella": InstellaQModel,
     "mimo": MimoQModel,
+    "mimo_v2": MimoV2QModel,
     "falcon_h1": FalconH1QModel,
     "zamba": ZambaQModel,
     "zamba2": Zamba2QModel,
diff --git a/gptqmodel/models/definitions/__init__.py b/gptqmodel/models/definitions/__init__.py
index ebb3896bd..d23a1e62e 100644
--- a/gptqmodel/models/definitions/__init__.py
+++ b/gptqmodel/models/definitions/__init__.py
@@ -51,6 +51,7 @@
 from .minicpmv import MiniCPMVQModel
 from .minicpmv_4_6 import MiniCPMV4_6QModel
 from .minimax_m2 import MiniMaxM2GPTQ
+from .mimo_v2 import MimoV2QModel
 from .mixtral import MixtralQModel
 from .mllama import MLlamaQModel
 from .mobilellm import MobileLLMQModel
diff --git a/gptqmodel/models/definitions/mimo_v2.py b/gptqmodel/models/definitions/mimo_v2.py
new file mode 100644
index 000000000..a59d231eb
--- /dev/null
+++ b/gptqmodel/models/definitions/mimo_v2.py
@@ -0,0 +1,86 @@
+# SPDX-FileCopyrightText: 2026 ModelCloud.ai
+# SPDX-License-Identifier: Apache-2.0
+
+import json
+import os
+
+from safetensors import safe_open
+
+from gptqmodel.models.moe_lifecycle import GateUpDownMoELifecycleHooks
+
+from ..base import BaseQModel
+
+
+class MimoV2QModel(BaseQModel):
+    # MiMo V2 uses repository-defined configuration/modeling classes.
+    require_trust_remote_code = True
+
+    dynamic_expert_index = "n_routed_experts"
+
+    pre_lm_head_norm_module = "model.norm"
+    rotary_embedding = "model.rotary_emb"
+
+    awq_scale_optimize_shape_dependent_modules = ["self_attn.o_proj"]
+
+    moe_lifecycle_hooks = GateUpDownMoELifecycleHooks()
+
+    # MiMo V2 supports both split q/k/v and fused qkv checkpoints, and individual
+    # layers can be dense MLP or routed MoE according to config.moe_layer_freq.
+    layer_modules_strict = False
+
+    module_tree = [
+        "model",
+        "layers",
+        "#",
+        {
+            "input_layernorm": ("input_layernorm:!",),
+            "self_attn": ("qkv_proj:0", "q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"),
+            "post_attention_layernorm": ("post_attention_layernorm:!",),
+            "mlp:moe:?": {
+                "": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+                "gate": ("gate:!",),
+                "experts": {
+                    "#": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+                },
+            },
+        },
+    ]
+
+    @staticmethod
+    def _checkpoint_has_tensor(model_local_path: str, tensor_name: str) -> bool:
+        if not model_local_path:
+            return True
+
+        index_path = os.path.join(model_local_path, "model.safetensors.index.json")
+        if os.path.isfile(index_path):
+            with open(index_path, encoding="utf-8") as fp:
+                weight_map = json.load(fp).get("weight_map", {})
+            return tensor_name in weight_map
+
+        tensor_file = os.path.join(model_local_path, "model.safetensors")
+        if os.path.isfile(tensor_file):
+            with safe_open(tensor_file, framework="pt", device="cpu") as handler:
+                return tensor_name in handler.keys()
+
+        return True
+
+    @staticmethod
+    def _drop_visual_ln_q_bias_if_checkpoint_omits_it(model, model_local_path: str) -> None:
+        visual = getattr(model, "visual", None)
+        merger = getattr(visual, "merger", None)
+        ln_q = getattr(merger, "ln_q", None)
+        if ln_q is None or getattr(ln_q, "bias", None) is None:
+            return
+
+        bias_name = "visual.merger.ln_q.bias"
+        if MimoV2QModel._checkpoint_has_tensor(model_local_path, bias_name):
+            return
+
+        # MiMo V2.5 Base checkpoints omit this default LayerNorm bias; keep
+        # the shell parameters aligned so offload-backed save does not chase it.
+        ln_q.register_parameter("bias", None)
+
+    def after_model_load(self, model, load_quantized_model=False):
+        model = super().after_model_load(model, load_quantized_model=load_quantized_model)
+        self._drop_visual_ln_q_bias_if_checkpoint_omits_it(model, self.model_local_path)
+        return model
diff --git a/tests/models/test_mimo_v2.py b/tests/models/test_mimo_v2.py
new file mode 100644
index 000000000..a521a605f
--- /dev/null
+++ b/tests/models/test_mimo_v2.py
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
+# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+from gptqmodel.quantization.config import MoEConfig, ExpertsRoutingOverride
+from model_test import ModelTest
+
+
+class TestMimo(ModelTest):
+    # NATIVE_MODEL_ID = "/monster/data/model/MiMo-V2.5-Base-BF16"
+    NATIVE_MODEL_ID = "./temp/MiMo-V2.5-Base-BF16"
+    EVAL_TASKS_SLOW = {
+        "arc_challenge": {
+            "chat_template": True,
+            "acc": {"value": 0.2739, "floor_pct": 0.2},
+            "acc_norm": {"value": 0.3055, "floor_pct": 0.2},
+        },
+    }
+    EVAL_TASKS_FAST = ModelTest.derive_fast_eval_tasks(EVAL_TASKS_SLOW)
+    TRUST_REMOTE_CODE = True
+    USE_FLASH_ATTN = False
+    EVAL_BATCH_SIZE = 6
+    MOE_CONFIG = MoEConfig(routing=ExpertsRoutingOverride(num_experts_per_tok="all"))
+    MODEL_COMPAT_FAST_LAYER_POSITION = "first"
+    SAVE_PATH = "./temp/mimo_v2_gptq"
+
+    def test_mimo(self):
+        self.quantize_and_evaluate()
diff --git a/tests/test_mimo_v2_support.py b/tests/test_mimo_v2_support.py
new file mode 100644
index 000000000..9d7389924
--- /dev/null
+++ b/tests/test_mimo_v2_support.py
@@ -0,0 +1,115 @@
+import json
+from types import SimpleNamespace
+
+from torch import nn
+
+from gptqmodel.models import auto
+from gptqmodel.models.definitions.mimo_v2 import MimoV2QModel
+
+
+_LOCAL_MIMO_V2_5_BASE_MODELING_SIGNATURE = {
+    "architectures": ["MiMoV2ForCausalLM"],
+    "attention_projection_layout": "fused_qkv",
+    "hidden_size": 4096,
+    "intermediate_size": 16384,
+    "model_type": "mimo_v2",
+    "moe_intermediate_size": 2048,
+    "n_routed_experts": 256,
+    "num_attention_heads": 64,
+    "num_experts_per_tok": 8,
+    "num_hidden_layers": 48,
+    "num_key_value_heads": 4,
+}
+
+
+def test_mimo_v2_model_type_selects_definition(monkeypatch):
+    fake_config = SimpleNamespace(model_type="mimo_v2")
+
+    monkeypatch.setattr(auto, "resolve_trust_remote_code", lambda path, trust_remote_code=False: trust_remote_code)
+    monkeypatch.setattr(auto.AutoConfig, "from_pretrained", lambda *args, **kwargs: fake_config)
+
+    assert auto.check_and_get_model_definition("/monster/data/model/MiMo-V2.5-Base") is MimoV2QModel
+
+
+def test_mimo_v2_5_base_local_modeling_signature_snapshot():
+    assert _LOCAL_MIMO_V2_5_BASE_MODELING_SIGNATURE == {
+        "architectures": ["MiMoV2ForCausalLM"],
+        "attention_projection_layout": "fused_qkv",
+        "hidden_size": 4096,
+        "intermediate_size": 16384,
+        "model_type": "mimo_v2",
+        "moe_intermediate_size": 2048,
+        "n_routed_experts": 256,
+        "num_attention_heads": 64,
+        "num_experts_per_tok": 8,
+        "num_hidden_layers": 48,
+        "num_key_value_heads": 4,
+    }
+
+
+def test_mimo_v2_module_tree_expands_fused_attention_dense_mlp_and_moe_paths():
+    layer_modules = MimoV2QModel.simple_layer_modules(
+        model_config=SimpleNamespace(n_routed_experts=4),
+        quantize_config=SimpleNamespace(dynamic=None),
+    )
+    flat_modules = {name for block in layer_modules for name in block}
+
+    assert MimoV2QModel.require_trust_remote_code is True
+    assert MimoV2QModel.layer_modules_strict is False
+    assert MimoV2QModel.pre_lm_head_norm_module == "model.norm"
+    assert MimoV2QModel.rotary_embedding == "model.rotary_emb"
+    assert "self_attn.qkv_proj" in flat_modules
+    assert "self_attn.q_proj" in flat_modules
+    assert "self_attn.k_proj" in flat_modules
+    assert "self_attn.v_proj" in flat_modules
+    assert "self_attn.o_proj" in flat_modules
+    assert "mlp.gate_proj" in flat_modules
+    assert "mlp.up_proj" in flat_modules
+    assert "mlp.down_proj" in flat_modules
+    assert "mlp.experts.0.gate_proj" in flat_modules
+    assert "mlp.experts.0.up_proj" in flat_modules
+    assert "mlp.experts.0.down_proj" in flat_modules
+    assert "mlp.gate" not in flat_modules
+
+
+def test_mimo_v2_drops_visual_ln_q_bias_when_checkpoint_omits_it(tmp_path):
+    model = SimpleNamespace(
+        visual=SimpleNamespace(
+            merger=SimpleNamespace(
+                ln_q=nn.LayerNorm(8),
+            )
+        )
+    )
+    index = {
+        "metadata": {},
+        "weight_map": {
+            "visual.merger.ln_q.weight": "model.safetensors",
+        },
+    }
+    (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8")
+
+    MimoV2QModel._drop_visual_ln_q_bias_if_checkpoint_omits_it(model, str(tmp_path))
+
+    assert model.visual.merger.ln_q.bias is None
+
+
+def test_mimo_v2_keeps_visual_ln_q_bias_when_checkpoint_has_it(tmp_path):
+    model = SimpleNamespace(
+        visual=SimpleNamespace(
+            merger=SimpleNamespace(
+                ln_q=nn.LayerNorm(8),
+            )
+        )
+    )
+    index = {
+        "metadata": {},
+        "weight_map": {
+            "visual.merger.ln_q.weight": "model.safetensors",
+            "visual.merger.ln_q.bias": "model.safetensors",
+        },
+    }
+    (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8")
+
+    MimoV2QModel._drop_visual_ln_q_bias_if_checkpoint_omits_it(model, str(tmp_path))
+
+    assert model.visual.merger.ln_q.bias is not None

From e98b987b46748b6f2e8f7179244347be81d5d65b Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud <zx@modelcloud.ai>
Date: Fri, 15 May 2026 15:13:48 +0800
Subject: [PATCH 2/3] fix test_mimo_v2

Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
---
 gptqmodel/models/definitions/mimo_v2.py | 68 ++++++++++++++++++++---
 tests/models/test_mimo_v2.py            |  4 +-
 tests/test_mimo_v2_support.py           | 74 +++++++++++++++++++++----
 3 files changed, 124 insertions(+), 22 deletions(-)

diff --git a/gptqmodel/models/definitions/mimo_v2.py b/gptqmodel/models/definitions/mimo_v2.py
index a59d231eb..3443dbc3f 100644
--- a/gptqmodel/models/definitions/mimo_v2.py
+++ b/gptqmodel/models/definitions/mimo_v2.py
@@ -5,10 +5,12 @@
 import os
 
 from safetensors import safe_open
+from torch import nn
 
 from gptqmodel.models.moe_lifecycle import GateUpDownMoELifecycleHooks
 
 from ..base import BaseQModel
+from ...utils.torch import CPU
 
 
 class MimoV2QModel(BaseQModel):
@@ -65,22 +67,70 @@ def _checkpoint_has_tensor(model_local_path: str, tensor_name: str) -> bool:
         return True
 
     @staticmethod
-    def _drop_visual_ln_q_bias_if_checkpoint_omits_it(model, model_local_path: str) -> None:
+    def _drop_visual_merger_biases_if_checkpoint_omits_them(model, model_local_path: str) -> None:
         visual = getattr(model, "visual", None)
         merger = getattr(visual, "merger", None)
-        ln_q = getattr(merger, "ln_q", None)
-        if ln_q is None or getattr(ln_q, "bias", None) is None:
+        if not isinstance(merger, nn.Module):
             return
 
-        bias_name = "visual.merger.ln_q.bias"
-        if MimoV2QModel._checkpoint_has_tensor(model_local_path, bias_name):
+        for module_name, module in merger.named_modules():
+            if getattr(module, "bias", None) is None:
+                continue
+
+            prefix = "visual.merger"
+            if module_name:
+                prefix = f"{prefix}.{module_name}"
+            weight_name = f"{prefix}.weight"
+            bias_name = f"{prefix}.bias"
+            if MimoV2QModel._checkpoint_has_tensor(model_local_path, bias_name):
+                continue
+            if not MimoV2QModel._checkpoint_has_tensor(model_local_path, weight_name):
+                continue
+
+            # MiMo V2.5 Base visual merger checkpoints include weights but omit
+            # default biases; align the shell so offload-backed save skips them.
+            module.register_parameter("bias", None)
+
+    @staticmethod
+    def _drop_parameter_if_checkpoint_omits_it(model, model_local_path: str, tensor_name: str) -> None:
+        if MimoV2QModel._checkpoint_has_tensor(model_local_path, tensor_name):
+            return
+
+        module_path, _, leaf = tensor_name.rpartition(".")
+        module = model
+        for part in module_path.split("."):
+            module = getattr(module, part, None)
+            if module is None:
+                return
+
+        if not isinstance(module, nn.Module) or leaf not in module._parameters:
             return
 
-        # MiMo V2.5 Base checkpoints omit this default LayerNorm bias; keep
-        # the shell parameters aligned so offload-backed save does not chase it.
-        ln_q.register_parameter("bias", None)
+        module.register_parameter(leaf, None)
+
+    @staticmethod
+    def _drop_checkpoint_omitted_audio_tensors(model, model_local_path: str) -> None:
+        # MiMo's remote modeling code marks this input embedding as ignorable when missing at
+        # load and feeds the local transformer via inputs_embeds, so no trained weight exists.
+        MimoV2QModel._drop_parameter_if_checkpoint_omits_it(
+            model,
+            model_local_path,
+            "audio_encoder.input_local_transformer.embed_tokens.weight",
+        )
 
     def after_model_load(self, model, load_quantized_model=False):
         model = super().after_model_load(model, load_quantized_model=load_quantized_model)
-        self._drop_visual_ln_q_bias_if_checkpoint_omits_it(model, self.model_local_path)
+        self._drop_visual_merger_biases_if_checkpoint_omits_them(model, self.model_local_path)
+        self._drop_checkpoint_omitted_audio_tensors(model, self.model_local_path)
         return model
+
+    def pre_quantize_generate_hook_start(self):
+        # `_build_nonpersistent_buffer_template()` cannot infer the `is_swa` argument of the
+        # three-argument MiMoV2RotaryEmbedding constructor, so rebuild both embeddings here.
+        model = self.model.model
+        rotary_emb_cls = type(model.rotary_emb)
+        if "MiMoV2RotaryEmbedding" not in rotary_emb_cls.__name__:  # explicit: survives `python -O`
+            raise TypeError(f"expected a MiMoV2RotaryEmbedding, got {rotary_emb_cls.__name__}")
+        config = model.rotary_emb.config
+        model.rotary_emb = rotary_emb_cls(config=config, is_swa=False, device=CPU)
+        model.swa_rotary_emb = rotary_emb_cls(config=config, is_swa=True, device=CPU)
diff --git a/tests/models/test_mimo_v2.py b/tests/models/test_mimo_v2.py
index a521a605f..0cf0f02e4 100644
--- a/tests/models/test_mimo_v2.py
+++ b/tests/models/test_mimo_v2.py
@@ -7,8 +7,7 @@
 
 
 class TestMimo(ModelTest):
-    # NATIVE_MODEL_ID = "/monster/data/model/MiMo-V2.5-Base-BF16"
-    NATIVE_MODEL_ID = "./temp/MiMo-V2.5-Base-BF16"
+    NATIVE_MODEL_ID = "/monster/data/model/MiMo-V2.5-Base-BF16"
     EVAL_TASKS_SLOW = {
         "arc_challenge": {
             "chat_template": True,
@@ -22,7 +21,6 @@ class TestMimo(ModelTest):
     EVAL_BATCH_SIZE = 6
     MOE_CONFIG = MoEConfig(routing=ExpertsRoutingOverride(num_experts_per_tok="all"))
     MODEL_COMPAT_FAST_LAYER_POSITION = "first"
-    SAVE_PATH = "./temp/mimo_v2_gptq"
 
     def test_mimo(self):
         self.quantize_and_evaluate()
diff --git a/tests/test_mimo_v2_support.py b/tests/test_mimo_v2_support.py
index 9d7389924..23886acfe 100644
--- a/tests/test_mimo_v2_support.py
+++ b/tests/test_mimo_v2_support.py
@@ -22,6 +22,24 @@
 }
 
 
+class _FakeVisualMerger(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.ln_q = nn.LayerNorm(8)
+        self.mlp = nn.Sequential(
+            nn.Linear(8, 8),
+            nn.GELU(),
+            nn.Linear(8, 4),
+        )
+
+
+class _FakeAudioEncoder(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.input_local_transformer = nn.Module()
+        self.input_local_transformer.embed_tokens = nn.Embedding(16, 8)
+
+
 def test_mimo_v2_model_type_selects_definition(monkeypatch):
     fake_config = SimpleNamespace(model_type="mimo_v2")
 
@@ -72,33 +90,33 @@ def test_mimo_v2_module_tree_expands_fused_attention_dense_mlp_and_moe_paths():
     assert "mlp.gate" not in flat_modules
 
 
-def test_mimo_v2_drops_visual_ln_q_bias_when_checkpoint_omits_it(tmp_path):
+def test_mimo_v2_drops_visual_merger_biases_when_checkpoint_omits_them(tmp_path):
     model = SimpleNamespace(
         visual=SimpleNamespace(
-            merger=SimpleNamespace(
-                ln_q=nn.LayerNorm(8),
-            )
+            merger=_FakeVisualMerger()
         )
     )
     index = {
         "metadata": {},
         "weight_map": {
             "visual.merger.ln_q.weight": "model.safetensors",
+            "visual.merger.mlp.0.weight": "model.safetensors",
+            "visual.merger.mlp.2.weight": "model.safetensors",
         },
     }
     (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8")
 
-    MimoV2QModel._drop_visual_ln_q_bias_if_checkpoint_omits_it(model, str(tmp_path))
+    MimoV2QModel._drop_visual_merger_biases_if_checkpoint_omits_them(model, str(tmp_path))
 
     assert model.visual.merger.ln_q.bias is None
+    assert model.visual.merger.mlp[0].bias is None
+    assert model.visual.merger.mlp[2].bias is None
 
 
-def test_mimo_v2_keeps_visual_ln_q_bias_when_checkpoint_has_it(tmp_path):
+def test_mimo_v2_keeps_visual_merger_biases_when_checkpoint_has_them(tmp_path):
     model = SimpleNamespace(
         visual=SimpleNamespace(
-            merger=SimpleNamespace(
-                ln_q=nn.LayerNorm(8),
-            )
+            merger=_FakeVisualMerger()
         )
     )
     index = {
@@ -106,10 +124,46 @@ def test_mimo_v2_keeps_visual_ln_q_bias_when_checkpoint_has_it(tmp_path):
         "weight_map": {
             "visual.merger.ln_q.weight": "model.safetensors",
             "visual.merger.ln_q.bias": "model.safetensors",
+            "visual.merger.mlp.0.weight": "model.safetensors",
+            "visual.merger.mlp.0.bias": "model.safetensors",
+            "visual.merger.mlp.2.weight": "model.safetensors",
+            "visual.merger.mlp.2.bias": "model.safetensors",
         },
     }
     (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8")
 
-    MimoV2QModel._drop_visual_ln_q_bias_if_checkpoint_omits_it(model, str(tmp_path))
+    MimoV2QModel._drop_visual_merger_biases_if_checkpoint_omits_them(model, str(tmp_path))
 
     assert model.visual.merger.ln_q.bias is not None
+    assert model.visual.merger.mlp[0].bias is not None
+    assert model.visual.merger.mlp[2].bias is not None
+
+
+def test_mimo_v2_drops_audio_input_embedding_when_checkpoint_omits_it(tmp_path):
+    model = SimpleNamespace(audio_encoder=_FakeAudioEncoder())
+    index = {
+        "metadata": {},
+        "weight_map": {
+            "audio_encoder.input_local_transformer.layers.0.input_layernorm.weight": "model.safetensors",
+        },
+    }
+    (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8")
+
+    MimoV2QModel._drop_checkpoint_omitted_audio_tensors(model, str(tmp_path))
+
+    assert model.audio_encoder.input_local_transformer.embed_tokens.weight is None
+
+
+def test_mimo_v2_keeps_audio_input_embedding_when_checkpoint_has_it(tmp_path):
+    model = SimpleNamespace(audio_encoder=_FakeAudioEncoder())
+    index = {
+        "metadata": {},
+        "weight_map": {
+            "audio_encoder.input_local_transformer.embed_tokens.weight": "model.safetensors",
+        },
+    }
+    (tmp_path / "model.safetensors.index.json").write_text(json.dumps(index), encoding="utf-8")
+
+    MimoV2QModel._drop_checkpoint_omitted_audio_tensors(model, str(tmp_path))
+
+    assert model.audio_encoder.input_local_transformer.embed_tokens.weight is not None

From 351df1760d2aac5ff919c8e9e48e52349b1195df Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud <zx@modelcloud.ai>
Date: Fri, 15 May 2026 15:20:00 +0800
Subject: [PATCH 3/3] update README.md

Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f3d40bb5b..fdc92c9aa 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@
 
 ## Latest News
 
+* 05/15/2026 7.1.0-dev `main`: ✨ Added `mimo_v2` model support
 * 05/13/2026 7.1.0-dev `main`: ✨ Added `minicpmv_4_6` model support
 * 05/07/2026 7.1.0-dev `main`: ✨ Added `GLM-4.5V`, `GLM-4.6V`, `Zamba` and `Zamba2` model support
 * 04/29/2026 7.1.0-dev `main`: ✨ Added PoolSideAI `Laguna` model support for fused Laguna MoE checkpoints. Added `ERNIE 4.5 VL MoE`, `Ling-2.6-flash` and NVIDIA `Nemotron 3 Nano Omni` model support.
@@ -260,7 +261,7 @@ Selected public references where teams or companies explicitly mention GPT-QMode
 | ERNIE 4.5 / MoE / VL MoE | ✅ | GLM 4/4V/4.5V/4.6V/5/5.1/OCR/ASR | ✅ | GLM4 MoE / Lite / 4.5V MoE | ✅ | MiniCPM 3/O/V/V 4_6 | ✅ | PanGu-α                 | ✅ |
 | XVERSE                   | ✅ | Brumby                          | ✅ | Hymba            | ✅ | Mistral             | ✅ | Qwen 1/2/3/3.5          | ✅ |
 | MiniMax M2               | ✅ | AfMoE                           | ✅ | Bailing-MoE      | ✅ | LFM2-MoE            | ✅ | Marin                   | ✅ |
-| InternVL Chat            | ✅ | Laguna                          | ✅ | Zamba / Zamba2   | ✅ |                     |   |                         |   |
+| InternVL Chat            | ✅ | Laguna                          | ✅ | MiMo / MiMo V2   | ✅ | Zamba / Zamba2      | ✅ |                         |   |
 
 Prism Bonsai GGUF checkpoints are supported for inference only through GPT-QModel's native GGUF path and internal GGUF runtime. Bonsai checkpoints load through the normal model path or repo argument and do not require the external `gguf` package. Prism model quantization is not included.