Skip to content

Commit c6cd44a

Browse files
[MODEL] support ovis2 6 moe (#2899)
* support ovis2_6_moe

  Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>

* BaseQModel add field: defuser_module_paths

  Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>

---------

Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
1 parent f05aa5a commit c6cd44a

15 files changed

Lines changed: 256 additions & 27 deletions

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
## Latest News
2323

24+
* 05/19/2026 7.1.0-dev `main`: ✨ Added `ovis2_6_moe` model support
2425
* 05/18/2026 7.1.0-dev `main`: ✨ Added `ovis2_5` model support
2526
* 05/15/2026 7.1.0-dev `main`: ✨ Added `mimo_v2` model support
2627
* 05/13/2026 7.1.0-dev `main`: ✨ Added `minicpmv_4_6` and `DeepSeek V4` model support
@@ -257,7 +258,7 @@ Selected public references where teams or companies explicitly mention GPT-QMode
257258
| DBRX Converted || GPT-2 || Llama 3.2 VL || Nemotron Ultra || TeleChat2 ||
258259
| Deci || GPT-J || Llama 4 || OPT || Trinity ||
259260
| DeepSeek-V2/V3/V4/R1 || GPT-OSS || LongCat Flash || OLMo2 / LLaDA2 || Yi ||
260-
| DeepSeek-V2-Lite || Granite / Granite MoE || LongLLaMA || Ovis 1.6/2/2.5 || Seed-OSS ||
261+
| DeepSeek-V2-Lite || Granite / Granite MoE || LongLLaMA || Ovis 1.6/2/2.5/2.6 MoE || Seed-OSS ||
261262
| Dream || GRIN-MoE || Instella || Phi 1-4 || Voxtral ||
262263
| ERNIE 4.5 / MoE / VL MoE || GLM 4/4V/4.5V/4.6V/5/5.1/OCR/ASR || GLM4 MoE / Lite / 4.5V MoE || MiniCPM 3/O/V/V 4_6 || PanGu-α ||
263264
| XVERSE || Brumby || Hymba || Mistral || Qwen 1/2/3/3.5 ||

gptqmodel/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -175,12 +175,12 @@ def _build_device_thread_pool():
175175
"cpu": WarmupTask(run_torch_linalg_warmup, scope=WarmUpCtx.THREAD_AND_DEVICE),
176176
},
177177
workers={
178-
"cuda:per": 4,
178+
"cuda:per": 1,
179179
"xpu:per": 1,
180180
"npu:per": 1,
181-
"mps": 8,
182-
"cpu": min(12, max(1, (os.cpu_count() or 1) + 1 // 2)), # count + 1, fixed pool size > 1 check when count=3
183-
"model_loader:cpu": 2,
181+
"mps": 1,
182+
"cpu": 1,  # fixed at one worker (no longer derived from os.cpu_count())
183+
"model_loader:cpu": 1,
184184
},
185185
empty_cache_every_n=512,
186186
)

gptqmodel/models/auto.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@
147147
from .definitions.ovis import OvisQModel # noqa: E402
148148
from .definitions.ovis2 import Ovis2QModel # noqa: E402
149149
from .definitions.ovis2_5 import Ovis2_5QModel # noqa: E402
150+
from .definitions.ovis2_6_moe import Ovis2_6_MoeQModel # noqa: E402
150151
from .definitions.pangu_alpha import PanguAlphaQModel # noqa: E402
151152
from .definitions.phi import PhiQModel # noqa: E402
152153
from .definitions.phi3 import Phi3QModel, PhiMoEGPTQForCausalLM # noqa: E402
@@ -285,6 +286,7 @@
285286
"ovis": OvisQModel,
286287
"ovis2": Ovis2QModel,
287288
"ovis2_5": Ovis2_5QModel,
289+
"ovis2_6_moe": Ovis2_6_MoeQModel,
288290
"telechat": TeleChat2QModel,
289291
"instella": InstellaQModel,
290292
"mimo": MimoQModel,

gptqmodel/models/base.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,11 @@ class BaseQModel(nn.Module):
266266

267267
INPUT_EMBEDDING_EXTRA_ARGS = None
268268

269+
# Some models (e.g. ovis2_6_moe) do not contain MoE layers directly.
270+
# The actual experts live inside submodules (e.g. Qwen3MoeModel.mlp.experts),
271+
# so `defuser_module_paths` is used to explicitly locate and defuse them.
272+
defuser_module_paths = None
273+
269274
def __init__(
270275
self,
271276
model: PreTrainedModel,

gptqmodel/models/definitions/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
from .opt import OptQModel
6262
from .ovis import OvisQModel
6363
from .ovis2_5 import Ovis2_5QModel
64+
from .ovis2_6_moe import Ovis2_6_MoeQModel
6465
from .phi import PhiQModel
6566
from .phi3 import Phi3QModel
6667
from .qwen import QwenQModel
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
2+
# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
3+
# SPDX-License-Identifier: Apache-2.0
4+
# Contact: qubitium@modelcloud.ai, x.com/qubitium
5+
6+
import torch
7+
from torch import nn
8+
9+
from ..moe_lifecycle import GateUpDownMoELifecycleHooks
10+
from .ovis2_5 import Ovis2_5QModel
11+
12+
13+
class Ovis2_6_MoeQModel(Ovis2_5QModel):
    """Quantization definition for Ovis 2.6 MoE, built on the Ovis 2.5 definition.

    The class-level attributes below are declarative settings; the methods
    materialize the vision tower's ``post_layernorm`` parameters when they are
    still on the meta device before the pre-quantize generate hook runs.
    """

    dynamic_expert_index = "num_experts"

    pre_lm_head_norm_module = "llm.model.norm"
    rotary_embedding = "llm.model.rotary_emb"

    awq_scale_optimize_shape_dependent_modules = ["self_attn.o_proj"]

    # Extra module paths handed to the loader's defuser pass.
    defuser_module_paths = ("llm",)

    moe_lifecycle_hooks = GateUpDownMoELifecycleHooks()

    module_tree = [
        "llm",
        "model",
        "layers",
        "#",
        {
            "input_layernorm": ("input_layernorm:!",),
            "self_attn": ("q_norm:!", "k_norm:!", "q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"),
            "post_attention_layernorm": ("post_attention_layernorm:!",),
            "mlp:moe:?": {
                "gate": ("gate:!",),
                "experts": {
                    "#": ("gate_proj:0", "up_proj:0", "down_proj:1"),
                },
            },
        },
    ]

    @staticmethod
    def _materialize_layernorm_defaults(layernorm: nn.LayerNorm, device: torch.device) -> None:
        """Replace meta-device ``weight``/``bias`` of ``layernorm`` with default values.

        A meta ``weight`` becomes all ones and a meta ``bias`` becomes all
        zeros, created on ``device`` with the dtype and ``requires_grad`` flag
        of the parameter being replaced. Parameters that are ``None`` or not
        on the meta device are left untouched.
        """
        defaults = (("weight", torch.ones), ("bias", torch.zeros))
        for param_name, fill in defaults:
            current = getattr(layernorm, param_name)
            if current is None:
                continue

            is_on_meta = getattr(current, "is_meta", False) or current.device.type == "meta"
            if not is_on_meta:
                continue

            values = fill(layernorm.normalized_shape, device=device, dtype=current.dtype)
            setattr(
                layernorm,
                param_name,
                nn.Parameter(values, requires_grad=current.requires_grad),
            )

    def _materialize_missing_vision_post_layernorm(self, device: torch.device) -> None:
        """Materialize ``visual_tokenizer.vit.vision_model.post_layernorm`` if present.

        Any missing hop below ``visual_tokenizer`` resolves to ``None`` and the
        call becomes a no-op; only an ``nn.LayerNorm`` instance is processed.
        """
        candidate = self.model.visual_tokenizer
        for attr in ("vit", "vision_model", "post_layernorm"):
            candidate = getattr(candidate, attr, None)

        if isinstance(candidate, nn.LayerNorm):
            self._materialize_layernorm_defaults(candidate, device)

    def pre_quantize_generate_hook_start(self):
        """Materialize the vision post-layernorm, then run the parent hook."""
        # Ovis 2.6 checkpoints ship without the SigLIP2 post_layernorm tensors
        # although the remote code still builds that LayerNorm. Fall back to
        # its default initialization rather than resolving checkpoint keys
        # that do not exist.
        target_device = torch.device(self.quantize_config.device)
        self._materialize_missing_vision_post_layernorm(target_device)
        super().pre_quantize_generate_hook_start()

gptqmodel/models/loader.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,21 @@ def _maybe_print_module_tree(model) -> None:
9999
print_module_tree(model=model)
100100

101101

102+
def _convert_model_with_defuser(cls, model, cleanup_original: bool) -> bool:
    """Run ``defuser.convert_model`` on ``model`` and on extra paths declared by ``cls``.

    The whole model is converted first. Then every entry of
    ``cls.defuser_module_paths`` (if the attribute exists and is non-empty) is
    resolved with ``get_module_by_name_prefix`` and converted as well.

    Args:
        cls: Model definition class; its optional ``defuser_module_paths``
            attribute may be ``None``, a single path string, or an iterable of
            path strings.
        model: The loaded model to convert in place.
        cleanup_original: Forwarded unchanged to ``defuser.convert_model``.

    Returns:
        The result of the whole-model conversion, replaced by the result of a
        sub-module conversion whenever that result is truthy.
    """
    converted = defuser.convert_model(model, cleanup_original=cleanup_original)

    module_paths = getattr(cls, "defuser_module_paths", None) or ()
    if isinstance(module_paths, str):
        # A bare string (e.g. `"llm"` or `("llm")` without the trailing comma)
        # would otherwise be iterated one character at a time.
        module_paths = (module_paths,)

    for module_path in module_paths:
        module, _ = get_module_by_name_prefix(model, module_path)
        if module is None:
            # Best-effort: an unresolved path is reported and skipped.
            log.warn("Loader: defuser module path `%s` was not found.", module_path)
            continue
        converted = defuser.convert_model(module, cleanup_original=cleanup_original) or converted

    return converted
115+
116+
102117
def _supports_flash_attn_2(config: PretrainedConfig) -> bool:
103118
"""Detect whether the resolved HF architecture exposes FA2 kernels."""
104119

@@ -727,12 +742,12 @@ def skip(*args, **kwargs):
727742
)
728743
if getattr(model, "config", None) is config:
729744
model.config = copy.deepcopy(config)
730-
defuser.convert_model(model, cleanup_original=False)
745+
_convert_model_with_defuser(cls, model, cleanup_original=False)
731746
model._model_init_kwargs = fallback_init_kwargs
732747
_maybe_print_module_tree(model=model)
733748
turtle_model = None
734749
else:
735-
defuser.convert_model(model, cleanup_original=False)
750+
_convert_model_with_defuser(cls, model, cleanup_original=False)
736751
shell_model_init_kwargs = dict(model_init_kwargs_without_internal)
737752
shell_model_init_kwargs.update(hf_gguf_load_kwargs)
738753
model._model_init_kwargs = shell_model_init_kwargs
@@ -768,7 +783,7 @@ def skip(*args, **kwargs):
768783
)
769784
if getattr(model, "config", None) is config:
770785
model.config = copy.deepcopy(config)
771-
defuser.convert_model(model, cleanup_original=False)
786+
_convert_model_with_defuser(cls, model, cleanup_original=False)
772787
direct_model_init_kwargs = dict(model_init_kwargs_without_internal)
773788
direct_model_init_kwargs.update(hf_gguf_load_kwargs)
774789
model._model_init_kwargs = direct_model_init_kwargs
@@ -1188,7 +1203,7 @@ def from_quantized(
11881203
)
11891204
else:
11901205
raise
1191-
defuser.convert_model(model, cleanup_original=True)
1206+
_convert_model_with_defuser(cls, model, cleanup_original=True)
11921207
model.checkpoint_file_name = model_save_name
11931208
if native_gguf_qspec is not None:
11941209
gguf_tensor_key_mapping = _build_gguf_tensor_key_mapping(model, config)

gptqmodel/utils/hf.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1317,6 +1317,15 @@ def encoder_init_compat(self, encoder_config):
13171317
if vision_model_cls:
13181318
try_patch_legacy_flash_attn_flag(vision_model_cls)
13191319

1320+
if config.model_type == "ovis2_6_moe":
1321+
vision_model_cls = getattr(
1322+
remote_module,
1323+
"Siglip2NavitModel",
1324+
None,
1325+
)
1326+
if vision_model_cls:
1327+
try_patch_legacy_flash_attn_flag(vision_model_cls)
1328+
13201329
if (
13211330
outer_model_cls is not None
13221331
and hasattr(outer_model_cls, "tie_weights")
@@ -1359,7 +1368,7 @@ def tie_weights_compat(self, *args, **kwargs):
13591368
formatter_cls.support_tokenizer_types = support_tokenizer_types
13601369
formatter_cls._gptqmodel_tokenizer_backend_patch = True
13611370

1362-
if getattr(config, "model_type", None) == "ovis2_5":
1371+
if getattr(config, "model_type", None) in {"ovis2_5", "ovis2_6", "ovis2_6_moe"}:
13631372
register_runtime_automodel_config(config, remote_module, "vit_config", "Siglip2NavitModel")
13641373

13651374
if getattr(config, "model_type", None) == "hymba" and remote_module is not None:
@@ -1520,9 +1529,8 @@ def try_patch_legacy_flash_attn_flag(model_cls):
15201529
return
15211530

15221531
# The remote modeling code for some models (for example, Ovis) still relies on `_supports_flash_attn_2`.
1523-
if hasattr(model_cls, "_supports_flash_attn"):
1524-
if not hasattr(model_cls, "_supports_flash_attn_2"):
1525-
setattr(model_cls, "_supports_flash_attn_2", bool(model_cls._supports_flash_attn))
1532+
if hasattr(model_cls, "_supports_flash_attn") and not hasattr(model_cls, "_supports_flash_attn_2"):
1533+
setattr(model_cls, "_supports_flash_attn_2", bool(model_cls._supports_flash_attn))
15261534
return
15271535

15281536
# Find the most specific class that explicitly declares the newer

gptqmodel/utils/structure.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2072,7 +2072,6 @@ def _copy_checkpoint_tensors_into_submodule(
20722072
grouped_names: Dict[str, list[tuple[str, str, str, Optional[int], Optional[int], Optional[int]]]] = {}
20732073
for rel_name in t_params:
20742074
full_name, expert_index, split_index, split_dim = self._resolve_checkpoint_tensor_source(module_path, rel_name)
2075-
print("full_name", full_name, rel_name)
20762075
if full_name is None:
20772076
continue
20782077
shard = self._weight_map.get(full_name)

tests/models/ovis/image_to_test_dataset.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from gptqmodel.models.definitions.ovis import OvisQModel
1313
from gptqmodel.models.definitions.ovis2 import Ovis2QModel
1414
from gptqmodel.models.definitions.ovis2_5 import Ovis2_5QModel
15+
from gptqmodel.models.definitions.ovis2_6_moe import Ovis2_6_MoeQModel
1516
from gptqmodel.models.definitions.qwen3_vl import Qwen3_VLQModel
1617

1718

@@ -98,6 +99,9 @@ def get_calib_dataset(model):
9899
if isinstance(model, Ovis2_5QModel):
99100
return prepare_dataset(format_ovis2_dataset, n_sample=20)
100101

102+
if isinstance(model, Ovis2_6_MoeQModel):
103+
return prepare_dataset(format_ovis2_dataset, n_sample=20)
104+
101105
if (
102106
isinstance(model, BaseQwen2VLGPTQ)
103107
or isinstance(model, Qwen3_VLQModel)

0 commit comments

Comments
 (0)