Skip to content

Commit fa932a1

Browse files
committed
support hy_v3 and ministral3
1 parent 28b3870 commit fa932a1

7 files changed

Lines changed: 152 additions & 2 deletions

File tree

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ Selected public references where teams or companies explicitly mention GPT-QMode
251251

252252
| Model | | | | | | | | | |
253253
|--------------------------|---|---------------------------------|--|------------------|--|---------------------------------|--|------------------------|---|
254-
| Apertus || EXAONE 3/4 || Dots1 || Mistral3 || Qwen 2/3/3.5 (Next/MoE) ||
254+
| Apertus || EXAONE 3/4 || Dots1 || Mistral3 / Ministral3 || Qwen 2/3/3.5 (Next/MoE) ||
255255
| Baichuan || Falcon (H1 / Mamba) || InternLM 1/2/2.5 || Mixtral || Qwen 2/2.5/3 VL ||
256256
| Bloom || FastVLM || Kimi K2 || MobileLLM || Qwen 2.5/3 Omni ||
257257
| ChatGLM || Gemma 1-4 / 3n || Klear || MOSS || RefinedWeb ||
@@ -266,7 +266,7 @@ Selected public references where teams or companies explicitly mention GPT-QMode
266266
| XVERSE || Brumby || Hymba || Mistral || Qwen 1/2/3/3.5 ||
267267
| MiniMax M2 || AfMoE || Bailing-MoE || LFM2-MoE || Marin ||
268268
| InternVL Chat || Laguna || Mimo / Mimo V2 || Zamba / Zamba2 || Intern S1 ||
269-
| HunYuan V1 Dense / MoE || | | | | | | | |
269+
| HunYuan V1 Dense / MoE || HY-V3 | | | | | | | |
270270

271271
Prism Bonsai GGUF checkpoints are supported for inference only through GPT-QModel's native GGUF path and internal GGUF runtime. Bonsai checkpoints load through the normal model path or repo argument and do not require the external `gguf` package. Prism model quantization is not included.
272272

gptqmodel/models/auto.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@
116116
from .definitions.hrm_text import HrmTextQModel # noqa: E402
117117
from .definitions.hunyuan_v1_dense import HunYuanDenseV1QModel # noqa: E402
118118
from .definitions.hunyuan_v1_moe import HunYuanMoEV1QModel # noqa: E402
119+
from .definitions.hy_v3 import HYV3QModel # noqa: E402
119120
from .definitions.hymba import HymbaQModel # noqa: E402
120121
from .definitions.instella import InstellaQModel # noqa: E402
121122
from .definitions.internlm import InternLMQModel # noqa: E402
@@ -139,6 +140,7 @@
139140
from .definitions.minicpmv import MiniCPMVQModel # noqa: E402
140141
from .definitions.minicpmv_4_6 import MiniCPMV4_6QModel # noqa: E402
141142
from .definitions.minimax_m2 import MiniMaxM2GPTQ # noqa: E402
143+
from .definitions.ministral3 import Ministral3GPTQ # noqa: E402
142144
from .definitions.mistral3 import Mistral3GPTQ
143145
from .definitions.mixtral import MixtralQModel # noqa: E402
144146
from .definitions.mllama import MLlamaQModel # noqa: E402
@@ -234,6 +236,7 @@
234236
"hrm_text": HrmTextQModel,
235237
"hunyuan_v1_dense": HunYuanDenseV1QModel,
236238
"hunyuan_v1_moe": HunYuanMoEV1QModel,
239+
"hy_v3": HYV3QModel,
237240
"qwen": QwenQModel,
238241
"mistral": LlamaQModel, # 100% llama clone
239242
"yi": LlamaQModel, # 100% llama clone
@@ -267,6 +270,7 @@
267270
"minicpmv4_6": MiniCPMV4_6QModel,
268271
"minimax": MiniMaxM2GPTQ,
269272
"minimax_m2": MiniMaxM2GPTQ,
273+
"ministral3": Ministral3GPTQ,
270274
"qwen2_moe": Qwen2MoeQModel,
271275
"qwen3_moe": Qwen3MoeQModel,
272276
"qwen3_next": Qwen3NextGPTQ,

gptqmodel/models/definitions/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
from .hrm_text import HrmTextQModel
4444
from .hunyuan_v1_dense import HunYuanDenseV1QModel
4545
from .hunyuan_v1_moe import HunYuanMoEV1QModel
46+
from .hy_v3 import HYV3QModel
4647
from .hymba import HymbaQModel
4748
from .instella import InstellaQModel
4849
from .internlm import InternLMQModel
@@ -56,6 +57,7 @@
5657
from .minicpmv import MiniCPMVQModel
5758
from .minicpmv_4_6 import MiniCPMV4_6QModel
5859
from .minimax_m2 import MiniMaxM2GPTQ
60+
from .ministral3 import Ministral3GPTQ
5961
from .mimo_v2 import MimoV2QModel
6062
from .mixtral import MixtralQModel
6163
from .mllama import MLlamaQModel
gptqmodel/models/definitions/hy_v3.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# SPDX-FileCopyrightText: 2026 ModelCloud.ai
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from ..base import BaseQModel
5+
from ..moe_lifecycle import GateUpDownMoELifecycleHooks
6+
7+
8+
class HYV3QModel(BaseQModel):
    """Module layout declaration for the ``hy_v3`` model type.

    The class carries no logic of its own: it only sets class-level
    attributes that ``BaseQModel`` consumes. The accompanying test suite
    exercises it through ``HYV3QModel.simple_layer_modules(...)``.
    """

    # HYV3 uses a dense first MLP layer and sparse MoE layers after it.
    # NOTE(review): non-strict matching is presumably what lets a single tree
    # describe both layer kinds — confirm against BaseQModel.
    layer_modules_strict = False
    # Name of the model-config attribute that supplies the expert count used
    # when the "#" placeholder under ``experts`` is expanded (with
    # ``num_experts=3`` the tests expect ``mlp.experts.0`` .. ``mlp.experts.2``).
    dynamic_expert_index = "num_experts"

    # Dotted path of the final norm module.
    # NOTE(review): presumably the norm applied right before lm_head — confirm.
    pre_lm_head_norm_module = "model.norm"

    # NOTE(review): consumed by the AWQ path in BaseQModel; exact semantics
    # are not visible from this file — confirm before changing.
    awq_scale_optimize_shape_dependent_modules = ["self_attn.o_proj"]

    # MoE hook object; behavior is defined in ``..moe_lifecycle``.
    moe_lifecycle_hooks = GateUpDownMoELifecycleHooks()

    # Tree walked from the model root down to the per-layer modules.
    # Leaf entries are written as "<name>:<flag>":
    #   * ":!" entries (norms, router gate, correction bias) do not appear in
    #     the output of ``simple_layer_modules`` (asserted by the tests).
    #   * ":0" / ":1" — NOTE(review): presumably a processing-group index
    #     within the layer; confirm against BaseQModel.
    module_tree = [
        "model",
        "layers",
        "#",  # NOTE(review): presumably the per-layer index placeholder
        {
            "input_layernorm": ("input_layernorm:!",),
            "self_attn": (
                "q_norm:!",
                "k_norm:!",
                "q_proj:0",
                "k_proj:0",
                "v_proj:0",
                "o_proj:1",
            ),
            "post_attention_layernorm": ("post_attention_layernorm:!",),
            # NOTE(review): the ":moe" suffix presumably tags this subtree as
            # a mixture-of-experts block — confirm against BaseQModel.
            "mlp:moe": {
                "gate": ("gate:!",),
                "e_score_correction_bias": ("e_score_correction_bias:!",),
                "experts": {
                    # One entry per expert index, e.g. ``mlp.experts.0.gate_proj``.
                    "#": ("gate_proj:0", "up_proj:0", "down_proj:1"),
                },
                "shared_experts": ("gate_proj:0", "up_proj:0", "down_proj:1"),
                # Empty key: projections directly under ``mlp`` (the dense
                # path), yielding ``mlp.gate_proj`` / ``mlp.up_proj`` /
                # ``mlp.down_proj``.
                "": ("gate_proj:0", "up_proj:0", "down_proj:1"),
            },
        },
    ]
45+
46+
47+
__all__ = ["HYV3QModel"]
gptqmodel/models/definitions/ministral3.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# SPDX-FileCopyrightText: 2026 ModelCloud.ai
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from ..base import BaseQModel
5+
6+
7+
class Ministral3GPTQ(BaseQModel):
    """Module layout declaration for the ``ministral3`` model type.

    Purely declarative: it sets class-level attributes that ``BaseQModel``
    consumes. The tree describes a decoder layer with one attention block
    and one gated MLP.
    """

    # Dotted path of the final norm module.
    # NOTE(review): presumably the norm applied right before lm_head — confirm.
    pre_lm_head_norm_module = "model.norm"

    # Tree walked from the model root down to the per-layer modules.
    # Leaf entries are written as "<name>:<flag>"; the layernorm entries carry
    # ":!" while the projections carry ":0" / ":1".
    # NOTE(review): flag semantics are defined in BaseQModel — ":!" presumably
    # excludes the module from quantization, and the digits presumably give a
    # processing-group index; confirm before editing.
    module_tree = [
        "model",
        "layers",
        "#",  # NOTE(review): presumably the per-layer index placeholder
        {
            "input_layernorm": ("input_layernorm:!",),
            "self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"),
            "post_attention_layernorm": ("post_attention_layernorm:!",),
            "mlp": ("gate_proj:0", "up_proj:0", "down_proj:1"),
        },
    ]
21+
22+
23+
__all__ = ["Ministral3GPTQ"]

tests/test_hy_v3_support.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from types import SimpleNamespace
2+
3+
from gptqmodel.models import auto
4+
from gptqmodel.models.definitions.hy_v3 import HYV3QModel
5+
6+
def test_hy_v3_model_type_selects_definition(monkeypatch):
    """A config reporting ``model_type == "hy_v3"`` resolves to ``HYV3QModel``."""
    fake_config = SimpleNamespace(model_type="hy_v3")

    # Replace remote-code resolution with a pass-through and make
    # ``AutoConfig.from_pretrained`` return the fake config, so the lookup is
    # driven only by ``fake_config.model_type``.
    monkeypatch.setattr(auto, "resolve_trust_remote_code", lambda path, trust_remote_code=False: trust_remote_code)
    monkeypatch.setattr(auto.AutoConfig, "from_pretrained", lambda *args, **kwargs: fake_config)

    assert auto.check_and_get_model_definition("/tmp/hy_v3") is HYV3QModel
13+
14+
15+
def test_hy_v3_module_tree_expands_dense_and_sparse_moe_paths():
    """The HY-V3 tree yields attention, dense MLP, shared and per-expert modules.

    Uses a config with three experts and checks that norm, router gate and
    correction-bias entries are absent from the expanded module list.
    """
    layer_modules = HYV3QModel.simple_layer_modules(
        model_config=SimpleNamespace(num_experts=3),
        quantize_config=SimpleNamespace(dynamic=None),
    )
    flat_modules = {name for block in layer_modules for name in block}

    assert HYV3QModel.layer_modules_strict is False
    assert HYV3QModel.dynamic_expert_index == "num_experts"

    # Attention projections are present; the q/k norms are not.
    assert "self_attn.q_proj" in flat_modules
    assert "self_attn.k_proj" in flat_modules
    assert "self_attn.v_proj" in flat_modules
    assert "self_attn.o_proj" in flat_modules
    assert "self_attn.q_norm" not in flat_modules
    assert "self_attn.k_norm" not in flat_modules

    # Dense MLP path (projections directly under ``mlp``).
    assert "mlp.gate_proj" in flat_modules
    assert "mlp.up_proj" in flat_modules
    assert "mlp.down_proj" in flat_modules

    # Shared experts.
    assert "mlp.shared_experts.gate_proj" in flat_modules
    assert "mlp.shared_experts.up_proj" in flat_modules
    assert "mlp.shared_experts.down_proj" in flat_modules

    # Routed experts, one entry per index in range(num_experts).
    assert "mlp.experts.0.gate_proj" in flat_modules
    assert "mlp.experts.1.up_proj" in flat_modules
    assert "mlp.experts.2.down_proj" in flat_modules

    # Router gate and correction bias are not part of the module list.
    assert "mlp.gate" not in flat_modules
    assert "mlp.e_score_correction_bias" not in flat_modules

    # Block positions are looked up only after the membership checks above,
    # so a missing module fails as a readable assertion rather than as a
    # bare StopIteration raised by next().
    first_expert_block = next(i for i, block in enumerate(layer_modules) if "mlp.experts.0.gate_proj" in block)
    shared_block = next(i for i, block in enumerate(layer_modules) if "mlp.shared_experts.gate_proj" in block)
    assert first_expert_block < shared_block

tests/test_ministral3_support.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from types import SimpleNamespace
2+
3+
from gptqmodel.models import auto
4+
from gptqmodel.models.definitions.ministral3 import Ministral3GPTQ
5+
6+
7+
def test_ministral3_model_type_selects_definition(monkeypatch):
    """A config reporting ``model_type == "ministral3"`` resolves to ``Ministral3GPTQ``."""
    fake_config = SimpleNamespace(model_type="ministral3")

    # Replace remote-code resolution with a pass-through and make
    # ``AutoConfig.from_pretrained`` return the fake config, so the lookup is
    # driven only by ``fake_config.model_type``.
    monkeypatch.setattr(auto, "resolve_trust_remote_code", lambda path, trust_remote_code=False: trust_remote_code)
    monkeypatch.setattr(auto.AutoConfig, "from_pretrained", lambda *args, **kwargs: fake_config)

    assert auto.check_and_get_model_definition("/tmp/ministral3") is Ministral3GPTQ
14+
15+
16+
def test_ministral3_module_tree_matches_text_only_layout():
    """The Ministral3 tree is rooted at ``model.layers.#`` and lists attention and MLP projections."""
    layer_modules = Ministral3GPTQ.simple_layer_modules(
        model_config=SimpleNamespace(),
        quantize_config=SimpleNamespace(dynamic=None),
    )
    flat_modules = {name for block in layer_modules for name in block}

    # The layer path starts directly at ``model`` (no wrapper prefix).
    assert Ministral3GPTQ.module_tree[:3] == ["model", "layers", "#"]

    # Attention projections.
    assert "self_attn.q_proj" in flat_modules
    assert "self_attn.k_proj" in flat_modules
    assert "self_attn.v_proj" in flat_modules
    assert "self_attn.o_proj" in flat_modules

    # Gated MLP projections.
    assert "mlp.gate_proj" in flat_modules
    assert "mlp.up_proj" in flat_modules
    assert "mlp.down_proj" in flat_modules
31+

0 commit comments

Comments
 (0)