From 931c9ead28fab1c89158c3f3b58f2966d281af69 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Thu, 12 Mar 2026 19:12:52 +0800 Subject: [PATCH 1/6] Adapt module_tree for compatibility with Transformers v5.3.0 modeling code Signed-off-by: ZX-ModelCloud --- gptqmodel/models/definitions/minimax_m2.py | 10 +++++----- gptqmodel/models/definitions/mixtral.py | 10 +++++----- gptqmodel/models/definitions/phi3.py | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/gptqmodel/models/definitions/minimax_m2.py b/gptqmodel/models/definitions/minimax_m2.py index 2592cc51a..ba81f1161 100644 --- a/gptqmodel/models/definitions/minimax_m2.py +++ b/gptqmodel/models/definitions/minimax_m2.py @@ -4,7 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from ..base import BaseQModel -from ..moe_lifecycle import W1W3W2MoELifecycleHooks +from ..moe_lifecycle import GateUpDownMoELifecycleHooks class MiniMaxM2GPTQ(BaseQModel): @@ -21,8 +21,8 @@ class MiniMaxM2GPTQ(BaseQModel): dynamic_expert_index = "num_local_experts" - # MoE lifecycle hooks for w1/w3/w2 pattern - moe_lifecycle_hooks = W1W3W2MoELifecycleHooks() + # MoE lifecycle hooks for gate_proj/up_proj/down_proj pattern + moe_lifecycle_hooks = GateUpDownMoELifecycleHooks() module_tree = [ "model", @@ -39,11 +39,11 @@ class MiniMaxM2GPTQ(BaseQModel): "o_proj:1", ), "post_attention_layernorm": ("post_attention_layernorm:!",), - "block_sparse_moe:moe": { # MoE module + "mlp:moe": { # MoE module "gate": ("gate:!",), "e_score_correction_bias": ("e_score_correction_bias:!",), "experts": { - "#": ("w1:0", "w3:0", "w2:1"), + "#": ("gate_proj:0", "up_proj:0", "down_proj:1"), }, }, }, diff --git a/gptqmodel/models/definitions/mixtral.py b/gptqmodel/models/definitions/mixtral.py index de4498c37..f9e15df26 100644 --- a/gptqmodel/models/definitions/mixtral.py +++ b/gptqmodel/models/definitions/mixtral.py @@ -4,14 +4,14 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from ..base import BaseQModel -from ..moe_lifecycle import W1W3W2MoELifecycleHooks +from ..moe_lifecycle import GateUpDownMoELifecycleHooks class MixtralQModel(BaseQModel): pre_lm_head_norm_module = "model.norm" - # MoE lifecycle hooks for w1/w3/w2 pattern - moe_lifecycle_hooks = W1W3W2MoELifecycleHooks() + # MoE lifecycle hooks for gate_proj/up_proj/down_proj pattern + moe_lifecycle_hooks = GateUpDownMoELifecycleHooks() module_tree = [ "model", @@ -21,9 +21,9 @@ class MixtralQModel(BaseQModel): "input_layernorm": ("input_layernorm:!",), "self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"), "post_attention_layernorm": ("post_attention_layernorm:!",), - "block_sparse_moe:moe": { + "mlp:moe": { "experts": { - "#": ("w1:0", "w3:0", "w2:1"), + "#": ("gate_proj:0", "up_proj:0", "down_porj:1"), } } } diff --git a/gptqmodel/models/definitions/phi3.py b/gptqmodel/models/definitions/phi3.py index 84ab4683d..62047d7a1 100644 --- a/gptqmodel/models/definitions/phi3.py +++ b/gptqmodel/models/definitions/phi3.py @@ -29,9 +29,9 @@ class PhiMoEGPTQForCausalLM(BaseQModel): "input_layernorm": ("input_layernorm:!",), "self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"), "post_attention_layernorm": ("post_attention_layernorm:!",), - "block_sparse_moe:moe": { + "mlp:moe": { "experts": { - "#": ("w1:0", "w2:1"), + "#": ("gate_proj:0", "up_proj:0", "down_porj:1"), }, }, } From 2888ce4e936a78c7a88bc74f2bda0e87b74a069e Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Thu, 12 Mar 2026 19:28:59 +0800 Subject: [PATCH 2/6] revert minimax_m2.py Signed-off-by: ZX-ModelCloud --- gptqmodel/models/definitions/minimax_m2.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gptqmodel/models/definitions/minimax_m2.py b/gptqmodel/models/definitions/minimax_m2.py index ba81f1161..2592cc51a 100644 --- a/gptqmodel/models/definitions/minimax_m2.py +++ b/gptqmodel/models/definitions/minimax_m2.py @@ -4,7 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from ..base import BaseQModel -from ..moe_lifecycle import GateUpDownMoELifecycleHooks +from ..moe_lifecycle import W1W3W2MoELifecycleHooks class MiniMaxM2GPTQ(BaseQModel): @@ -21,8 +21,8 @@ class MiniMaxM2GPTQ(BaseQModel): dynamic_expert_index = "num_local_experts" - # MoE lifecycle hooks for gate_proj/up_proj/down_proj pattern - moe_lifecycle_hooks = GateUpDownMoELifecycleHooks() + # MoE lifecycle hooks for w1/w3/w2 pattern + moe_lifecycle_hooks = W1W3W2MoELifecycleHooks() module_tree = [ "model", @@ -39,11 +39,11 @@ class MiniMaxM2GPTQ(BaseQModel): "o_proj:1", ), "post_attention_layernorm": ("post_attention_layernorm:!",), - "mlp:moe": { # MoE module + "block_sparse_moe:moe": { # MoE module "gate": ("gate:!",), "e_score_correction_bias": ("e_score_correction_bias:!",), "experts": { - "#": ("gate_proj:0", "up_proj:0", "down_proj:1"), + "#": ("w1:0", "w3:0", "w2:1"), }, }, }, From dfa7ba204204b18db650249e9b505fc4ac5b23f3 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Thu, 12 Mar 2026 20:00:15 +0800 Subject: [PATCH 3/6] update mixtral module_tree Signed-off-by: ZX-ModelCloud --- gptqmodel/models/definitions/mixtral.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gptqmodel/models/definitions/mixtral.py b/gptqmodel/models/definitions/mixtral.py index f9e15df26..18404c1e6 100644 --- a/gptqmodel/models/definitions/mixtral.py +++ b/gptqmodel/models/definitions/mixtral.py @@ -10,6 +10,8 @@ class MixtralQModel(BaseQModel): pre_lm_head_norm_module = "model.norm" + dynamic_expert_index = "num_local_experts" + # MoE lifecycle hooks for gate_proj/up_proj/down_proj pattern moe_lifecycle_hooks = GateUpDownMoELifecycleHooks() @@ -23,7 +25,7 @@ class MixtralQModel(BaseQModel): "post_attention_layernorm": ("post_attention_layernorm:!",), "mlp:moe": { "experts": { - "#": ("gate_proj:0", "up_proj:0", "down_porj:1"), + "#": ("gate_proj:0", "up_proj:0", "down_proj:1"), } } } From 0acb22a24a1ac4c2ed920723536db33290d81fa6 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Thu, 12 Mar 2026 20:01:09 +0800 Subject: [PATCH 4/6] fix test_mixtral.py Signed-off-by: ZX-ModelCloud --- tests/models/test_mixtral.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_mixtral.py b/tests/models/test_mixtral.py index 178a0b080..e4912112e 100644 --- a/tests/models/test_mixtral.py +++ b/tests/models/test_mixtral.py @@ -12,7 +12,6 @@ class TestMixtral(ModelTest): NATIVE_MODEL_ID = "/monster/data/model/Mixtral-8x7B-Instruct-v0.1" # "mistralai/Mixtral-8x7B-Instruct-v0.1" NATIVE_ARC_CHALLENGE_ACC = 0.5213 NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5247 - TRUST_REMOTE_CODE = True EVAL_BATCH_SIZE = 6 EVAL_TASKS = { EVAL.LM_EVAL.ARC_CHALLENGE: { @@ -21,6 +20,7 @@ class TestMixtral(ModelTest): "acc_norm": {"value": NATIVE_ARC_CHALLENGE_ACC_NORM}, }, } + OFFLOAD_TO_DISK = False # FIXME Currently, after defuser converted the model, OFFLOAD_TO_DISK must be False for quantization. def test_mixtral(self): self.quant_lm_eval() From d55ee50c656c9402dac86189d099026092195229 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Thu, 12 Mar 2026 20:01:18 +0800 Subject: [PATCH 5/6] revert phi3 Signed-off-by: ZX-ModelCloud --- gptqmodel/models/definitions/phi3.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gptqmodel/models/definitions/phi3.py b/gptqmodel/models/definitions/phi3.py index 62047d7a1..7a3e218a1 100644 --- a/gptqmodel/models/definitions/phi3.py +++ b/gptqmodel/models/definitions/phi3.py @@ -19,7 +19,7 @@ class Phi3QModel(BaseQModel): ] class PhiMoEGPTQForCausalLM(BaseQModel): - require_pkgs = ["transformers<=4.44.2"] + dynamic_expert_index = "num_local_experts" module_tree = [ "model", @@ -29,9 +29,9 @@ class PhiMoEGPTQForCausalLM(BaseQModel): "input_layernorm": ("input_layernorm:!",), "self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"), "post_attention_layernorm": ("post_attention_layernorm:!",), - "mlp:moe": { + "block_sparse_moe:moe": { "experts": { - "#": ("gate_proj:0", "up_proj:0", "down_porj:1"), + "#": ("w1:0", "w2:1"), }, }, } From 27acaef40d6fdf66d3e2ed6cd6c8d692b6bdf6bc Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Thu, 12 Mar 2026 20:01:49 +0800 Subject: [PATCH 6/6] revert phi3 Signed-off-by: ZX-ModelCloud --- gptqmodel/models/definitions/phi3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gptqmodel/models/definitions/phi3.py b/gptqmodel/models/definitions/phi3.py index 7a3e218a1..84ab4683d 100644 --- a/gptqmodel/models/definitions/phi3.py +++ b/gptqmodel/models/definitions/phi3.py @@ -19,7 +19,7 @@ class Phi3QModel(BaseQModel): ] class PhiMoEGPTQForCausalLM(BaseQModel): - dynamic_expert_index = "num_local_experts" + require_pkgs = ["transformers<=4.44.2"] module_tree = [ "model",