From 35506a1fe4072cc9f80b8dd237f0e952aecb644a Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud
Date: Thu, 12 Mar 2026 20:05:09 +0800
Subject: [PATCH 1/2] update mixtral's module_tree

Signed-off-by: ZX-ModelCloud
---
 gptqmodel/models/definitions/mixtral.py | 12 +++++++-----
 tests/models/test_mixtral.py            |  2 +-
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/gptqmodel/models/definitions/mixtral.py b/gptqmodel/models/definitions/mixtral.py
index de4498c37..38c64cd98 100644
--- a/gptqmodel/models/definitions/mixtral.py
+++ b/gptqmodel/models/definitions/mixtral.py
@@ -4,14 +4,16 @@
 # Contact: qubitium@modelcloud.ai, x.com/qubitium
 
 from ..base import BaseQModel
-from ..moe_lifecycle import W1W3W2MoELifecycleHooks
+from ..moe_lifecycle import GateUpDownMoELifecycleHooks
 
 
 class MixtralQModel(BaseQModel):
     pre_lm_head_norm_module = "model.norm"
 
-    # MoE lifecycle hooks for w1/w3/w2 pattern
-    moe_lifecycle_hooks = W1W3W2MoELifecycleHooks()
+    dynamic_expert_index = "num_local_experts"
+
+    # MoE lifecycle hooks for gate_proj/up_proj/down_proj pattern
+    moe_lifecycle_hooks = GateUpDownMoELifecycleHooks()
 
     module_tree = [
         "model",
@@ -21,9 +23,9 @@ class MixtralQModel(BaseQModel):
             "input_layernorm": ("input_layernorm:!",),
             "self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"),
             "post_attention_layernorm": ("post_attention_layernorm:!",),
-            "block_sparse_moe:moe": {
+            "mlp:moe:?": {
                 "experts": {
-                    "#": ("w1:0", "w3:0", "w2:1"),
+                    "#": ("gate_proj:0", "up_proj:0", "down_proj:1"),
                 }
             }
         }
diff --git a/tests/models/test_mixtral.py b/tests/models/test_mixtral.py
index 178a0b080..cbf6f91ef 100644
--- a/tests/models/test_mixtral.py
+++ b/tests/models/test_mixtral.py
@@ -12,7 +12,6 @@ class TestMixtral(ModelTest):
     NATIVE_MODEL_ID = "/monster/data/model/Mixtral-8x7B-Instruct-v0.1" # "mistralai/Mixtral-8x7B-Instruct-v0.1"
     NATIVE_ARC_CHALLENGE_ACC = 0.5213
     NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5247
-    TRUST_REMOTE_CODE = True
     EVAL_BATCH_SIZE = 6
     EVAL_TASKS = {
         EVAL.LM_EVAL.ARC_CHALLENGE: {
@@ -21,6 +20,7 @@ class TestMixtral(ModelTest):
             "acc_norm": {"value": NATIVE_ARC_CHALLENGE_ACC_NORM},
         },
     }
+    OFFLOAD_TO_DISK = False # FIXME Currently, after defuser converted the model, OFFLOAD_TO_DISK must be False for quantization.
 
     def test_mixtral(self):
         self.quant_lm_eval()

From e32bbaa84ab6813b0ed9fb537491c2000f918b3a Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud
Date: Thu, 12 Mar 2026 20:06:07 +0800
Subject: [PATCH 2/2] update defuser version to 0.0.8

Signed-off-by: ZX-ModelCloud
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 825054515..f8262936d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -21,4 +21,4 @@ pyarrow>=21.0
 dill>=0.3.8
 torchao>=0.14.1
 kernels>=0.12.2
-defuser>=0.0.7
+defuser>=0.0.8