diff --git a/gptqmodel/models/definitions/mixtral.py b/gptqmodel/models/definitions/mixtral.py index de4498c37..18404c1e6 100644 --- a/gptqmodel/models/definitions/mixtral.py +++ b/gptqmodel/models/definitions/mixtral.py @@ -4,14 +4,16 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from ..base import BaseQModel -from ..moe_lifecycle import W1W3W2MoELifecycleHooks +from ..moe_lifecycle import GateUpDownMoELifecycleHooks class MixtralQModel(BaseQModel): pre_lm_head_norm_module = "model.norm" - # MoE lifecycle hooks for w1/w3/w2 pattern - moe_lifecycle_hooks = W1W3W2MoELifecycleHooks() + dynamic_expert_index = "num_local_experts" + + # MoE lifecycle hooks for gate_proj/up_proj/down_proj pattern + moe_lifecycle_hooks = GateUpDownMoELifecycleHooks() module_tree = [ "model", @@ -21,9 +23,9 @@ class MixtralQModel(BaseQModel): "input_layernorm": ("input_layernorm:!",), "self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"), "post_attention_layernorm": ("post_attention_layernorm:!",), - "block_sparse_moe:moe": { + "mlp:moe": { "experts": { - "#": ("w1:0", "w3:0", "w2:1"), + "#": ("gate_proj:0", "up_proj:0", "down_proj:1"), } } } diff --git a/tests/models/test_mixtral.py b/tests/models/test_mixtral.py index 178a0b080..e4912112e 100644 --- a/tests/models/test_mixtral.py +++ b/tests/models/test_mixtral.py @@ -12,7 +12,6 @@ class TestMixtral(ModelTest): NATIVE_MODEL_ID = "/monster/data/model/Mixtral-8x7B-Instruct-v0.1" # "mistralai/Mixtral-8x7B-Instruct-v0.1" NATIVE_ARC_CHALLENGE_ACC = 0.5213 NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5247 - TRUST_REMOTE_CODE = True EVAL_BATCH_SIZE = 6 EVAL_TASKS = { EVAL.LM_EVAL.ARC_CHALLENGE: { @@ -21,6 +20,7 @@ class TestMixtral(ModelTest): "acc_norm": {"value": NATIVE_ARC_CHALLENGE_ACC_NORM}, }, } + OFFLOAD_TO_DISK = False # FIXME: once defuser has converted the model, quantization currently only works with OFFLOAD_TO_DISK = False; remove this override when disk offload is supported for converted models. def test_mixtral(self): self.quant_lm_eval()