Support glm4v_moe_text, llama4_text, and mllama_text_model model types

ZX-ModelCloud · ZX-ModelCloud · commit 89f59e4bf30b · 2026-05-28T07:05:09.000+08:00
diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py
@@ -101,7 +101,7 @@
 from .definitions.glm4_moe import GLM4MoEGPTQ  # noqa: E402
 from .definitions.glm4_moe_lite import Glm4MoeLiteQModel  # noqa: E402
 from .definitions.glm4v import Glm4vGPTQ  # noqa: E402
-from .definitions.glm4v_moe import Glm4vMoeQModel  # noqa: E402
+from .definitions.glm4v_moe import Glm4vMoeQModel, Glm4vMoeTextQModel  # noqa: E402
 from .definitions.glm_moe_dsa import GlmMoeDsaQModel  # noqa: E402
 from .definitions.glm_ocr import GlmOCRGPTQ  # noqa: E402
 from .definitions.glmasr import GlmASRGPTQ  # noqa: E402
@@ -128,7 +128,7 @@
 from .definitions.lfm2_moe import LFM2MoeQModel  # noqa: E402
 from .definitions.llada2 import LLaDA2MoeQModel
 from .definitions.llama import LlamaQModel  # noqa: E402
-from .definitions.llama4 import Llama4QModel  # noqa: E402
+from .definitions.llama4 import Llama4QModel, Llama4TextQModel  # noqa: E402
 from .definitions.llava_qwen2 import LlavaQwen2QModel  # noqa: E402
 from .definitions.longcat_flash import LongCatFlashQModel  # noqa: E402
 from .definitions.mimo import MimoQModel  # noqa: E402
@@ -141,7 +141,7 @@
 from .definitions.minimax_m2 import MiniMaxM2GPTQ  # noqa: E402
 from .definitions.mistral3 import Mistral3GPTQ
 from .definitions.mixtral import MixtralQModel  # noqa: E402
-from .definitions.mllama import MLlamaQModel  # noqa: E402
+from .definitions.mllama import MLlamaQModel, MLlamaTextQModel  # noqa: E402
 from .definitions.mobilellm import MobileLLMQModel  # noqa: E402
 from .definitions.moss import MossQModel  # noqa: E402
 from .definitions.mpt import MptQModel  # noqa: E402
@@ -206,13 +206,15 @@
     "gpt2": GPT2QModel,
     "llama": LlamaQModel,
     "llama4": Llama4QModel,
+    "llama4_text": Llama4TextQModel,
     "opt": OptQModel,
     "moss": MossQModel,
     "chatglm": ChatGLMQModel,
     "glm": GlmQModel,
     "glm4": GlmQModel,
     "glm4v": Glm4vGPTQ,
     "glm4v_moe": Glm4vMoeQModel,
+    "glm4v_moe_text": Glm4vMoeTextQModel,
     "glmasr": GlmASRGPTQ,
     "glm_ocr": GlmOCRGPTQ,
     "glm4_moe": GLM4MoEGPTQ,
@@ -287,6 +289,7 @@
     "exaone4": Exaone4QModel,
     "grinmoe": GrinMoeQModel,
     "mllama": MLlamaQModel,
+    "mllama_text_model": MLlamaTextQModel,
     "marin": Qwen3QModel,
     "granite": LlamaQModel, # 100% llama clone
     "granitemoehybrid": GraniteMoeHybridQModel,
@@ -449,25 +452,6 @@ def _get_config_load_kwargs(kwargs: dict) -> dict:
     return get_hf_gguf_load_kwargs(kwargs)
 
 
-def _normalize_supported_model_type(config) -> str:
-    model_type = config.model_type.lower()
-    config_class_name = type(config).__name__
-
-    if model_type == "qwen3_5":
-        if config_class_name == "Qwen3_5TextConfig":
-            return "qwen3_5_text"
-        if not hasattr(config, "text_config") and not hasattr(config, "vision_config"):
-            return "qwen3_5_text"
-
-    if model_type == "qwen3_5_moe":
-        if config_class_name == "Qwen3_5MoeTextConfig":
-            return "qwen3_5_moe_text"
-        if not hasattr(config, "text_config") and not hasattr(config, "vision_config"):
-            return "qwen3_5_moe_text"
-
-    return model_type
-
-
 def check_and_get_model_definition(model_dir, trust_remote_code=False, **config_load_kwargs):
     if "gguf_file" not in config_load_kwargs:
         model_dir = normalize_model_id_or_path_for_hf_gguf(
diff --git a/gptqmodel/models/definitions/__init__.py b/gptqmodel/models/definitions/__init__.py
@@ -49,7 +49,7 @@
 from .internlm2 import InternLM2QModel
 from .interns1 import InternS1QModel
 from .internvl_chat import InternVLChatQModel
-from .llama4 import Llama4QModel
+from .llama4 import Llama4QModel, Llama4TextQModel
 from .mimo import MimoQModel
 from .minicpm3 import MiniCpm3QModel
 from .minicpm_o import MiniCPMOQModel
@@ -58,7 +58,7 @@
 from .minimax_m2 import MiniMaxM2GPTQ
 from .mimo_v2 import MimoV2QModel
 from .mixtral import MixtralQModel
-from .mllama import MLlamaQModel
+from .mllama import MLlamaQModel, MLlamaTextQModel
 from .mobilellm import MobileLLMQModel
 from .moss import MossQModel
 from .mpt import MptQModel
@@ -97,6 +97,6 @@
 from .mistral3 import Mistral3GPTQ
 from .afmoe import AfMoeQModel
 from .glm4v import Glm4vGPTQ
-from .glm4v_moe import Glm4vMoeQModel
+from .glm4v_moe import Glm4vMoeQModel, Glm4vMoeTextQModel
 from .voxtral import VoxtralGPTQ
 from .glm4_moe_lite import Glm4MoeLiteQModel
diff --git a/gptqmodel/models/definitions/glm4v_moe.py b/gptqmodel/models/definitions/glm4v_moe.py
@@ -3,6 +3,8 @@
 # SPDX-License-Identifier: Apache-2.0
 # Contact: qubitium@modelcloud.ai, x.com/qubitium
 
+from transformers import AutoModel
+
 from ..moe_lifecycle import GateUpDownMoELifecycleHooks
 from .glm4v import Glm4vGPTQ
 
@@ -42,4 +44,33 @@ class Glm4vMoeQModel(Glm4vGPTQ):
     ]
 
 
-__all__ = ["Glm4vMoeQModel"]
+class Glm4vMoeTextQModel(Glm4vMoeQModel):
+    loader = AutoModel
+
+    pre_lm_head_norm_module = "norm"
+    rotary_embedding = "rotary_emb"
+
+    module_tree = [
+        "layers",
+        "#",
+        {
+            "input_layernorm": ("input_layernorm:!",),
+            "self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"),
+            "post_attention_layernorm": ("post_attention_layernorm:!",),
+            "mlp:moe": {
+                "gate": ("gate:!",),
+                "experts": {
+                    "#": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+                },
+                "shared_experts": {
+                    "gate_proj": ("gate_proj:0",),
+                    "up_proj": ("up_proj:0",),
+                    "down_proj": ("down_proj:1",),
+                },
+                "": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+            },
+        },
+    ]
+
+
+__all__ = ["Glm4vMoeQModel", "Glm4vMoeTextQModel"]
diff --git a/gptqmodel/models/definitions/llama4.py b/gptqmodel/models/definitions/llama4.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # Contact: qubitium@modelcloud.ai, x.com/qubitium
 
-from transformers import AutoModelForImageTextToText
+from transformers import AutoModelForCausalLM, AutoModelForImageTextToText
 
 from ..base import BaseQModel
 from ..moe_lifecycle import GateUpDownMoELifecycleHooks
@@ -40,3 +40,32 @@ class Llama4QModel(BaseQModel):
             },
         }
     ]
+
+
+class Llama4TextQModel(Llama4QModel):
+    loader = AutoModelForCausalLM
+
+    pre_lm_head_norm_module = "model.norm"
+    rotary_embedding = "model.rotary_emb"
+
+    module_tree = [
+        "model",
+        "layers",
+        "#",
+        {
+            "input_layernorm": ("input_layernorm:!",),
+            "self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"),
+            "post_attention_layernorm": ("post_attention_layernorm:!",),
+            "feed_forward:moe": {
+                "router": ("router:!",),
+                "experts:0": {
+                    "#": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+                },
+                "shared_expert:0": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+                "": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+            },
+        },
+    ]
+
+
+__all__ = ["Llama4QModel", "Llama4TextQModel"]
diff --git a/gptqmodel/models/definitions/mllama.py b/gptqmodel/models/definitions/mllama.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # Contact: qubitium@modelcloud.ai, x.com/qubitium
 
-from transformers import AutoModelForPreTraining
+from transformers import AutoModelForCausalLM, AutoModelForPreTraining
 
 from ..base import BaseQModel
 
@@ -27,3 +27,25 @@ class MLlamaQModel(BaseQModel):
             "mlp": ("gate_proj:0", "up_proj:0", "down_proj:1"),
         }
     ]
+
+
+class MLlamaTextQModel(MLlamaQModel):
+    loader = AutoModelForCausalLM
+
+    pre_lm_head_norm_module = "model.norm"
+    rotary_embedding = "model.rotary_emb"
+
+    module_tree = [
+        "model",
+        "layers",
+        "#",
+        {
+            "input_layernorm": ("input_layernorm:!",),
+            "self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"),
+            "post_attention_layernorm": ("post_attention_layernorm:!",),
+            "mlp": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+        },
+    ]
+
+
+__all__ = ["MLlamaQModel", "MLlamaTextQModel"]