From 89f59e4bf30b92ee13f9d6e04bf8e0866c312ba0 Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud
Date: Thu, 28 May 2026 07:05:09 +0800
Subject: [PATCH] support glm4v_moe_text, llama4_text and mllama_text_model

---
Notes (review commentary; ignored by `git am`):

  What this patch does
  * Adds three text-only model definitions, each a subclass of the existing
    multimodal definition in the same module:
      - Glm4vMoeTextQModel(Glm4vMoeQModel)  loader = AutoModel,
        module tree rooted at "layers", norm/rotary at "norm"/"rotary_emb".
      - Llama4TextQModel(Llama4QModel)      loader = AutoModelForCausalLM,
        module tree rooted at "model" -> "layers".
      - MLlamaTextQModel(MLlamaQModel)      loader = AutoModelForCausalLM,
        module tree rooted at "model" -> "layers".
    Each subclass overrides only `loader`, `pre_lm_head_norm_module`,
    `rotary_embedding` and `module_tree`.
  * Registers them in the model-type mapping in auto.py under the keys
    "glm4v_moe_text", "llama4_text" and "mllama_text_model", and re-exports
    them from definitions/__init__.py.
  * Adds or extends `__all__` in glm4v_moe.py, llama4.py and mllama.py.

  NOTE(review): the last auto.py hunk also deletes
  `_normalize_supported_model_type` (the qwen3_5 / qwen3_5_moe ->
  "*_text" normalisation helper). That removal is not mentioned in the
  subject and no call-site change is visible in this patch -- confirm that
  nothing still calls it before applying.

  NOTE(review): the Glm4vMoe text tree tags "experts" / "shared_experts"
  without a ":0" suffix while the Llama4 text tree uses "experts:0" /
  "shared_expert:0" -- presumably intentional and mirroring the parent
  definitions; verify against them.

 gptqmodel/models/auto.py                  | 28 +++++--------------
 gptqmodel/models/definitions/__init__.py  |  6 ++---
 gptqmodel/models/definitions/glm4v_moe.py | 33 ++++++++++++++++++++++-
 gptqmodel/models/definitions/llama4.py    | 31 ++++++++++++++++++++-
 gptqmodel/models/definitions/mllama.py    | 24 ++++++++++++++++-
 5 files changed, 94 insertions(+), 28 deletions(-)

diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py
index fe9722c67..f161ff3ed 100644
--- a/gptqmodel/models/auto.py
+++ b/gptqmodel/models/auto.py
@@ -101,7 +101,7 @@
 from .definitions.glm4_moe import GLM4MoEGPTQ  # noqa: E402
 from .definitions.glm4_moe_lite import Glm4MoeLiteQModel  # noqa: E402
 from .definitions.glm4v import Glm4vGPTQ  # noqa: E402
-from .definitions.glm4v_moe import Glm4vMoeQModel  # noqa: E402
+from .definitions.glm4v_moe import Glm4vMoeQModel, Glm4vMoeTextQModel  # noqa: E402
 from .definitions.glm_moe_dsa import GlmMoeDsaQModel  # noqa: E402
 from .definitions.glm_ocr import GlmOCRGPTQ  # noqa: E402
 from .definitions.glmasr import GlmASRGPTQ  # noqa: E402
@@ -128,7 +128,7 @@
 from .definitions.lfm2_moe import LFM2MoeQModel  # noqa: E402
 from .definitions.llada2 import LLaDA2MoeQModel
 from .definitions.llama import LlamaQModel  # noqa: E402
-from .definitions.llama4 import Llama4QModel  # noqa: E402
+from .definitions.llama4 import Llama4QModel, Llama4TextQModel  # noqa: E402
 from .definitions.llava_qwen2 import LlavaQwen2QModel  # noqa: E402
 from .definitions.longcat_flash import LongCatFlashQModel  # noqa: E402
 from .definitions.mimo import MimoQModel  # noqa: E402
@@ -141,7 +141,7 @@
 from .definitions.minimax_m2 import MiniMaxM2GPTQ  # noqa: E402
 from .definitions.mistral3 import Mistral3GPTQ
 from .definitions.mixtral import MixtralQModel  # noqa: E402
-from .definitions.mllama import MLlamaQModel  # noqa: E402
+from .definitions.mllama import MLlamaQModel, MLlamaTextQModel  # noqa: E402
 from .definitions.mobilellm import MobileLLMQModel  # noqa: E402
 from .definitions.moss import MossQModel  # noqa: E402
 from .definitions.mpt import MptQModel  # noqa: E402
@@ -206,6 +206,7 @@
     "gpt2": GPT2QModel,
     "llama": LlamaQModel,
     "llama4": Llama4QModel,
+    "llama4_text": Llama4TextQModel,
     "opt": OptQModel,
     "moss": MossQModel,
     "chatglm": ChatGLMQModel,
@@ -213,6 +214,7 @@
     "glm4": GlmQModel,
     "glm4v": Glm4vGPTQ,
     "glm4v_moe": Glm4vMoeQModel,
+    "glm4v_moe_text": Glm4vMoeTextQModel,
     "glmasr": GlmASRGPTQ,
     "glm_ocr": GlmOCRGPTQ,
     "glm4_moe": GLM4MoEGPTQ,
@@ -287,6 +289,7 @@
     "exaone4": Exaone4QModel,
     "grinmoe": GrinMoeQModel,
     "mllama": MLlamaQModel,
+    "mllama_text_model": MLlamaTextQModel,
     "marin": Qwen3QModel,
     "granite": LlamaQModel,  # 100% llama clone
     "granitemoehybrid": GraniteMoeHybridQModel,
@@ -449,25 +452,6 @@ def _get_config_load_kwargs(kwargs: dict) -> dict:
     return get_hf_gguf_load_kwargs(kwargs)
 
 
-def _normalize_supported_model_type(config) -> str:
-    model_type = config.model_type.lower()
-    config_class_name = type(config).__name__
-
-    if model_type == "qwen3_5":
-        if config_class_name == "Qwen3_5TextConfig":
-            return "qwen3_5_text"
-        if not hasattr(config, "text_config") and not hasattr(config, "vision_config"):
-            return "qwen3_5_text"
-
-    if model_type == "qwen3_5_moe":
-        if config_class_name == "Qwen3_5MoeTextConfig":
-            return "qwen3_5_moe_text"
-        if not hasattr(config, "text_config") and not hasattr(config, "vision_config"):
-            return "qwen3_5_moe_text"
-
-    return model_type
-
-
 def check_and_get_model_definition(model_dir, trust_remote_code=False, **config_load_kwargs):
     if "gguf_file" not in config_load_kwargs:
         model_dir = normalize_model_id_or_path_for_hf_gguf(
diff --git a/gptqmodel/models/definitions/__init__.py b/gptqmodel/models/definitions/__init__.py
index 2760a3672..b0604eb55 100644
--- a/gptqmodel/models/definitions/__init__.py
+++ b/gptqmodel/models/definitions/__init__.py
@@ -49,7 +49,7 @@
 from .internlm2 import InternLM2QModel
 from .interns1 import InternS1QModel
 from .internvl_chat import InternVLChatQModel
-from .llama4 import Llama4QModel
+from .llama4 import Llama4QModel, Llama4TextQModel
 from .mimo import MimoQModel
 from .minicpm3 import MiniCpm3QModel
 from .minicpm_o import MiniCPMOQModel
@@ -58,7 +58,7 @@
 from .minimax_m2 import MiniMaxM2GPTQ
 from .mimo_v2 import MimoV2QModel
 from .mixtral import MixtralQModel
-from .mllama import MLlamaQModel
+from .mllama import MLlamaQModel, MLlamaTextQModel
 from .mobilellm import MobileLLMQModel
 from .moss import MossQModel
 from .mpt import MptQModel
@@ -97,6 +97,6 @@
 from .mistral3 import Mistral3GPTQ
 from .afmoe import AfMoeQModel
 from .glm4v import Glm4vGPTQ
-from .glm4v_moe import Glm4vMoeQModel
+from .glm4v_moe import Glm4vMoeQModel, Glm4vMoeTextQModel
 from .voxtral import VoxtralGPTQ
 from .glm4_moe_lite import Glm4MoeLiteQModel
diff --git a/gptqmodel/models/definitions/glm4v_moe.py b/gptqmodel/models/definitions/glm4v_moe.py
index 0ce200e07..f8402801a 100644
--- a/gptqmodel/models/definitions/glm4v_moe.py
+++ b/gptqmodel/models/definitions/glm4v_moe.py
@@ -3,6 +3,8 @@
 # SPDX-License-Identifier: Apache-2.0
 # Contact: qubitium@modelcloud.ai, x.com/qubitium
 
+from transformers import AutoModel
+
 from ..moe_lifecycle import GateUpDownMoELifecycleHooks
 from .glm4v import Glm4vGPTQ
 
@@ -42,4 +44,33 @@ class Glm4vMoeQModel(Glm4vGPTQ):
     ]
 
 
-__all__ = ["Glm4vMoeQModel"]
+class Glm4vMoeTextQModel(Glm4vMoeQModel):
+    loader = AutoModel
+
+    pre_lm_head_norm_module = "norm"
+    rotary_embedding = "rotary_emb"
+
+    module_tree = [
+        "layers",
+        "#",
+        {
+            "input_layernorm": ("input_layernorm:!",),
+            "self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"),
+            "post_attention_layernorm": ("post_attention_layernorm:!",),
+            "mlp:moe": {
+                "gate": ("gate:!",),
+                "experts": {
+                    "#": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+                },
+                "shared_experts": {
+                    "gate_proj": ("gate_proj:0",),
+                    "up_proj": ("up_proj:0",),
+                    "down_proj": ("down_proj:1",),
+                },
+                "": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+            },
+        },
+    ]
+
+
+__all__ = ["Glm4vMoeQModel", "Glm4vMoeTextQModel"]
diff --git a/gptqmodel/models/definitions/llama4.py b/gptqmodel/models/definitions/llama4.py
index f2c1d121b..c523ea27b 100644
--- a/gptqmodel/models/definitions/llama4.py
+++ b/gptqmodel/models/definitions/llama4.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # Contact: qubitium@modelcloud.ai, x.com/qubitium
 
-from transformers import AutoModelForImageTextToText
+from transformers import AutoModelForCausalLM, AutoModelForImageTextToText
 
 from ..base import BaseQModel
 from ..moe_lifecycle import GateUpDownMoELifecycleHooks
@@ -40,3 +40,32 @@ class Llama4QModel(BaseQModel):
             },
         }
     ]
+
+
+class Llama4TextQModel(Llama4QModel):
+    loader = AutoModelForCausalLM
+
+    pre_lm_head_norm_module = "model.norm"
+    rotary_embedding = "model.rotary_emb"
+
+    module_tree = [
+        "model",
+        "layers",
+        "#",
+        {
+            "input_layernorm": ("input_layernorm:!",),
+            "self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"),
+            "post_attention_layernorm": ("post_attention_layernorm:!",),
+            "feed_forward:moe": {
+                "router": ("router:!",),
+                "experts:0": {
+                    "#": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+                },
+                "shared_expert:0": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+                "": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+            },
+        },
+    ]
+
+
+__all__ = ["Llama4QModel", "Llama4TextQModel"]
diff --git a/gptqmodel/models/definitions/mllama.py b/gptqmodel/models/definitions/mllama.py
index 5a5dc5406..b144fcdc9 100644
--- a/gptqmodel/models/definitions/mllama.py
+++ b/gptqmodel/models/definitions/mllama.py
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # Contact: qubitium@modelcloud.ai, x.com/qubitium
 
-from transformers import AutoModelForPreTraining
+from transformers import AutoModelForCausalLM, AutoModelForPreTraining
 
 from ..base import BaseQModel
 
@@ -27,3 +27,25 @@ class MLlamaQModel(BaseQModel):
             "mlp": ("gate_proj:0", "up_proj:0", "down_proj:1"),
         }
     ]
+
+
+class MLlamaTextQModel(MLlamaQModel):
+    loader = AutoModelForCausalLM
+
+    pre_lm_head_norm_module = "model.norm"
+    rotary_embedding = "model.rotary_emb"
+
+    module_tree = [
+        "model",
+        "layers",
+        "#",
+        {
+            "input_layernorm": ("input_layernorm:!",),
+            "self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"),
+            "post_attention_layernorm": ("post_attention_layernorm:!",),
+            "mlp": ("gate_proj:0", "up_proj:0", "down_proj:1"),
+        },
+    ]
+
+
+__all__ = ["MLlamaQModel", "MLlamaTextQModel"]