Skip to content

Commit 89f59e4

Browse files
committed
support glm4v_moe_text, llama4_text and mllama_text_model
1 parent 28b3870 commit 89f59e4

5 files changed

Lines changed: 94 additions & 28 deletions

File tree

gptqmodel/models/auto.py

Lines changed: 6 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@
101101
from .definitions.glm4_moe import GLM4MoEGPTQ # noqa: E402
102102
from .definitions.glm4_moe_lite import Glm4MoeLiteQModel # noqa: E402
103103
from .definitions.glm4v import Glm4vGPTQ # noqa: E402
104-
from .definitions.glm4v_moe import Glm4vMoeQModel # noqa: E402
104+
from .definitions.glm4v_moe import Glm4vMoeQModel, Glm4vMoeTextQModel # noqa: E402
105105
from .definitions.glm_moe_dsa import GlmMoeDsaQModel # noqa: E402
106106
from .definitions.glm_ocr import GlmOCRGPTQ # noqa: E402
107107
from .definitions.glmasr import GlmASRGPTQ # noqa: E402
@@ -128,7 +128,7 @@
128128
from .definitions.lfm2_moe import LFM2MoeQModel # noqa: E402
129129
from .definitions.llada2 import LLaDA2MoeQModel
130130
from .definitions.llama import LlamaQModel # noqa: E402
131-
from .definitions.llama4 import Llama4QModel # noqa: E402
131+
from .definitions.llama4 import Llama4QModel, Llama4TextQModel # noqa: E402
132132
from .definitions.llava_qwen2 import LlavaQwen2QModel # noqa: E402
133133
from .definitions.longcat_flash import LongCatFlashQModel # noqa: E402
134134
from .definitions.mimo import MimoQModel # noqa: E402
@@ -141,7 +141,7 @@
141141
from .definitions.minimax_m2 import MiniMaxM2GPTQ # noqa: E402
142142
from .definitions.mistral3 import Mistral3GPTQ
143143
from .definitions.mixtral import MixtralQModel # noqa: E402
144-
from .definitions.mllama import MLlamaQModel # noqa: E402
144+
from .definitions.mllama import MLlamaQModel, MLlamaTextQModel # noqa: E402
145145
from .definitions.mobilellm import MobileLLMQModel # noqa: E402
146146
from .definitions.moss import MossQModel # noqa: E402
147147
from .definitions.mpt import MptQModel # noqa: E402
@@ -206,13 +206,15 @@
206206
"gpt2": GPT2QModel,
207207
"llama": LlamaQModel,
208208
"llama4": Llama4QModel,
209+
"llama4_text": Llama4TextQModel,
209210
"opt": OptQModel,
210211
"moss": MossQModel,
211212
"chatglm": ChatGLMQModel,
212213
"glm": GlmQModel,
213214
"glm4": GlmQModel,
214215
"glm4v": Glm4vGPTQ,
215216
"glm4v_moe": Glm4vMoeQModel,
217+
"glm4v_moe_text": Glm4vMoeTextQModel,
216218
"glmasr": GlmASRGPTQ,
217219
"glm_ocr": GlmOCRGPTQ,
218220
"glm4_moe": GLM4MoEGPTQ,
@@ -287,6 +289,7 @@
287289
"exaone4": Exaone4QModel,
288290
"grinmoe": GrinMoeQModel,
289291
"mllama": MLlamaQModel,
292+
"mllama_text_model": MLlamaTextQModel,
290293
"marin": Qwen3QModel,
291294
"granite": LlamaQModel, # 100% llama clone
292295
"granitemoehybrid": GraniteMoeHybridQModel,
@@ -449,25 +452,6 @@ def _get_config_load_kwargs(kwargs: dict) -> dict:
449452
return get_hf_gguf_load_kwargs(kwargs)
450453

451454

452-
def _normalize_supported_model_type(config) -> str:
453-
model_type = config.model_type.lower()
454-
config_class_name = type(config).__name__
455-
456-
if model_type == "qwen3_5":
457-
if config_class_name == "Qwen3_5TextConfig":
458-
return "qwen3_5_text"
459-
if not hasattr(config, "text_config") and not hasattr(config, "vision_config"):
460-
return "qwen3_5_text"
461-
462-
if model_type == "qwen3_5_moe":
463-
if config_class_name == "Qwen3_5MoeTextConfig":
464-
return "qwen3_5_moe_text"
465-
if not hasattr(config, "text_config") and not hasattr(config, "vision_config"):
466-
return "qwen3_5_moe_text"
467-
468-
return model_type
469-
470-
471455
def check_and_get_model_definition(model_dir, trust_remote_code=False, **config_load_kwargs):
472456
if "gguf_file" not in config_load_kwargs:
473457
model_dir = normalize_model_id_or_path_for_hf_gguf(

gptqmodel/models/definitions/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
from .internlm2 import InternLM2QModel
5050
from .interns1 import InternS1QModel
5151
from .internvl_chat import InternVLChatQModel
52-
from .llama4 import Llama4QModel
52+
from .llama4 import Llama4QModel, Llama4TextQModel
5353
from .mimo import MimoQModel
5454
from .minicpm3 import MiniCpm3QModel
5555
from .minicpm_o import MiniCPMOQModel
@@ -58,7 +58,7 @@
5858
from .minimax_m2 import MiniMaxM2GPTQ
5959
from .mimo_v2 import MimoV2QModel
6060
from .mixtral import MixtralQModel
61-
from .mllama import MLlamaQModel
61+
from .mllama import MLlamaQModel, MLlamaTextQModel
6262
from .mobilellm import MobileLLMQModel
6363
from .moss import MossQModel
6464
from .mpt import MptQModel
@@ -97,6 +97,6 @@
9797
from .mistral3 import Mistral3GPTQ
9898
from .afmoe import AfMoeQModel
9999
from .glm4v import Glm4vGPTQ
100-
from .glm4v_moe import Glm4vMoeQModel
100+
from .glm4v_moe import Glm4vMoeQModel, Glm4vMoeTextQModel
101101
from .voxtral import VoxtralGPTQ
102102
from .glm4_moe_lite import Glm4MoeLiteQModel

gptqmodel/models/definitions/glm4v_moe.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
# SPDX-License-Identifier: Apache-2.0
44
# Contact: qubitium@modelcloud.ai, x.com/qubitium
55

6+
from transformers import AutoModel
7+
68
from ..moe_lifecycle import GateUpDownMoELifecycleHooks
79
from .glm4v import Glm4vGPTQ
810

@@ -42,4 +44,33 @@ class Glm4vMoeQModel(Glm4vGPTQ):
4244
]
4345

4446

45-
__all__ = ["Glm4vMoeQModel"]
47+
class Glm4vMoeTextQModel(Glm4vMoeQModel):
48+
loader = AutoModel
49+
50+
pre_lm_head_norm_module = "norm"
51+
rotary_embedding = "rotary_emb"
52+
53+
module_tree = [
54+
"layers",
55+
"#",
56+
{
57+
"input_layernorm": ("input_layernorm:!",),
58+
"self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"),
59+
"post_attention_layernorm": ("post_attention_layernorm:!",),
60+
"mlp:moe": {
61+
"gate": ("gate:!",),
62+
"experts": {
63+
"#": ("gate_proj:0", "up_proj:0", "down_proj:1"),
64+
},
65+
"shared_experts": {
66+
"gate_proj": ("gate_proj:0",),
67+
"up_proj": ("up_proj:0",),
68+
"down_proj": ("down_proj:1",),
69+
},
70+
"": ("gate_proj:0", "up_proj:0", "down_proj:1"),
71+
},
72+
},
73+
]
74+
75+
76+
__all__ = ["Glm4vMoeQModel", "Glm4vMoeTextQModel"]

gptqmodel/models/definitions/llama4.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44
# Contact: qubitium@modelcloud.ai, x.com/qubitium
55

6-
from transformers import AutoModelForImageTextToText
6+
from transformers import AutoModelForCausalLM, AutoModelForImageTextToText
77

88
from ..base import BaseQModel
99
from ..moe_lifecycle import GateUpDownMoELifecycleHooks
@@ -40,3 +40,32 @@ class Llama4QModel(BaseQModel):
4040
},
4141
}
4242
]
43+
44+
45+
class Llama4TextQModel(Llama4QModel):
46+
loader = AutoModelForCausalLM
47+
48+
pre_lm_head_norm_module = "model.norm"
49+
rotary_embedding = "model.rotary_emb"
50+
51+
module_tree = [
52+
"model",
53+
"layers",
54+
"#",
55+
{
56+
"input_layernorm": ("input_layernorm:!",),
57+
"self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"),
58+
"post_attention_layernorm": ("post_attention_layernorm:!",),
59+
"feed_forward:moe": {
60+
"router": ("router:!",),
61+
"experts:0": {
62+
"#": ("gate_proj:0", "up_proj:0", "down_proj:1"),
63+
},
64+
"shared_expert:0": ("gate_proj:0", "up_proj:0", "down_proj:1"),
65+
"": ("gate_proj:0", "up_proj:0", "down_proj:1"),
66+
},
67+
},
68+
]
69+
70+
71+
__all__ = ["Llama4QModel", "Llama4TextQModel"]

gptqmodel/models/definitions/mllama.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44
# Contact: qubitium@modelcloud.ai, x.com/qubitium
55

6-
from transformers import AutoModelForPreTraining
6+
from transformers import AutoModelForCausalLM, AutoModelForPreTraining
77

88
from ..base import BaseQModel
99

@@ -27,3 +27,25 @@ class MLlamaQModel(BaseQModel):
2727
"mlp": ("gate_proj:0", "up_proj:0", "down_proj:1"),
2828
}
2929
]
30+
31+
32+
class MLlamaTextQModel(MLlamaQModel):
33+
loader = AutoModelForCausalLM
34+
35+
pre_lm_head_norm_module = "model.norm"
36+
rotary_embedding = "model.rotary_emb"
37+
38+
module_tree = [
39+
"model",
40+
"layers",
41+
"#",
42+
{
43+
"input_layernorm": ("input_layernorm:!",),
44+
"self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"),
45+
"post_attention_layernorm": ("post_attention_layernorm:!",),
46+
"mlp": ("gate_proj:0", "up_proj:0", "down_proj:1"),
47+
},
48+
]
49+
50+
51+
__all__ = ["MLlamaQModel", "MLlamaTextQModel"]

0 commit comments

Comments
 (0)