Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 6 additions & 22 deletions gptqmodel/models/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
from .definitions.glm4_moe import GLM4MoEGPTQ # noqa: E402
from .definitions.glm4_moe_lite import Glm4MoeLiteQModel # noqa: E402
from .definitions.glm4v import Glm4vGPTQ # noqa: E402
from .definitions.glm4v_moe import Glm4vMoeQModel # noqa: E402
from .definitions.glm4v_moe import Glm4vMoeQModel, Glm4vMoeTextQModel # noqa: E402
from .definitions.glm_moe_dsa import GlmMoeDsaQModel # noqa: E402
from .definitions.glm_ocr import GlmOCRGPTQ # noqa: E402
from .definitions.glmasr import GlmASRGPTQ # noqa: E402
Expand All @@ -128,7 +128,7 @@
from .definitions.lfm2_moe import LFM2MoeQModel # noqa: E402
from .definitions.llada2 import LLaDA2MoeQModel
from .definitions.llama import LlamaQModel # noqa: E402
from .definitions.llama4 import Llama4QModel # noqa: E402
from .definitions.llama4 import Llama4QModel, Llama4TextQModel # noqa: E402
from .definitions.llava_qwen2 import LlavaQwen2QModel # noqa: E402
from .definitions.longcat_flash import LongCatFlashQModel # noqa: E402
from .definitions.mimo import MimoQModel # noqa: E402
Expand All @@ -141,7 +141,7 @@
from .definitions.minimax_m2 import MiniMaxM2GPTQ # noqa: E402
from .definitions.mistral3 import Mistral3GPTQ
from .definitions.mixtral import MixtralQModel # noqa: E402
from .definitions.mllama import MLlamaQModel # noqa: E402
from .definitions.mllama import MLlamaQModel, MLlamaTextQModel # noqa: E402
from .definitions.mobilellm import MobileLLMQModel # noqa: E402
from .definitions.moss import MossQModel # noqa: E402
from .definitions.mpt import MptQModel # noqa: E402
Expand Down Expand Up @@ -206,13 +206,15 @@
"gpt2": GPT2QModel,
"llama": LlamaQModel,
"llama4": Llama4QModel,
"llama4_text": Llama4TextQModel,
"opt": OptQModel,
"moss": MossQModel,
"chatglm": ChatGLMQModel,
"glm": GlmQModel,
"glm4": GlmQModel,
"glm4v": Glm4vGPTQ,
"glm4v_moe": Glm4vMoeQModel,
"glm4v_moe_text": Glm4vMoeTextQModel,
"glmasr": GlmASRGPTQ,
"glm_ocr": GlmOCRGPTQ,
"glm4_moe": GLM4MoEGPTQ,
Expand Down Expand Up @@ -287,6 +289,7 @@
"exaone4": Exaone4QModel,
"grinmoe": GrinMoeQModel,
"mllama": MLlamaQModel,
"mllama_text_model": MLlamaTextQModel,
"marin": Qwen3QModel,
"granite": LlamaQModel, # 100% llama clone
"granitemoehybrid": GraniteMoeHybridQModel,
Expand Down Expand Up @@ -449,25 +452,6 @@ def _get_config_load_kwargs(kwargs: dict) -> dict:
return get_hf_gguf_load_kwargs(kwargs)


def _normalize_supported_model_type(config) -> str:
    """Return the lower-cased ``config.model_type``, remapped for text-only Qwen3.5.

    For the ``qwen3_5`` and ``qwen3_5_moe`` model types the result is switched
    to the matching ``*_text`` key when either:

    * the config's class name is the dedicated text config class
      (``Qwen3_5TextConfig`` / ``Qwen3_5MoeTextConfig``), or
    * the config has neither a ``text_config`` nor a ``vision_config``
      attribute.

    Every other model type is returned lower-cased and otherwise unchanged.

    Args:
        config: Object exposing a string ``model_type`` attribute.

    Returns:
        The normalized model-type key.
    """
    model_type = config.model_type.lower()

    # model_type -> (text-only config class name, normalized text-only key).
    # One table instead of one copy-pasted branch per Qwen3.5 flavour.
    text_variants = {
        "qwen3_5": ("Qwen3_5TextConfig", "qwen3_5_text"),
        "qwen3_5_moe": ("Qwen3_5MoeTextConfig", "qwen3_5_moe_text"),
    }

    variant = text_variants.get(model_type)
    if variant is None:
        return model_type

    text_config_class_name, text_model_type = variant
    if type(config).__name__ == text_config_class_name:
        return text_model_type

    # A config carrying neither sub-config attribute is treated as text-only.
    has_sub_config = hasattr(config, "text_config") or hasattr(config, "vision_config")
    if not has_sub_config:
        return text_model_type

    return model_type


def check_and_get_model_definition(model_dir, trust_remote_code=False, **config_load_kwargs):
if "gguf_file" not in config_load_kwargs:
model_dir = normalize_model_id_or_path_for_hf_gguf(
Expand Down
6 changes: 3 additions & 3 deletions gptqmodel/models/definitions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
from .internlm2 import InternLM2QModel
from .interns1 import InternS1QModel
from .internvl_chat import InternVLChatQModel
from .llama4 import Llama4QModel
from .llama4 import Llama4QModel, Llama4TextQModel
from .mimo import MimoQModel
from .minicpm3 import MiniCpm3QModel
from .minicpm_o import MiniCPMOQModel
Expand All @@ -58,7 +58,7 @@
from .minimax_m2 import MiniMaxM2GPTQ
from .mimo_v2 import MimoV2QModel
from .mixtral import MixtralQModel
from .mllama import MLlamaQModel
from .mllama import MLlamaQModel, MLlamaTextQModel
from .mobilellm import MobileLLMQModel
from .moss import MossQModel
from .mpt import MptQModel
Expand Down Expand Up @@ -97,6 +97,6 @@
from .mistral3 import Mistral3GPTQ
from .afmoe import AfMoeQModel
from .glm4v import Glm4vGPTQ
from .glm4v_moe import Glm4vMoeQModel
from .glm4v_moe import Glm4vMoeQModel, Glm4vMoeTextQModel
from .voxtral import VoxtralGPTQ
from .glm4_moe_lite import Glm4MoeLiteQModel
33 changes: 32 additions & 1 deletion gptqmodel/models/definitions/glm4v_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
# SPDX-License-Identifier: Apache-2.0
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from transformers import AutoModel

from ..moe_lifecycle import GateUpDownMoELifecycleHooks
from .glm4v import Glm4vGPTQ

Expand Down Expand Up @@ -42,4 +44,33 @@ class Glm4vMoeQModel(Glm4vGPTQ):
]


__all__ = ["Glm4vMoeQModel"]
class Glm4vMoeTextQModel(Glm4vMoeQModel):
    """``Glm4vMoeQModel`` variant that is loaded through ``AutoModel``.

    All module paths declared here are rooted directly at ``layers`` /
    ``norm`` / ``rotary_emb``, i.e. without a leading ``model.`` segment.
    """

    # NOTE(review): going by the class name this targets text-only
    # checkpoints — confirm against the model-type registry.
    loader = AutoModel

    pre_lm_head_norm_module = "norm"
    rotary_embedding = "rotary_emb"

    # NOTE(review): the ":0" / ":1" / ":!" suffixes, the ":moe" marker and the
    # "#" / "" keys are consumed by base-class machinery that is not visible
    # here; entry order is kept exactly as declared.
    module_tree = [
        "layers",
        "#",
        {
            "input_layernorm": ("input_layernorm:!",),
            "self_attn": (
                "q_proj:0",
                "k_proj:0",
                "v_proj:0",
                "o_proj:1",
            ),
            "post_attention_layernorm": ("post_attention_layernorm:!",),
            "mlp:moe": {
                "gate": ("gate:!",),
                "experts": {
                    "#": (
                        "gate_proj:0",
                        "up_proj:0",
                        "down_proj:1",
                    ),
                },
                "shared_experts": {
                    "gate_proj": ("gate_proj:0",),
                    "up_proj": ("up_proj:0",),
                    "down_proj": ("down_proj:1",),
                },
                "": (
                    "gate_proj:0",
                    "up_proj:0",
                    "down_proj:1",
                ),
            },
        },
    ]


__all__ = ["Glm4vMoeQModel", "Glm4vMoeTextQModel"]
31 changes: 30 additions & 1 deletion gptqmodel/models/definitions/llama4.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# SPDX-License-Identifier: Apache-2.0
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from transformers import AutoModelForImageTextToText
from transformers import AutoModelForCausalLM, AutoModelForImageTextToText

from ..base import BaseQModel
from ..moe_lifecycle import GateUpDownMoELifecycleHooks
Expand Down Expand Up @@ -40,3 +40,32 @@ class Llama4QModel(BaseQModel):
},
}
]


class Llama4TextQModel(Llama4QModel):
    """``Llama4QModel`` variant that is loaded through ``AutoModelForCausalLM``.

    Module paths are rooted at ``model`` (``model.layers``, ``model.norm``,
    ``model.rotary_emb``).
    """

    # NOTE(review): going by the class name this targets text-only
    # checkpoints — confirm against the model-type registry.
    loader = AutoModelForCausalLM

    pre_lm_head_norm_module = "model.norm"
    rotary_embedding = "model.rotary_emb"

    # NOTE(review): the ":0" / ":1" / ":!" suffixes, the ":moe" marker and the
    # "#" / "" keys are consumed by base-class machinery that is not visible
    # here; entry order is kept exactly as declared.
    module_tree = [
        "model",
        "layers",
        "#",
        {
            "input_layernorm": ("input_layernorm:!",),
            "self_attn": (
                "q_proj:0",
                "k_proj:0",
                "v_proj:0",
                "o_proj:1",
            ),
            "post_attention_layernorm": ("post_attention_layernorm:!",),
            "feed_forward:moe": {
                "router": ("router:!",),
                "experts:0": {
                    "#": (
                        "gate_proj:0",
                        "up_proj:0",
                        "down_proj:1",
                    ),
                },
                "shared_expert:0": (
                    "gate_proj:0",
                    "up_proj:0",
                    "down_proj:1",
                ),
                "": (
                    "gate_proj:0",
                    "up_proj:0",
                    "down_proj:1",
                ),
            },
        },
    ]


__all__ = ["Llama4QModel", "Llama4TextQModel"]
24 changes: 23 additions & 1 deletion gptqmodel/models/definitions/mllama.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# SPDX-License-Identifier: Apache-2.0
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from transformers import AutoModelForPreTraining
from transformers import AutoModelForCausalLM, AutoModelForPreTraining

from ..base import BaseQModel

Expand All @@ -27,3 +27,25 @@ class MLlamaQModel(BaseQModel):
"mlp": ("gate_proj:0", "up_proj:0", "down_proj:1"),
}
]


class MLlamaTextQModel(MLlamaQModel):
    """``MLlamaQModel`` variant that is loaded through ``AutoModelForCausalLM``.

    Module paths are rooted at ``model`` (``model.layers``, ``model.norm``,
    ``model.rotary_emb``).
    """

    # NOTE(review): going by the class name this targets text-only
    # checkpoints — confirm against the model-type registry.
    loader = AutoModelForCausalLM

    pre_lm_head_norm_module = "model.norm"
    rotary_embedding = "model.rotary_emb"

    # NOTE(review): the ":0" / ":1" / ":!" suffixes and the "#" key are
    # consumed by base-class machinery that is not visible here; entry order
    # is kept exactly as declared.
    module_tree = [
        "model",
        "layers",
        "#",
        {
            "input_layernorm": ("input_layernorm:!",),
            "self_attn": (
                "q_proj:0",
                "k_proj:0",
                "v_proj:0",
                "o_proj:1",
            ),
            "post_attention_layernorm": ("post_attention_layernorm:!",),
            "mlp": (
                "gate_proj:0",
                "up_proj:0",
                "down_proj:1",
            ),
        },
    ]


__all__ = ["MLlamaQModel", "MLlamaTextQModel"]