Skip to content

Commit ed42bc5

Browse files
[MODEL] support deepseek_v4 (#2877)
* LazyTurtle support deepseek_v4 Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai> * support deepseek_v4's WeightConverter Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai> * support deepseek_v4 Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai> * update README Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai> * Potential fix for pull request finding 'Unused import' Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com> --------- Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai> Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com>
1 parent 1a50635 commit ed42bc5

8 files changed

Lines changed: 592 additions & 58 deletions

File tree

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
## Latest News
2323

24-
* 05/13/2026 7.1.0-dev `main`: ✨ Added `minicpmv_4_6` model support
24+
* 05/13/2026 7.1.0-dev `main`: ✨ Added `minicpmv_4_6` and `DeepSeek V4` model support
2525
* 05/07/2026 7.1.0-dev `main`: ✨ Added `GLM-4.5V`, `GLM-4.6V`, `Zamba` and `Zamba2` model support
2626
* 04/29/2026 7.1.0-dev `main`: ✨ Added PoolSideAI `Laguna` model support for fused Laguna MoE checkpoints. Added `ERNIE 4.5 VL MoE`, `Ling-2.6-flash` and NVIDIA `Nemotron 3 Nano Omni` model support.
2727
* 04/28/2026 [7.0.0](https://github.com/ModelCloud/GPTQModel/releases/tag/v7.0.0): 🚀 Added Huawei Ascend NPU support through native torch kernels for GPTQ, AWQ, ParoQuant, GGUF, QQQ, and EXL3. Added `internvl_chat`, `gemma3n`, `GLM-OCR`, `GLM-ASR`, and `falcon_mamba` model support.
@@ -254,7 +254,7 @@ Selected public references where teams or companies explicitly mention GPT-QMode
254254
| Cohere 1-2 || GPT-Neo / NeoX || Llama 1-3.3 || Nemotron H / Omni || StarCoder2 ||
255255
| DBRX Converted || GPT-2 || Llama 3.2 VL || Nemotron Ultra || TeleChat2 ||
256256
| Deci || GPT-J || Llama 4 || OPT || Trinity ||
257-
| DeepSeek-V2/V3/R1 || GPT-OSS || LongCat Flash || OLMo2 / LLaDA2 || Yi ||
257+
| DeepSeek-V2/V3/V4/R1 || GPT-OSS || LongCat Flash || OLMo2 / LLaDA2 || Yi ||
258258
| DeepSeek-V2-Lite || Granite / Granite MoE || LongLLaMA || Ovis 1.6/2 || Seed-OSS ||
259259
| Dream || GRIN-MoE || Instella || Phi 1-4 || Voxtral ||
260260
| ERNIE 4.5 / MoE / VL MoE || GLM 4/4V/4.5V/4.6V/5/5.1/OCR/ASR || GLM4 MoE / Lite / 4.5V MoE || MiniCPM 3/O/V/V 4_6 || PanGu-α ||

gptqmodel/models/auto.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
from .definitions.decilm import DeciLMQModel # noqa: E402
8383
from .definitions.deepseek_v2 import DeepSeekV2QModel # noqa: E402
8484
from .definitions.deepseek_v3 import DeepSeekV3QModel # noqa: E402
85+
from .definitions.deepseek_v4 import DeepSeekV4QModel # noqa: E402
8586
from .definitions.dots1 import Dots1QModel # noqa: E402
8687
from .definitions.dream import DreamQModel # noqa: E402
8788
from .definitions.ernie4_5 import Ernie4_5QModel # noqa: E402
@@ -264,6 +265,7 @@
264265
"dbrx_converted": DbrxConvertedQModel,
265266
"deepseek_v2": DeepSeekV2QModel,
266267
"deepseek_v3": DeepSeekV3QModel,
268+
"deepseek_v4": DeepSeekV4QModel,
267269
"dots1": Dots1QModel,
268270
"exaone": ExaOneQModel,
269271
"exaone4": Exaone4QModel,

gptqmodel/models/definitions/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from .decilm import DeciLMQModel
1919
from .deepseek_v2 import DeepSeekV2QModel
2020
from .deepseek_v3 import DeepSeekV3QModel
21+
from .deepseek_v4 import DeepSeekV4QModel
2122
from .dots1 import Dots1QModel
2223
from .dream import DreamQModel
2324
from .exaone import ExaOneQModel
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# SPDX-FileCopyrightText: 2026 ModelCloud.ai
2+
# SPDX-FileCopyrightText: 2026 qubitium@modelcloud.ai
3+
# SPDX-License-Identifier: Apache-2.0
4+
# Contact: qubitium@modelcloud.ai, x.com/qubitium
5+
6+
from .deepseek_v3 import DeepSeekV3QModel
7+
8+
9+
# Projection layout shared by every routed expert and by the shared expert.
_EXPERT_PROJECTIONS = ("gate_proj:0", "up_proj:0", "down_proj:1")


class DeepSeekV4QModel(DeepSeekV3QModel):
    """Model definition for ``deepseek_v4`` checkpoints, layered on the V3 definition.

    Only the attributes below are declared here; everything else comes from
    ``DeepSeekV3QModel``.

    NOTE(review): entries suffixed ``:!`` appear to be modules kept out of the
    quantization blocks (the accompanying support test expects
    ``self_attn.o_a_proj`` to be absent from ``simple_layer_modules``), and the
    numeric suffixes presumably assign the remaining modules to groups --
    confirm against the module-tree parser in ``gptqmodel/utils/structure.py``.
    """

    # Name of the config attribute consulted for the routed-expert count
    # (presumably used to expand the "#" placeholder under "experts" -- confirm).
    dynamic_expert_index = "n_routed_experts"

    # Dotted path of the rotary embedding module.
    rotary_embedding = "model.rotary_emb"

    module_tree = [
        "model",
        "layers",
        "#",
        {
            "input_layernorm": ("input_layernorm:!",),
            "self_attn": (
                "q_a_norm:!",
                "q_a_proj:0",
                "q_b_norm:!",
                "q_b_proj:0",
                "o_a_proj:!",
                "o_b_proj:1",
                "kv_norm:!",
                "kv_proj:2",
            ),
            "post_attention_layernorm": ("post_attention_layernorm:!",),
            "mlp:moe": {
                "gate": ("gate:!",),
                "experts": {"#": _EXPERT_PROJECTIONS},
                "shared_experts": _EXPERT_PROJECTIONS,
            },
        },
    ]


__all__ = ["DeepSeekV4QModel"]

gptqmodel/utils/structure.py

Lines changed: 192 additions & 56 deletions
Large diffs are not rendered by default.

tests/models/test_deepseek_v4.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
2+
# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
3+
# SPDX-License-Identifier: Apache-2.0
4+
# Contact: qubitium@modelcloud.ai, x.com/qubitium
5+
from model_test import ModelTest
6+
7+
from gptqmodel import BACKEND
8+
9+
10+
class TestDeepseekV4(ModelTest):
    """Quantize-and-evaluate test configuration for the DeepSeek V4 Flash BF16 checkpoint.

    All behaviour comes from ``ModelTest``; this class only supplies the
    checkpoint location, the ARC-Challenge reference scores and a few
    loader switches.
    """

    # Local checkpoint path; the trailing comment records the hub identifier.
    NATIVE_MODEL_ID = "/monster/data/model/DeepSeek-V4-Flash-BF16"  # "deepseek-ai/DeepSeek-V4-Flash-BF16"

    # ARC-Challenge reference scores. The SLOW and FAST variants alias the
    # same two numbers, so both evaluation modes compare against one baseline.
    NATIVE_ARC_CHALLENGE_ACC = 0.4753
    NATIVE_ARC_CHALLENGE_ACC_NORM = 0.4855
    NATIVE_ARC_CHALLENGE_ACC_SLOW = NATIVE_ARC_CHALLENGE_ACC
    NATIVE_ARC_CHALLENGE_ACC_NORM_SLOW = NATIVE_ARC_CHALLENGE_ACC_NORM
    NATIVE_ARC_CHALLENGE_ACC_FAST = NATIVE_ARC_CHALLENGE_ACC_SLOW
    NATIVE_ARC_CHALLENGE_ACC_NORM_FAST = NATIVE_ARC_CHALLENGE_ACC_NORM_SLOW

    TRUST_REMOTE_CODE = True

    EVAL_TASKS_SLOW = {
        "arc_challenge": {
            "chat_template": True,
            "acc": {"value": NATIVE_ARC_CHALLENGE_ACC},
            "acc_norm": {"value": NATIVE_ARC_CHALLENGE_ACC_NORM},
        },
    }
    # The fast task table is derived from the slow one by the base-class helper.
    EVAL_TASKS_FAST = ModelTest.derive_fast_eval_tasks(EVAL_TASKS_SLOW)

    LOAD_BACKEND = BACKEND.AUTO
    USE_FLASH_ATTN = False
    MODEL_COMPAT_FAST_LAYER_POSITION = "first"

    # Previously named ``test_deepseekv2lite`` (copy-paste leftover); renamed so
    # test reports and ``-k`` selection identify the model actually under test.
    def test_deepseek_v4(self):
        """Run the shared quantize-and-evaluate flow for this model."""
        self.quantize_and_evaluate()
33+

tests/test_deepseek_v4_support.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from types import SimpleNamespace
2+
3+
from gptqmodel.models import auto
4+
from gptqmodel.models.definitions.deepseek_v4 import DeepSeekV4QModel
5+
6+
7+
def test_deepseek_v4_model_type_selects_definition(monkeypatch):
    """A config whose ``model_type`` is ``deepseek_v4`` must resolve to ``DeepSeekV4QModel``."""
    stub_config = SimpleNamespace(model_type="deepseek_v4")

    def _echo_trust_flag(path, trust_remote_code=False):
        # Stand-in for the real resolver: hand back the flag untouched.
        return trust_remote_code

    def _return_stub_config(*args, **kwargs):
        # Stand-in for AutoConfig.from_pretrained: always yield the stub config,
        # so the config lookup does not need a real checkpoint.
        return stub_config

    monkeypatch.setattr(auto, "resolve_trust_remote_code", _echo_trust_flag)
    monkeypatch.setattr(auto.AutoConfig, "from_pretrained", _return_stub_config)

    resolved = auto.check_and_get_model_definition("/tmp/deepseek-v4")
    assert resolved is DeepSeekV4QModel
14+
15+
16+
def test_deepseek_v4_module_tree_matches_v4_attention_and_fused_experts():
    """The flattened layer modules expose the V4 attention and expert projections.

    Builds the layer-module blocks for a config with 256 routed experts and
    checks which module names do, and do not, appear in them.
    """
    blocks = DeepSeekV4QModel.simple_layer_modules(
        model_config=SimpleNamespace(n_routed_experts=256),
        quantize_config=SimpleNamespace(dynamic=None),
    )

    # Collapse the per-block name lists into one set for membership checks.
    flat_modules = set()
    for block in blocks:
        flat_modules.update(block)

    required = (
        "self_attn.q_a_proj",
        "self_attn.q_b_proj",
        "self_attn.kv_proj",
        "self_attn.o_b_proj",
        "mlp.experts.99.gate_proj",
        "mlp.experts.99.up_proj",
        "mlp.experts.99.down_proj",
        "mlp.shared_experts.gate_proj",
    )
    absent = [name for name in required if name not in flat_modules]
    assert not absent

    # grouped projection must stay native and should not be part of quant blocks
    assert "self_attn.o_a_proj" not in flat_modules

0 commit comments

Comments
 (0)