Skip to content

Commit c104604

Browse files
[MODEL] support nemotron_labs_diffusion (#2909)
* support nemotron_labs_diffusion Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai> * update score Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai> * update README.md Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai> --------- Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
1 parent 0f44a4c commit c104604

5 files changed

Lines changed: 75 additions & 16 deletions

File tree

README.md

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
## Latest News
2323

24+
* 05/21/2026 7.1.0-dev `main`: ✨ Added `nemotron_labs_diffusion` model support
2425
* 05/20/2026 7.1.0-dev `main`: ✨ Added `interns1`, `ovis2_5`, `ovis2_6_moe` and `ovis2_6_next` model support
2526
* 05/15/2026 7.1.0-dev `main`: ✨ Added `mimo_v2` model support
2627
* 05/13/2026 7.1.0-dev `main`: ✨ Added `minicpmv_4_6` and `DeepSeek V4` model support
@@ -247,23 +248,23 @@ Selected public references where teams or companies explicitly mention GPT-QMode
247248

248249
## Model Support
249250

250-
| Model | | | | | | | | | |
251-
|--------------------------|---|---------------------------------|---|------------------|---|---------------------|---|-------------------------|---|
252-
| Apertus || EXAONE 3/4 || Dots1 || Mistral3 || Qwen 2/3/3.5 (Next/MoE) ||
253-
| Baichuan || Falcon (H1 / Mamba) || InternLM 1/2/2.5 || Mixtral || Qwen 2/2.5/3 VL ||
254-
| Bloom || FastVLM || Kimi K2 || MobileLLM || Qwen 2.5/3 Omni ||
255-
| ChatGLM || Gemma 1-4 / 3n || Klear || MOSS || RefinedWeb ||
256-
| CodeGen || GPTBigCode || LING/RING || MPT || StableLM ||
257-
| Cohere 1-2 || GPT-Neo / NeoX || Llama 1-3.3 || Nemotron H / Omni || StarCoder2 ||
258-
| DBRX Converted || GPT-2 || Llama 3.2 VL || Nemotron Ultra || TeleChat2 ||
259-
| Deci || GPT-J || Llama 4 || OPT || Trinity ||
260-
| DeepSeek-V2/V3/V4/R1 || GPT-OSS || LongCat Flash || OLMo2 / LLaDA2 || Yi ||
251+
| Model | | | | | | | | | |
252+
|--------------------------|---|---------------------------------|---|------------------|---|---------------------------------|---|-------------------------|---|
253+
| Apertus || EXAONE 3/4 || Dots1 || Mistral3 || Qwen 2/3/3.5 (Next/MoE) ||
254+
| Baichuan || Falcon (H1 / Mamba) || InternLM 1/2/2.5 || Mixtral || Qwen 2/2.5/3 VL ||
255+
| Bloom || FastVLM || Kimi K2 || MobileLLM || Qwen 2.5/3 Omni ||
256+
| ChatGLM || Gemma 1-4 / 3n || Klear || MOSS || RefinedWeb ||
257+
| CodeGen || GPTBigCode || LING/RING || MPT || StableLM ||
258+
| Cohere 1-2 || GPT-Neo / NeoX || Llama 1-3.3 || Nemotron H / Omni || StarCoder2 ||
259+
| DBRX Converted || GPT-2 || Llama 3.2 VL || Nemotron Ultra / Labs-Diffusion || TeleChat2 ||
260+
| Deci || GPT-J || Llama 4 || OPT || Trinity ||
261+
| DeepSeek-V2/V3/V4/R1 || GPT-OSS || LongCat Flash || OLMo2 / LLaDA2 || Yi ||
261262
| DeepSeek-V2-Lite || Granite / Granite MoE || LongLLaMA || Ovis 1.6/2/2.5/2.6 MoE/2.6 Next || Seed-OSS ||
262-
| Dream || GRIN-MoE || Instella || Phi 1-4 || Voxtral ||
263-
| ERNIE 4.5 / MoE / VL MoE || GLM 4/4V/4.5V/4.6V/5/5.1/OCR/ASR || GLM4 MoE / Lite / 4.5V MoE || MiniCPM 3/O/V/V 4_6 || PanGu-α ||
264-
| XVERSE || Brumby || Hymba || Mistral || Qwen 1/2/3/3.5 ||
265-
| MiniMax M2 || AfMoE || Bailing-MoE || LFM2-MoE || Marin ||
266-
| InternVL Chat || Laguna || Mimo / Mimo V2 || Zamba / Zamba2 || Intern S1 ||
263+
| Dream || GRIN-MoE || Instella || Phi 1-4 || Voxtral ||
264+
| ERNIE 4.5 / MoE / VL MoE || GLM 4/4V/4.5V/4.6V/5/5.1/OCR/ASR || GLM4 MoE / Lite / 4.5V MoE || MiniCPM 3/O/V/V 4_6 || PanGu-α ||
265+
| XVERSE || Brumby || Hymba || Mistral || Qwen 1/2/3/3.5 ||
266+
| MiniMax M2 || AfMoE || Bailing-MoE || LFM2-MoE || Marin ||
267+
| InternVL Chat || Laguna || Mimo / Mimo V2 || Zamba / Zamba2 || Intern S1 ||
267268

268269
Prism Bonsai GGUF checkpoints are supported for inference only through GPT-QModel's native GGUF path and internal GGUF runtime. Bonsai checkpoints load through the normal model path or repo argument and do not require the external `gguf` package. Prism model quantization is not included.
269270

gptqmodel/models/auto.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@
143143
from .definitions.mobilellm import MobileLLMQModel # noqa: E402
144144
from .definitions.moss import MossQModel # noqa: E402
145145
from .definitions.mpt import MptQModel # noqa: E402
146+
from .definitions.nemotron_labs_diffusion import NemotronLabsDiffusionQModel # noqa: E402
146147
from .definitions.nemotron_h import NemotronHQModel # noqa: E402
147148
from .definitions.nemotron_omni import NemotronOmniQModel # noqa: E402
148149
from .definitions.opt import OptQModel # noqa: E402
@@ -310,6 +311,7 @@
310311
"longcat_flash": LongCatFlashQModel,
311312
"llava_qwen2": LlavaQwen2QModel,
312313
"nemotron_h": NemotronHQModel,
314+
"nemotron_labs_diffusion": NemotronLabsDiffusionQModel,
313315
"nemotronh_nano_omni_reasoning_v3": NemotronOmniQModel,
314316
"bailing_moe": BailingMoeQModel,
315317
"bailing_hybrid": BailingMoeQModel,

gptqmodel/models/definitions/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
from .mobilellm import MobileLLMQModel
6161
from .moss import MossQModel
6262
from .mpt import MptQModel
63+
from .nemotron_labs_diffusion import NemotronLabsDiffusionQModel
6364
from .opt import OptQModel
6465
from .ovis import OvisQModel
6566
from .ovis2_5 import Ovis2_5QModel
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# SPDX-FileCopyrightText: 2026 ModelCloud.ai
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from transformers import AutoModel
5+
6+
from ..base import BaseQModel
7+
8+
9+
class NemotronLabsDiffusionQModel(BaseQModel):
    """Model definition for Nemotron Labs Diffusion checkpoints.

    Nemotron Labs Diffusion uses a custom ``AutoModel`` implementation whose
    Ministral-style decoder stack is nested under ``encoder.layers``.
    """

    # The model is loaded through the generic AutoModel entry point and is
    # flagged as requiring trust_remote_code.
    loader = AutoModel
    require_trust_remote_code = True

    # Dotted module paths for the output head and for the norm module that
    # precedes it (names as exposed by the upstream model).
    lm_head = "diffusion_head"
    pre_lm_head_norm_module = "encoder.norm"

    awq_scale_optimize_shape_dependent_modules = ["self_attn.o_proj"]

    # Path to the repeated decoder layers ("#" stands for the layer index),
    # followed by the per-layer sub-modules.
    # NOTE(review): the ":0" / ":1" / ":!" suffixes are interpreted by
    # BaseQModel, which is not visible here; confirm their exact meaning there.
    module_tree = [
        "encoder",
        "layers",
        "#",
        {
            "input_layernorm": ("input_layernorm:!",),
            "self_attn": (
                "q_proj:0",
                "k_proj:0",
                "v_proj:0",
                "o_proj:1",
            ),
            "post_attention_layernorm": ("post_attention_layernorm:!",),
            "mlp": (
                "gate_proj:0",
                "up_proj:0",
                "down_proj:1",
            ),
        },
    ]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
2+
# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
3+
# SPDX-License-Identifier: Apache-2.0
4+
# Contact: qubitium@modelcloud.ai, x.com/qubitium
5+
6+
from model_test import ModelTest
7+
8+
9+
class TestNemotronUltra(ModelTest):
    # Quantize-and-evaluate test for the Nemotron-Labs-Diffusion-3B checkpoint.
    # NOTE(review): the class and test-method names say "Ultra" although the
    # checkpoint under test is Nemotron-Labs-Diffusion; the names are kept
    # as-is here, consider renaming them in a follow-up.

    TRUST_REMOTE_CODE = True
    NATIVE_MODEL_ID = "/monster/data/model/Nemotron-Labs-Diffusion-3B"  # nvidia/Nemotron-Labs-Diffusion-3B
    SAVE_PATH = "./temp/Nemotron-Labs-Diffusion"

    # FIXME: Evaluation appears to be incompatible with NemoStation/Marlin-2B
    # support; the original model's scores are also quite low.
    # Original model score:
    #   {'arc_challenge': {'accuracy,loglikelihood': 0.19795221843003413,
    #                      'accuracy,loglikelihood_norm': 0.20819112627986347}}
    EVAL_TASKS_SLOW = {
        "arc_challenge": {
            "chat_template": True,
            "acc": {
                "value": 0.197098976109215,
                "floor_pct": 0.36,
            },
            "acc_norm": {
                "value": 0.2235494880546075,
                "floor_pct": 0.36,
            },
        },
    }
    # The fast task set is derived from the slow one by the ModelTest helper.
    EVAL_TASKS_FAST = ModelTest.derive_fast_eval_tasks(EVAL_TASKS_SLOW)

    def test_nemotron_ultra(self):
        # Runs the quantize + evaluate flow provided by ModelTest.
        self.quantize_and_evaluate()

0 commit comments

Comments
 (0)