Skip to content

Commit 17fd111

Browse files
committed
Add qwen3 moe experts only test
1 parent d45219b commit 17fd111

File tree

1 file changed

+51
-0
lines changed

1 file changed

+51
-0
lines changed

tests/unit/torch/quantization/plugins/test_huggingface.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,57 @@ def test_is_homogeneous_hf_model_gpt_oss():
234234
assert is_homogeneous_hf_model(model)
235235

236236

237+
def test_qwen3_moe_nvfp4_experts_only_export_exclude_modules(tmp_path):
    """Test that NVFP4_EXPERTS_ONLY_CFG correctly excludes non-expert modules in HF export.

    For a Qwen3 MoE model, only routed expert layers (mlp.experts.*) should be quantized.
    Attention layers and lm_head should appear in the exported hf_quant_config.json
    exclude_modules.

    Reference: https://huggingface.co/nvidia/Qwen3.5-397B-A17B-NVFP4/blob/main/hf_quant_config.json
    """
    # Function-scope imports grouped at the top of the function (json was previously
    # imported mid-function, after the export call).
    import json

    from modelopt.torch.export.unified_export_hf import export_hf_checkpoint

    model = get_tiny_qwen3_moe()
    # from_config doesn't set architectures; export code requires it
    model.config.architectures = ["Qwen3MoeForCausalLM"]

    # Quantize with NVFP4_EXPERTS_ONLY_CFG (targets only *mlp.experts* patterns)
    mtq.quantize(model, mtq.NVFP4_EXPERTS_ONLY_CFG, lambda m: m(**m.dummy_inputs))

    # Export
    export_dir = tmp_path / "qwen3_moe_nvfp4_experts_only"
    export_hf_checkpoint(model, export_dir=export_dir)

    # Load the generated hf_quant_config.json
    hf_quant_config_path = export_dir / "hf_quant_config.json"
    assert hf_quant_config_path.exists(), "hf_quant_config.json should be generated"
    with open(hf_quant_config_path) as f:
        hf_quant_config = json.load(f)

    quant_section = hf_quant_config["quantization"]
    assert quant_section["quant_algo"] == "NVFP4"
    exclude_modules = quant_section["exclude_modules"]

    # Attention layers must be excluded
    assert any("self_attn" in m for m in exclude_modules), (
        f"self_attn should be in exclude_modules, got: {exclude_modules}"
    )

    # lm_head must be excluded
    assert any("lm_head" in m for m in exclude_modules), (
        f"lm_head should be in exclude_modules, got: {exclude_modules}"
    )

    # No exclude pattern should match the routed experts; "shared" patterns (e.g.
    # shared expert MLPs) are allowed to be excluded even if they mention experts.
    for pattern in exclude_modules:
        assert not ("mlp.experts." in pattern and "shared" not in pattern), (
            f"Routed expert pattern should NOT be excluded: {pattern}"
        )
286+
287+
237288
def test_hf_decoder_discoverer_registration_path():
238289
model = get_tiny_llama()
239290
assert any(

0 commit comments

Comments
 (0)