@@ -234,6 +234,57 @@ def test_is_homogeneous_hf_model_gpt_oss():
234     assert is_homogeneous_hf_model(model)
235235
236236
def test_qwen3_moe_nvfp4_experts_only_export_exclude_modules(tmp_path):
    """Test that NVFP4_EXPERTS_ONLY_CFG correctly excludes non-expert modules in HF export.

    For a Qwen3 MoE model, only routed expert layers (mlp.experts.*) should be
    quantized. Attention layers and lm_head should appear in the exported
    hf_quant_config.json exclude_modules.

    Reference: https://huggingface.co/nvidia/Qwen3.5-397B-A17B-NVFP4/blob/main/hf_quant_config.json
    """
    # Function-scope imports grouped up front; json was previously imported
    # mid-function, after the export side effects.
    import json

    from modelopt.torch.export.unified_export_hf import export_hf_checkpoint

    model = get_tiny_qwen3_moe()
    # from_config doesn't set architectures; export code requires it
    model.config.architectures = ["Qwen3MoeForCausalLM"]

    # Quantize with NVFP4_EXPERTS_ONLY_CFG (targets only *mlp.experts* patterns)
    mtq.quantize(model, mtq.NVFP4_EXPERTS_ONLY_CFG, lambda m: m(**m.dummy_inputs))

    # Export the quantized checkpoint to an HF-style directory.
    export_dir = tmp_path / "qwen3_moe_nvfp4_experts_only"
    export_hf_checkpoint(model, export_dir=export_dir)

    # Load the generated hf_quant_config.json
    hf_quant_config_path = export_dir / "hf_quant_config.json"
    assert hf_quant_config_path.exists(), "hf_quant_config.json should be generated"
    with open(hf_quant_config_path) as f:
        hf_quant_config = json.load(f)

    quant_section = hf_quant_config["quantization"]
    assert quant_section["quant_algo"] == "NVFP4"
    exclude_modules = quant_section["exclude_modules"]

    # Attention layers must be excluded
    assert any("self_attn" in m for m in exclude_modules), (
        f"self_attn should be in exclude_modules, got: {exclude_modules}"
    )

    # lm_head must be excluded
    assert any("lm_head" in m for m in exclude_modules), (
        f"lm_head should be in exclude_modules, got: {exclude_modules}"
    )

    # No exclude pattern should match the routed experts (shared-expert
    # patterns, if any, are allowed to be excluded).
    for pattern in exclude_modules:
        assert not ("mlp.experts." in pattern and "shared" not in pattern), (
            f"Routed expert pattern should NOT be excluded: {pattern}"
        )
286+
287+
237288def test_hf_decoder_discoverer_registration_path ():
238289 model = get_tiny_llama ()
239290 assert any (
0 commit comments