
Commit 039a9c1

ChenhanYu and claude committed
update: Qwen3-8B PTQ with FP8+NVFP4 and MMLU threshold 0.68
- Add FP8_DEFAULT_CFG as task_1 alongside NVFP4 task_0
- Pass MMLU_LOWER_BOUND from typed config to quantize.sh
- Raise MMLU threshold to 0.68 (matches quantize_resume test)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Signed-off-by: Chenhan Yu <chenhany@nvidia.com>
1 parent b04a2db · commit 039a9c1

3 files changed: 24 additions & 3 deletions


tools/launcher/common/megatron_lm/quantize/quantize.sh

Lines changed: 1 addition & 1 deletion
@@ -38,7 +38,7 @@ EXPORT_EXE="bash modules/Megatron-LM/examples/post_training/modelopt/export.sh"
 export MLM_EXTRA_ARGS=${@}
 ${QUANTIZE_EXE} ${MLM_MODEL_CFG} ${QUANT_CFG}
 
-export MLM_EXTRA_ARGS="--mmlu-dataset ${MMLU_DATASET:-/hf-local/cais/mmlu} --fraction 0.01 --lower-bound 0.38 --disable-tqdm"
+export MLM_EXTRA_ARGS="--mmlu-dataset ${MMLU_DATASET:-/hf-local/cais/mmlu} --fraction 0.01 --lower-bound ${MMLU_LOWER_BOUND:-0.38} --disable-tqdm"
 MLM_MODEL_CKPT=${MLM_MODEL_SAVE} ${MMLU_EXE} ${MLM_MODEL_CFG}
 
 ###################################################################################################
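For reference, here is a minimal sketch (not part of the commit) of how the new ${MMLU_LOWER_BOUND:-0.38} expansion behaves: bash keeps 0.38 as the fallback unless the launcher exports a value, which is what the task.py change below arranges. The helper function and the echo probe are hypothetical illustrations.

# Sketch only (not from the repo): probes bash's ${MMLU_LOWER_BOUND:-0.38}
# default expansion, the mechanism quantize.sh now relies on.
import os
import subprocess


def resolved_lower_bound(value=None):
    """Return the threshold the shell would substitute into MLM_EXTRA_ARGS."""
    env = os.environ.copy()
    env.pop("MMLU_LOWER_BOUND", None)
    if value is not None:
        env["MMLU_LOWER_BOUND"] = value
    out = subprocess.run(
        ["bash", "-c", 'echo "${MMLU_LOWER_BOUND:-0.38}"'],
        env=env, capture_output=True, text=True, check=True,
    )
    return out.stdout.strip()


print(resolved_lower_bound())        # "0.38" -- fallback when the variable is unset
print(resolved_lower_bound("0.68"))  # "0.68" -- value exported by the launcher wins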

tools/launcher/common/megatron_lm/quantize/task.py

Lines changed: 1 addition & 0 deletions
@@ -102,4 +102,5 @@ def __post_init__(self):
             {"HF_MODEL_CKPT": f"{c.hf_local}{c.model}"},
             {"MMLU_DATASET": f"{c.hf_local}{c.mmlu_dataset}"},
             {"TP": str(c.tp)},
+            {"MMLU_LOWER_BOUND": str(c.mmlu_lower_bound)},
         ]
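The added entry is built in the task's __post_init__, which turns typed config fields into environment variables for the job. Below is a minimal, self-contained sketch of that pattern; the dataclass name, the env_overrides attribute, and the 0.38 default are assumptions for illustration, and only the dict entries themselves come from the diff.

# Sketch only: a stand-in for the typed config consumed by MegatronLMQuantizeTask.
# Field names mirror the diff; the class shape and defaults are assumed.
from dataclasses import dataclass, field


@dataclass
class QuantizeConfigSketch:
    model: str = "Qwen/Qwen3-8B"
    mmlu_dataset: str = "cais/mmlu"
    hf_local: str = "/hf-local/"
    tp: int = 8
    mmlu_lower_bound: float = 0.38  # the Qwen3-8B YAML overrides this to 0.68
    env_overrides: list = field(default_factory=list, init=False)

    def __post_init__(self):
        c = self
        self.env_overrides = [
            {"HF_MODEL_CKPT": f"{c.hf_local}{c.model}"},
            {"MMLU_DATASET": f"{c.hf_local}{c.mmlu_dataset}"},
            {"TP": str(c.tp)},
            # New in this commit: forward the threshold to quantize.sh
            {"MMLU_LOWER_BOUND": str(c.mmlu_lower_bound)},
        ]


cfg = QuantizeConfigSketch(mmlu_lower_bound=0.68)
print(cfg.env_overrides[-1])  # {'MMLU_LOWER_BOUND': '0.68'}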

tools/launcher/examples/Qwen/Qwen3-8B/megatron_lm_ptq.yaml

Lines changed: 22 additions & 2 deletions
@@ -1,4 +1,6 @@
-# Qwen3-8B NVFP4 quantization (8 GPUs, for Slurm clusters).
+# Qwen3-8B PTQ quantization (8 GPUs, for Slurm clusters).
+#
+# 2-step pipeline: NVFP4 then FP8, each followed by MMLU evaluation.
 #
 # Uses MegatronLMQuantizeTask with typed config — see common/megatron_lm/quantize/task.py
 # for all available fields.
@@ -8,7 +10,7 @@
 #
 # For single-GPU local Docker, use megatron_lm_ptq_local.yaml instead.
 
-job_name: Qwen3-8B_NVFP4_DEFAULT_CFG
+job_name: Qwen3-8B_PTQ
 pipeline:
   skip: false
   allow_to_fail: false
@@ -23,6 +25,24 @@ pipeline:
       calib_dataset: abisee/cnn_dailymail
       calib_size: 32
       mmlu_dataset: cais/mmlu
+      mmlu_lower_bound: 0.68
+      hf_local: /hf-local/
+      slurm_config:
+        _factory_: "slurm_factory"
+        nodes: 1
+        ntasks_per_node: 8
+        gpus_per_node: 8
+
+  task_1:
+    _target_: common.megatron_lm.quantize.task.MegatronLMQuantizeTask
+    config:
+      model: Qwen/Qwen3-8B
+      quant_cfg: FP8_DEFAULT_CFG
+      tp: 8
+      calib_dataset: abisee/cnn_dailymail
+      calib_size: 32
+      mmlu_dataset: cais/mmlu
+      mmlu_lower_bound: 0.68
       hf_local: /hf-local/
       slurm_config:
         _factory_: "slurm_factory"
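A quick way to confirm the two-task layout after this change is to load the YAML and read each task's quant_cfg and mmlu_lower_bound. The sketch below assumes PyYAML is installed and that task_0/task_1 sit directly under pipeline, as the hunk context suggests; task_0's quant_cfg is not visible in this diff, so its exact value is left unasserted.

# Sketch only: sanity-check the updated pipeline definition with PyYAML.
import yaml

path = "tools/launcher/examples/Qwen/Qwen3-8B/megatron_lm_ptq.yaml"
with open(path) as f:
    cfg = yaml.safe_load(f)

for name in ("task_0", "task_1"):
    task = cfg["pipeline"][name]["config"]
    print(name, task.get("quant_cfg"), task.get("mmlu_lower_bound"))
# task_1 should report FP8_DEFAULT_CFG with an MMLU lower bound of 0.68;
# task_0 keeps its NVFP4 config and now uses the same 0.68 bound.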
