File tree Expand file tree Collapse file tree
common/megatron_lm/quantize Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -38,7 +38,7 @@ EXPORT_EXE="bash modules/Megatron-LM/examples/post_training/modelopt/export.sh"
3838export MLM_EXTRA_ARGS=${@}
3939${QUANTIZE_EXE} ${MLM_MODEL_CFG} ${QUANT_CFG}
4040
41- export MLM_EXTRA_ARGS=" --mmlu-dataset ${MMLU_DATASET:-/hf-local/cais/mmlu} --fraction 0.01 --lower-bound 0.38 --disable-tqdm"
41+ export MLM_EXTRA_ARGS=" --mmlu-dataset ${MMLU_DATASET:-/hf-local/cais/mmlu} --fraction 0.01 --lower-bound ${MMLU_LOWER_BOUND:-0.38} --disable-tqdm"
4242MLM_MODEL_CKPT=${MLM_MODEL_SAVE} ${MMLU_EXE} ${MLM_MODEL_CFG}
4343
4444# ##################################################################################################
Original file line number Diff line number Diff line change @@ -102,4 +102,5 @@ def __post_init__(self):
102102 {"HF_MODEL_CKPT": f"{c.hf_local}{c.model}"},
103103 {"MMLU_DATASET": f"{c.hf_local}{c.mmlu_dataset}"},
104104 {"TP": str(c.tp)},
105+ {"MMLU_LOWER_BOUND": str(c.mmlu_lower_bound)},
105106 ]
Original file line number Diff line number Diff line change 1- # Qwen3-8B NVFP4 quantization (8 GPUs, for Slurm clusters).
1+ # Qwen3-8B PTQ quantization (8 GPUs, for Slurm clusters).
2+ #
3+ # 2-step pipeline: NVFP4 then FP8, each followed by MMLU evaluation.
24#
35# Uses MegatronLMQuantizeTask with typed config — see common/megatron_lm/quantize/task.py
46# for all available fields.
810#
911# For single-GPU local Docker, use megatron_lm_ptq_local.yaml instead.
1012
11- job_name: Qwen3-8B_NVFP4_DEFAULT_CFG
13+ job_name: Qwen3-8B_PTQ
1214pipeline:
1315 skip: false
1416 allow_to_fail: false
@@ -23,6 +25,24 @@ pipeline:
2325 calib_dataset: abisee/cnn_dailymail
2426 calib_size: 32
2527 mmlu_dataset: cais/mmlu
28+ mmlu_lower_bound: 0.68
29+ hf_local: /hf-local/
30+ slurm_config:
31+ _factory_: "slurm_factory"
32+ nodes: 1
33+ ntasks_per_node: 8
34+ gpus_per_node: 8
36+ task_1:
37+ _target_: common.megatron_lm.quantize.task.MegatronLMQuantizeTask
38+ config:
39+ model: Qwen/Qwen3-8B
40+ quant_cfg: FP8_DEFAULT_CFG
41+ tp: 8
42+ calib_dataset: abisee/cnn_dailymail
43+ calib_size: 32
44+ mmlu_dataset: cais/mmlu
45+ mmlu_lower_bound: 0.68
2646 hf_local: /hf-local/
2747 slurm_config:
2848 _factory_: "slurm_factory"
You can’t perform that action at this time.
0 commit comments