minor

kevalmorabia97 · kevalmorabia97 · commit 1cec8ec0e480 · 2026-04-17T10:12:34.000-07:00
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
diff --git a/examples/megatron_bridge/prune_minitron.py b/examples/megatron_bridge/prune_minitron.py
@@ -18,7 +18,7 @@
 while skipping pruning of num_attention_heads using following defaults:
     1024 samples from nemotron-post-training-dataset-v2 for calibration,
     at-most 20% depth (num_layers) and 40% width is pruned per prunable hparam (hidden_size, ffn_hidden_size, ...),
-    top-10 candidates are evaluated for MMLU score (10% sampled data) to select the best model.
+    top-10 candidates are evaluated for MMLU score (5% sampled data) to select the best model.
 
     torchrun --nproc_per_node 2 prune_minitron.py \
         --hf_model_name_or_path Qwen/Qwen3-8B \
@@ -140,11 +140,11 @@ def get_args() -> argparse.Namespace:
     parser.add_argument(
         "--prune_score_func",
         type=str,
-        default="mmlu_10pct",
+        default="mmlu_5pct",
         help=(
             "Score function to use for NAS-based pruning (--prune_target_params). Only supports MMLU at the moment. "
             "Format: mmlu_<N>pct where <N> is the percentage of MMLU data to sample per subject "
-            "(e.g. mmlu_10pct for 10%, mmlu_100pct for full eval)."
+            "(e.g. mmlu_5pct for 5%, mmlu_100pct for full eval)."
         ),
     )
     parser.add_argument(
@@ -299,17 +299,13 @@ def main(args: argparse.Namespace):
         match = re.fullmatch(r"mmlu_(\d+)pct", args.prune_score_func)
         if not match:
             raise ValueError(
-                f"Invalid score function: {args.prune_score_func}. "
-                "Expected format: mmlu_<N>pct (e.g. mmlu_10pct)"
+                f"Invalid score function: {args.prune_score_func}. Expected format: mmlu_<N>pct (e.g. mmlu_5pct)"
             )
-        mmlu_pct = int(match.group(1))
-        if not 0 < mmlu_pct <= 100:
-            raise ValueError("--prune_score_func percentage must be in the range [1, 100].")
-        _mmlu_frac = mmlu_pct / 100.0
+        mmlu_frac = float(match.group(1)) / 100.0
 
         def score_func(m):
             return megatron_mmlu(
-                m, tokenizer, few_shots=0, fraction=_mmlu_frac, batch_size=args.calib_mbs
+                m, tokenizer, few_shots=0, fraction=mmlu_frac, batch_size=args.calib_mbs
             )
 
         pruning_config["score_func"] = score_func
diff --git a/examples/pruning/README.md b/examples/pruning/README.md
@@ -124,7 +124,7 @@ This mode can be useful when you don't know the exact dimensions you want to pru
 from modelopt.torch.utils.plugins.megatron_mmlu import megatron_mmlu
 
 def score_func(m):
-    return megatron_mmlu(m, tokenizer, fraction=0.1, batch_size=4)  # 10% sampled data for faster eval
+    return megatron_mmlu(m, tokenizer, fraction=0.05, batch_size=4)  # 5% sampled data for faster eval
 
 # Specify target parameter count and configure the auto pruning algorithm
 # Save minitron scores at checkpoint so we can resume pruning without running the forward loop again
@@ -147,7 +147,7 @@ mtp.prune(...)
 
 1. **Importance Scoring**: Same as manual pruning - computes activation magnitudes for all parameters (takes ~5 minutes for an 8B model)
 2. **Search Space Construction**: Generates a search space of possible architectures based on the search space config and other configs (`max_width_pruning`, `max_depth_pruning`, `hparams_to_skip`)
-3. **Architecture Search**: Find candidate architectures that meet the parameter constraint and evaluate `top_k` (based on number of parameters) of them using `score_func` e.g. MMLU, negative validation loss, etc. (takes ~5 min per candidate for an 8B model MMLU score with 10% sampled data)
+3. **Architecture Search**: Find candidate architectures that meet the parameter constraint and evaluate `top_k` (based on number of parameters) of them using `score_func` e.g. MMLU, negative validation loss, etc. (takes 2-3 mins per candidate for an 8B model MMLU score with 5% sampled data)
 4. **Best Architecture Selection**: Returns the architecture (best `export_config`) with the highest actual score from the top-K evaluated architectures
 5. **Weight Slicing**: Slices the model weights according to the best pruned architecture found
 
diff --git a/modelopt/torch/utils/plugins/megatron_mmlu.py b/modelopt/torch/utils/plugins/megatron_mmlu.py
@@ -78,6 +78,7 @@ def megatron_mmlu(
         f"\nMMLU ({fraction * 100}%, {few_shots}-shot, Batch Size: {batch_size}) evaluation started...\n"
         "First batch may take longer to evaluate for Pipeline Parallel models."
     )
+    assert 0 < fraction <= 1, "Fraction must be between 0 and 1"
 
     # Token IDs for " A", " B", " C", " D" — the last subword handles edge cases.
     choice_ids = [tokenizer.encode(f" {c}", add_special_tokens=False)[-1] for c in _CHOICES]

Original file line number	Diff line number	Diff line change
`@@ -78,6 +78,7 @@ def megatron_mmlu(`
`78`	`78`	`f"\nMMLU ({fraction * 100}%, {few_shots}-shot, Batch Size: {batch_size}) evaluation started...\n"`
`79`	`79`	`"First batch may take longer to evaluate for Pipeline Parallel models."`
`80`	`80`	`)`
	`81`	`+ assert 0 < fraction <= 1, "Fraction must be between 0 and 1"`
`81`	`82`
`82`	`83`	`# Token IDs for " A", " B", " C", " D" — the last subword handles edge cases.`
`83`	`84`	`choice_ids = [tokenizer.encode(f" {c}", add_special_tokens=False)[-1] for c in _CHOICES]`