Update examples/llm_ptq/hf_ptq.py

arendu · coderabbitai[bot] · arendu · commit a4530c1148fa · 2026-03-02T12:20:47.000-08:00
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
Signed-off-by: Adi Renduchintala <adithya.r@gmail.com>
diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py
@@ -1088,9 +1088,9 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument(
         "--skip_generate",
         help=(
-            "Skip the pre/post-quantization generation preview calls. "
-            "Use this for very large models that cannot run forward passes "
-            "(e.g. models split across GPU and CPU via device_map)."
+            "Skip pre/post-quantization preview calls that invoke model.generate(). "
+            "Note: this does not skip calibration or batch-size probing. "
+            "For very large models, pair with --batch_size 1 to avoid max-batch probing."
         ),
         default=False,
         action="store_true",