Add nvfp4_mse and nvfp4_local_hessian options to the ptq script. (#1113)

bkartal-dev · web-flow · commit 92622a9aa607 · 2026-04-18T07:28:49.000Z
### What does this PR do? Type of change: Bugfix  Add the recently introduced quant configs (nvfp4_mse and nvfp4_local_hessian) to the example PTQ script. ### Testing I have locally run auto_quantize with these two quant_configs and successfully obtained exported HF artifacts. ### Before your PR is "*Ready for review*" Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/Model-Optimizer/blob/main/CONTRIBUTING.md) and your commits are signed (`git commit -s -S`). Make sure you read and follow the [Security Best Practices](https://github.com/NVIDIA/Model-Optimizer/blob/main/SECURITY.md#security-coding-practices-for-contributors) (e.g. avoiding hardcoded `trust_remote_code=True`, `torch.load(..., weights_only=False)`, `pickle`, etc.). - Is this change backward compatible?: ✅ / ❌ / N/A  - If you copied code from any other sources or added a new PIP dependency, did you follow guidance in `CONTRIBUTING.md`: ✅ / ❌ / N/A  - Did you write any new necessary tests?: ✅ / ❌ / N/A  - Did you update [Changelog](https://github.com/NVIDIA/Model-Optimizer/blob/main/CHANGELOG.rst)?: ✅ / ❌ / N/A  ### Additional Information  ## Summary by CodeRabbit * **New Features** * Added support for two new quantization formats, nvfp4_mse and nvfp4_local_hessian (nvfp4_experts_only was already accepted and is only reordered in the list), expanding available export options when using auto-quantize. * **Bug Fixes / UX** * Updated the invalid-quantization error message to include the newly accepted format identifiers.  Signed-off-by: Bilal Kartal <bkartal@nvidia.com> Signed-off-by: bkartal-dev <bkartal@nvidia.com>
diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py
@@ -337,6 +337,7 @@ def auto_quantize(
             "nvfp4_mlp_only",
             "nvfp4_experts_only",
             "nvfp4_omlp_only",
+            "nvfp4_local_hessian",
             "mxfp8",
         ]
         for qformat in qformat_list
diff --git a/examples/llm_ptq/scripts/huggingface_example.sh b/examples/llm_ptq/scripts/huggingface_example.sh
@@ -53,9 +53,9 @@ esac
 IFS=","
 for qformat in $QFORMAT; do
     case $qformat in
-    fp8 | fp8_pc_pt | fp8_pb_wo | int8_wo | int8_sq | int4_awq | w4a8_awq | fp16 | bf16 | nvfp4 | nvfp4_awq | w4a8_nvfp4_fp8 | w4a8_mxfp4_fp8 | nvfp4_mlp_only | nvfp4_experts_only | nvfp4_omlp_only | nvfp4_svdquant | mxfp8) ;;
+    fp8 | fp8_pc_pt | fp8_pb_wo | int8_wo | int8_sq | int4_awq | w4a8_awq | fp16 | bf16 | nvfp4 | nvfp4_awq | nvfp4_mse | w4a8_nvfp4_fp8 | w4a8_mxfp4_fp8 | nvfp4_experts_only | nvfp4_mlp_only | nvfp4_omlp_only | nvfp4_svdquant | mxfp8 | nvfp4_local_hessian) ;;
     *)
-        echo "Unknown quant argument: Expected one of: [fp8, fp8_pc_pt, fp8_pb_wo, int8_wo, int8_sq, int4_awq, w4a8_awq, fp16, bf16, nvfp4, nvfp4_awq, w4a8_nvfp4_fp8, w4a8_mxfp4_fp8, nvfp4_mlp_only, nvfp4_experts_only, nvfp4_omlp_only, nvfp4_svdquant, mxfp8]" >&2
+        echo "Unknown quant argument: Expected one of: [fp8, fp8_pc_pt, fp8_pb_wo, int8_wo, int8_sq, int4_awq, w4a8_awq, fp16, bf16, nvfp4, nvfp4_awq, nvfp4_mse, w4a8_nvfp4_fp8, w4a8_mxfp4_fp8, nvfp4_experts_only, nvfp4_mlp_only, nvfp4_omlp_only, nvfp4_svdquant, mxfp8, nvfp4_local_hessian]" >&2
         exit 1
         ;;
     esac

Original file line number	Diff line number	Diff line change
`@@ -337,6 +337,7 @@ def auto_quantize(`
`337`	`337`	`"nvfp4_mlp_only",`
`338`	`338`	`"nvfp4_experts_only",`
`339`	`339`	`"nvfp4_omlp_only",`
	`340`	`+ "nvfp4_local_hessian",`
`340`	`341`	`"mxfp8",`
`341`	`342`	`]`
`342`	`343`	`for qformat in qformat_list`