add w4a16_nvfp4 recipes

hychiang-git · hychiang-git · commit 9808e01bc03b · 2026-05-06T22:40:57.000Z
Signed-off-by: Hung-Yueh Chiang <hungyuehc@nvidia.com>
diff --git a/modelopt_recipes/configs/ptq/units/w4a16_nvfp4.yaml b/modelopt_recipes/configs/ptq/units/w4a16_nvfp4.yaml
@@ -0,0 +1,24 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# W4A16 NVFP4: NVFP4 E2M1 dynamic weight quantizer only; activations remain in BF16.
+
+# modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
+imports:  # first YAML document: alias -> config path; the list after '---' is a second document
+  nvfp4: configs/numerics/nvfp4  # alias referenced by the $import under cfg
+---
+  - quantizer_name: '*weight_quantizer'  # wildcard; presumably matches every weight quantizer (confirm)
+    cfg:
+      $import: nvfp4
diff --git a/modelopt_recipes/general/ptq/nvfp4_weight_only-kv_fp16.yaml b/modelopt_recipes/general/ptq/nvfp4_weight_only-kv_fp16.yaml
@@ -0,0 +1,29 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+imports:  # alias -> config path; each alias is referenced by a $import entry in quant_cfg
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+  w4a16_nvfp4: configs/ptq/units/w4a16_nvfp4
+
+metadata:
+  recipe_type: ptq
+  description: NVFP4 W4A16 weight-only, BF16 activations, max calibration. No calibration forward pass required.
+quantize:
+  algorithm: max
+  quant_cfg:  # NOTE(review): entry order looks significant (later entries may override earlier) - confirm
+    - $import: base_disable_all
+    - $import: w4a16_nvfp4
+    - $import: default_disabled_quantizers