Skip to content

Commit dc67001

Browse files
committed
nvfp4_dynamic is default
Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
1 parent dcf10a6 commit dc67001

9 files changed

Lines changed: 38 additions & 18 deletions

File tree

docs/source/guides/10_recipes.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ list order), then inline keys last.
205205
cfg:
206206
$import: nvfp4
207207
208-
# Import + override — import nvfp4_dynamic, then override type inline
208+
# Import + override — import nvfp4, then override type inline
209209
cfg:
210210
$import: nvfp4 # imports {num_bits: e2m1, block_sizes: {-1: 16, type: dynamic, ...}}
211211
block_sizes:
@@ -301,8 +301,8 @@ Reusable snippets are stored under ``modelopt_recipes/configs/``:
301301
- Description
302302
* - ``configs/numerics/fp8``
303303
- FP8 E4M3 quantizer attributes
304-
* - ``configs/numerics/nvfp4_dynamic``
305-
- NVFP4 E2M1 blockwise, dynamic calibration, FP8 scales
304+
* - ``configs/numerics/nvfp4``
305+
- NVFP4 E2M1 blockwise, dynamic calibration, FP8 scales (default)
306306
* - ``configs/numerics/nvfp4_static``
307307
- NVFP4 E2M1 blockwise, static calibration, FP8 scales
308308
* - ``configs/ptq/base_disable_all``
@@ -597,8 +597,8 @@ The ``modelopt_recipes/`` package is organized as follows:
597597
+-- configs/ # Reusable config snippets (imported via $import)
598598
+-- numerics/ # Numeric format definitions
599599
| +-- fp8.yml
600-
| +-- nvfp4_dynamic.yml
601600
| +-- nvfp4_static.yml
601+
| +-- nvfp4.yml
602602
+-- ptq/ # PTQ-specific entry snippets
603603
+-- base_disable_all.yaml
604604
+-- default_disabled_quantizers.yaml

modelopt_recipes/configs/numerics/nvfp4_dynamic.yml renamed to modelopt_recipes/configs/numerics/nvfp4.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16-
# NVFP4 E2M1 blockwise with dynamic calibration and FP8 E4M3 scales.
16+
# NVFP4 E2M1 blockwise quantizer attributes with FP8 E4M3 scales (dynamic calibration, the default).
1717
num_bits: e2m1
1818
block_sizes:
1919
-1: 16

modelopt_recipes/configs/numerics/nvfp4_static.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16-
# NVFP4 E2M1 blockwise with static calibration and FP8 E4M3 scales.
16+
# NVFP4 E2M1 blockwise quantizer attributes with FP8 E4M3 scales (static calibration).
1717
num_bits: e2m1
1818
block_sizes:
1919
-1: 16

modelopt_recipes/configs/ptq/w4a4_nvfp4_nvfp4.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
# W4A4 NVFP4: NVFP4 E2M1 dynamic weight and activation quantizers.
17+
imports:
18+
nvfp4: configs/numerics/nvfp4
19+
---
20+
- quantizer_name: '*weight_quantizer'
21+
cfg:
22+
$import: nvfp4
23+
- quantizer_name: '*input_quantizer'
24+
cfg:
25+
$import: nvfp4

modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
imports:
1717
base_disable_all: configs/ptq/base_disable_all
1818
default_disabled: configs/ptq/default_disabled_quantizers
19-
nvfp4: configs/numerics/nvfp4_dynamic
19+
w4a4_nvfp4_nvfp4: configs/ptq/w4a4_nvfp4_nvfp4
2020
fp8_kv: configs/ptq/fp8_kv
2121

2222
metadata:
@@ -26,11 +26,6 @@ quantize:
2626
algorithm: max
2727
quant_cfg:
2828
- $import: base_disable_all
29-
- quantizer_name: '*weight_quantizer'
30-
cfg:
31-
$import: nvfp4
32-
- quantizer_name: '*input_quantizer'
33-
cfg:
34-
$import: nvfp4
29+
- $import: w4a4_nvfp4_nvfp4
3530
- $import: fp8_kv
3631
- $import: default_disabled

modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ imports:
1717
base_disable_all: configs/ptq/base_disable_all
1818
default_disabled: configs/ptq/default_disabled_quantizers
1919
nvfp4_static: configs/numerics/nvfp4_static
20-
nvfp4_dynamic: configs/numerics/nvfp4_dynamic
20+
nvfp4: configs/numerics/nvfp4
2121

2222
metadata:
2323
recipe_type: ptq
@@ -33,7 +33,7 @@ quantize:
3333
$import: nvfp4_static
3434
- quantizer_name: '*input_quantizer'
3535
cfg:
36-
$import: nvfp4_dynamic
36+
$import: nvfp4
3737
- quantizer_name: '*[kv]_bmm_quantizer'
3838
enable: false
3939
- $import: default_disabled

modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
imports:
1717
base_disable_all: configs/ptq/base_disable_all
1818
default_disabled: configs/ptq/default_disabled_quantizers
19-
nvfp4: configs/numerics/nvfp4_dynamic
19+
nvfp4: configs/numerics/nvfp4
2020
fp8_kv: configs/ptq/fp8_kv
2121

2222
metadata:

modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
imports:
1717
base_disable_all: configs/ptq/base_disable_all
1818
default_disabled: configs/ptq/default_disabled_quantizers
19-
nvfp4: configs/numerics/nvfp4_dynamic
19+
nvfp4: configs/numerics/nvfp4
2020
fp8_kv: configs/ptq/fp8_kv
2121

2222
metadata:

modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
imports:
1717
base_disable_all: configs/ptq/base_disable_all
1818
default_disabled: configs/ptq/default_disabled_quantizers
19-
nvfp4: configs/numerics/nvfp4_dynamic
19+
nvfp4: configs/numerics/nvfp4
2020
fp8_kv: configs/ptq/fp8_kv
2121

2222
metadata:

0 commit comments

Comments
 (0)