Skip to content

Commit 9808e01

Browse files
committed
add w4a16_nvfp4 recipes
Signed-off-by: Hung-Yueh Chiang <hungyuehc@nvidia.com>
1 parent 8594574 commit 9808e01

2 files changed

Lines changed: 53 additions & 0 deletions

File tree

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
# W4A16 NVFP4: NVFP4 E2M1 dynamic weight quantizer only; activations remain in BF16.
#
# Layout: this file is a two-document YAML stream. The first document declares
# import aliases; the second document (after `---`) is the quantizer config
# list that refers to those aliases via `$import`.
# NOTE(review): indentation below was reconstructed from a flattened diff view;
# confirm against the repository copy.

# modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
imports:
  # Alias `nvfp4` -> shared numerics config (presumably the NVFP4 format
  # definition; verify against configs/numerics/nvfp4).
  nvfp4: configs/numerics/nvfp4
---
# Only quantizers whose name matches this pattern get the imported config;
# no entry is declared here for any other quantizer.
- quantizer_name: '*weight_quantizer'
  cfg:
    $import: nvfp4
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
# PTQ recipe: NVFP4 W4A16 (weight-only) built from three reusable config units.
# NOTE(review): indentation below was reconstructed from a flattened diff view;
# confirm against the repository copy.
imports:
  # Alias -> path of each unit referenced by `$import` under `quant_cfg`.
  base_disable_all: configs/ptq/units/base_disable_all
  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
  w4a16_nvfp4: configs/ptq/units/w4a16_nvfp4

metadata:
  recipe_type: ptq
  description: NVFP4 W4A16 weight-only, BF16 activations, max calibration. No calibration forward pass required.
quantize:
  algorithm: max
  # Order is kept exactly as authored: base_disable_all, then w4a16_nvfp4,
  # then default_disabled_quantizers. Presumably later entries take precedence
  # over earlier ones — confirm against the recipe loader before reordering.
  quant_cfg:
    - $import: base_disable_all
    - $import: w4a16_nvfp4
    - $import: default_disabled_quantizers

0 commit comments

Comments
 (0)