File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree
Original file line number Diff line number Diff line change
1+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+ # SPDX-License-Identifier: Apache-2.0
3+ #
4+ # Licensed under the Apache License, Version 2.0 (the "License");
5+ # you may not use this file except in compliance with the License.
6+ # You may obtain a copy of the License at
7+ #
8+ # http://www.apache.org/licenses/LICENSE-2.0
9+ #
10+ # Unless required by applicable law or agreed to in writing, software
11+ # distributed under the License is distributed on an "AS IS" BASIS,
12+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ # See the License for the specific language governing permissions and
14+ # limitations under the License.
15+
16+ # W8A8 FP8: FP8 E4M3 weight and activation quantizers.
17+ imports :
18+ fp8 : configs/numerics/fp8
19+ ---
20+ - quantizer_name : '*weight_quantizer'
21+ cfg :
22+ $import : fp8
23+ - quantizer_name : '*input_quantizer'
24+ cfg :
25+ $import : fp8
Original file line number Diff line number Diff line change
16 16 imports :
17 17 base_disable_all : configs/ptq/base_disable_all
18 18 default_disabled : configs/ptq/default_disabled_quantizers
19- fp8 : configs/numerics/fp8
19+ w8a8_fp8_fp8 : configs/ptq/w8a8_fp8_fp8
20 20 fp8_kv : configs/ptq/fp8_kv
21 21
22 22 metadata :
@@ -26,11 +26,6 @@ quantize:
26 26 algorithm : max
27 27 quant_cfg :
28 28 - $import : base_disable_all
29- - quantizer_name : '*input_quantizer'
30- cfg :
31- $import : fp8
32- - quantizer_name : '*weight_quantizer'
33- cfg :
34- $import : fp8
29+ - $import : w8a8_fp8_fp8
35 30 - $import : fp8_kv
36 31 - $import : default_disabled
You can’t perform that action at this time.
0 commit comments