Skip to content

Commit a7f65e3

Browse files
committed
Add WaterSIC ZSIC core algorithm with unit tests
Signed-off-by: Kai Xu <kaix@nvidia.com>
1 parent da0e8ff commit a7f65e3

5 files changed

Lines changed: 658 additions & 8 deletions

File tree

modelopt/torch/quantization/algorithms.py renamed to modelopt/torch/quantization/algorithms/__init__.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,12 @@
3838
from modelopt.torch.utils import create_param_grad_clear_hook, print_rank_0, report_memory
3939
from modelopt.torch.utils.distributed import DistributedProcessGroup, ParallelState, is_master
4040

41-
from . import config as mtq_config
42-
from . import model_calib
43-
from .config import QuantizeConfig, QuantizerAttributeConfig
44-
from .conversion import set_quantizer_by_cfg
45-
from .nn import QuantLinearConvBase, QuantModule, SequentialQuantizer, TensorQuantizer
46-
from .utils import is_quantized_linear
41+
from .. import config as mtq_config
42+
from .. import model_calib
43+
from ..config import QuantizeConfig, QuantizerAttributeConfig
44+
from ..conversion import set_quantizer_by_cfg
45+
from ..nn import QuantLinearConvBase, QuantModule, SequentialQuantizer, TensorQuantizer
46+
from ..utils import is_quantized_linear
4747

4848

4949
def estimate_quant_compression(quant_cfg: QuantizeConfig) -> float:
@@ -615,8 +615,8 @@ def before_search(self):
615615
# Import here to avoid circular import
616616
from modelopt.torch.quantization.model_quant import calibrate
617617

618-
from .conversion import restore_quantizer_state, update_quantize_metadata
619-
from .utils import get_quantizer_state_dict, set_quantizer_state_dict
618+
from ..conversion import restore_quantizer_state, update_quantize_metadata
619+
from ..utils import get_quantizer_state_dict, set_quantizer_state_dict
620620

621621
super().before_search()
622622
restored_method = getattr(self, "method", None)
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""WaterSIC KV-cache quantization algorithm."""
17+
18+
from __future__ import annotations

0 commit comments

Comments
 (0)