Skip to content

Commit d45fd45

Browse files
committed
Fix CI
Signed-off-by: Chenjie Luo <chenjiel@nvidia.com>
1 parent 0bb9291 commit d45fd45

File tree

3 files changed

+3
-2
lines changed

3 files changed

+3
-2
lines changed

examples/deepseek/ptq.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@
56 56   from modelopt.torch.export.model_config import KV_CACHE_FP8
57 57   from modelopt.torch.export.quant_utils import get_quant_config
58 58   from modelopt.torch.quantization.nn import TensorQuantizer
59    - from modelopt.torch.quantization.triton.fp8_kernel import weight_dequant
   59 + from modelopt.torch.quantization.triton import weight_dequant
60 60   from modelopt.torch.quantization.utils import (
61 61   is_quantized_column_parallel_linear,
62 62   is_quantized_parallel_linear,

modelopt/torch/quantization/plugins/huggingface.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@
49 49   from ..conversion import register
50 50   from ..nn import QuantInputBase, QuantModule, QuantModuleRegistry, TensorQuantizer
51 51   from ..nn.modules.quant_linear import _QuantLinear
52    - from ..triton.fp8_kernel import weight_dequant
   52 + from ..triton import weight_dequant
53 53   from ..utils import replace_function
54 54   from .attention import register_attention_for_kv_quant
55 55   from .custom import CUSTOM_MODEL_PLUGINS, _ParallelLinear, _QuantFunctionalMixin

modelopt/torch/quantization/triton/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@
32 32   ):
33 33   # fp4_kernel works on any CUDA GPU with triton
34 34   from .fp4_kernel import *
   35 + from .fp8_kernel import *
35 36
36 37   # fp4_kernel_hopper requires compute >= 8.9 (uses tl.float8e4nv)
37 38   if torch.cuda.get_device_capability() >= (8, 9):

0 commit comments

Comments
 (0)