diff --git a/modelopt/torch/quantization/config.py b/modelopt/torch/quantization/config.py index 99c729efbc..0e2feaae9e 100644 --- a/modelopt/torch/quantization/config.py +++ b/modelopt/torch/quantization/config.py @@ -228,6 +228,8 @@ def find_quant_cfg_entry_by_path( "enable": False, }, # Skip the MOE router {"quantizer_name": "*linear_attn.conv1d*", "enable": False}, + {"quantizer_name": "*linear_attn.in_proj_a*", "enable": False}, + {"quantizer_name": "*linear_attn.in_proj_b*", "enable": False}, {"quantizer_name": "*mixer.conv1d*", "enable": False}, # Skip mamba conv1d {"quantizer_name": "*output_layer*", "enable": False}, {"quantizer_name": "output.*", "enable": False},