We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent dec2952, commit b0eadcd (Copy full SHA for b0eadcd)
modelopt/torch/quantization/config.py
@@ -228,6 +228,8 @@ def find_quant_cfg_entry_by_path(
228
"enable": False,
229
}, # Skip the MOE router
230
{"quantizer_name": "*linear_attn.conv1d*", "enable": False},
231
+ {"quantizer_name": "*linear_attn.in_proj_a*", "enable": False},
232
+ {"quantizer_name": "*linear_attn.in_proj_b*", "enable": False},
233
{"quantizer_name": "*mixer.conv1d*", "enable": False}, # Skip mamba conv1d
234
{"quantizer_name": "*output_layer*", "enable": False},
235
{"quantizer_name": "output.*", "enable": False},
0 commit comments