From f532a9b831359e2821c53b65957f0161b5af5ef2 Mon Sep 17 00:00:00 2001 From: Anurag Mukkara <134339030+amukkara@users.noreply.github.com> Date: Thu, 16 Apr 2026 23:27:51 +0000 Subject: [PATCH] Skip quant for linear_attn.in_proj_a/b Signed-off-by: Anurag Mukkara <134339030+amukkara@users.noreply.github.com> --- modelopt/torch/quantization/config.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modelopt/torch/quantization/config.py b/modelopt/torch/quantization/config.py index 99c729efbc..0e2feaae9e 100644 --- a/modelopt/torch/quantization/config.py +++ b/modelopt/torch/quantization/config.py @@ -228,6 +228,8 @@ def find_quant_cfg_entry_by_path( "enable": False, }, # Skip the MOE router {"quantizer_name": "*linear_attn.conv1d*", "enable": False}, + {"quantizer_name": "*linear_attn.in_proj_a*", "enable": False}, + {"quantizer_name": "*linear_attn.in_proj_b*", "enable": False}, {"quantizer_name": "*mixer.conv1d*", "enable": False}, # Skip mamba conv1d {"quantizer_name": "*output_layer*", "enable": False}, {"quantizer_name": "output.*", "enable": False},