From f532a9b831359e2821c53b65957f0161b5af5ef2 Mon Sep 17 00:00:00 2001
From: Anurag Mukkara <134339030+amukkara@users.noreply.github.com>
Date: Thu, 16 Apr 2026 23:27:51 +0000
Subject: [PATCH] Skip quantization for linear_attn.in_proj_a/b

Signed-off-by: Anurag Mukkara <134339030+amukkara@users.noreply.github.com>
---
 modelopt/torch/quantization/config.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modelopt/torch/quantization/config.py b/modelopt/torch/quantization/config.py
index 99c729efbc..0e2feaae9e 100644
--- a/modelopt/torch/quantization/config.py
+++ b/modelopt/torch/quantization/config.py
@@ -228,6 +228,8 @@ def find_quant_cfg_entry_by_path(
         "enable": False,
     },  # Skip the MOE router
     {"quantizer_name": "*linear_attn.conv1d*", "enable": False},
+    {"quantizer_name": "*linear_attn.in_proj_a*", "enable": False},
+    {"quantizer_name": "*linear_attn.in_proj_b*", "enable": False},
     {"quantizer_name": "*mixer.conv1d*", "enable": False},  # Skip mamba conv1d
     {"quantizer_name": "*output_layer*", "enable": False},
     {"quantizer_name": "output.*", "enable": False},