Dedupe disabled_quantizers units by importing default_disabled_quantizers

shengliangxu · shengliangxu · commit 44970f878261 · 2026-05-19T16:45:25.000-07:00
Before: huggingface/{nemotron_vl,phi4mm}/ptq/disabled_quantizers.yaml each
duplicated the 14-entry default_disabled_quantizers list verbatim and then
appended the model-specific exclusions.

After: both units are multi-document YAML. The first document declares an
`imports:` section; the second document is the quantizer list, which has
`$import: default_disabled_quantizers` as its first entry followed only by
the model-specific exclusions. Recipes that import these units are
unaffected; the resolved quant_cfg is unchanged.

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
diff --git a/modelopt_recipes/huggingface/nemotron_vl/ptq/disabled_quantizers.yaml b/modelopt_recipes/huggingface/nemotron_vl/ptq/disabled_quantizers.yaml
@@ -13,45 +13,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# QuantizerCfgList snippet of disabled quantizers for Nemotron VL. Merges the
-# standard `default_disabled_quantizers` exclusions with Nemotron-VL-specific
-# ones (only the decoder is quantized; vision/encoder branches, including the
-# Nemotron-Parse radio/model_encoder modules, are skipped). Recipes that
+# QuantizerCfgList snippet of disabled quantizers for Nemotron VL. Splices in
+# the standard `default_disabled_quantizers` exclusions and appends
+# Nemotron-VL-specific ones so that only the decoder (text-generation
+# component) is quantized; vision/encoder branches, including the
+# Nemotron-Parse radio/model_encoder modules, are skipped. Recipes that
 # import this should NOT also import `default_disabled_quantizers`.
 
 # modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
-  - quantizer_name: '*block_sparse_moe.gate*'
-    enable: false
-  - quantizer_name: '*linear_attn.conv1d*'
-    enable: false
-  - quantizer_name: '*lm_head*'
-    enable: false
-  - quantizer_name: '*mixer.conv1d*'
-    enable: false
-  - quantizer_name: '*mlp.gate.*'
-    enable: false
-  - quantizer_name: '*mlp.shared_expert_gate.*'
-    enable: false
-  - quantizer_name: '*output_layer*'
-    enable: false
-  - quantizer_name: '*proj_out.*'
-    enable: false
-  - quantizer_name: '*router*'
-    enable: false
-  - quantizer_name: 'output.*'
-    enable: false
-  - parent_class: 'nn.BatchNorm1d'
-    quantizer_name: '*'
-    enable: false
-  - parent_class: 'nn.BatchNorm2d'
-    quantizer_name: '*'
-    enable: false
-  - parent_class: 'nn.BatchNorm3d'
-    quantizer_name: '*'
-    enable: false
-  - parent_class: 'nn.LeakyReLU'
-    quantizer_name: '*'
-    enable: false
+imports:
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+---
+  - $import: default_disabled_quantizers
   - quantizer_name: '*vision*'
     enable: false
   - quantizer_name: '*image*'
diff --git a/modelopt_recipes/huggingface/phi4mm/ptq/disabled_quantizers.yaml b/modelopt_recipes/huggingface/phi4mm/ptq/disabled_quantizers.yaml
@@ -13,45 +13,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# QuantizerCfgList snippet of disabled quantizers for Phi-4-Multimodal. Merges
-# the standard `default_disabled_quantizers` exclusions with Phi-4-MM-specific
-# ones (only the language model is quantized; speech/audio/image/vision
-# branches are skipped). Recipes that import this should NOT also import
-# `default_disabled_quantizers`.
+# QuantizerCfgList snippet of disabled quantizers for Phi-4-Multimodal.
+# Splices in the standard `default_disabled_quantizers` exclusions and appends
+# Phi-4-MM-specific ones so that only the language model is quantized;
+# speech/audio/image/vision branches are skipped. Recipes that import this
+# should NOT also import `default_disabled_quantizers`.
 
 # modelopt-schema: modelopt.torch.quantization.config.QuantizerCfgListConfig
-  - quantizer_name: '*block_sparse_moe.gate*'
-    enable: false
-  - quantizer_name: '*linear_attn.conv1d*'
-    enable: false
-  - quantizer_name: '*lm_head*'
-    enable: false
-  - quantizer_name: '*mixer.conv1d*'
-    enable: false
-  - quantizer_name: '*mlp.gate.*'
-    enable: false
-  - quantizer_name: '*mlp.shared_expert_gate.*'
-    enable: false
-  - quantizer_name: '*output_layer*'
-    enable: false
-  - quantizer_name: '*proj_out.*'
-    enable: false
-  - quantizer_name: '*router*'
-    enable: false
-  - quantizer_name: 'output.*'
-    enable: false
-  - parent_class: 'nn.BatchNorm1d'
-    quantizer_name: '*'
-    enable: false
-  - parent_class: 'nn.BatchNorm2d'
-    quantizer_name: '*'
-    enable: false
-  - parent_class: 'nn.BatchNorm3d'
-    quantizer_name: '*'
-    enable: false
-  - parent_class: 'nn.LeakyReLU'
-    quantizer_name: '*'
-    enable: false
+imports:
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
+---
+  - $import: default_disabled_quantizers
   - quantizer_name: '*speech*'
     enable: false
   - quantizer_name: '*audio*'