Skip to content

Commit fe6dbcf

Browse files
committed
Address quant config review feedback
Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
1 parent 90a39be commit fe6dbcf

9 files changed

Lines changed: 110 additions & 20 deletions

File tree

examples/llm_ptq/example_utils.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
ProcessorMixin,
4343
)
4444

45-
from modelopt.torch.quantization.config import QuantizeConfig
45+
from modelopt.torch.quantization.config import QuantizeConfig, QuantizerCfgEntry
4646

4747
try:
4848
from huggingface_hub import snapshot_download
@@ -249,10 +249,14 @@ def build_quant_cfg(
249249

250250
if model_type == "phi4mm":
251251
# Only quantize the language model
252-
quant_cfg_obj["quant_cfg"].append({"quantizer_name": "*speech*", "enable": False})
253-
quant_cfg_obj["quant_cfg"].append({"quantizer_name": "*audio*", "enable": False})
254-
quant_cfg_obj["quant_cfg"].append({"quantizer_name": "*image*", "enable": False})
255-
quant_cfg_obj["quant_cfg"].append({"quantizer_name": "*vision*", "enable": False})
252+
quant_cfg_obj["quant_cfg"].extend(
253+
[
254+
QuantizerCfgEntry(quantizer_name="*speech*", enable=False),
255+
QuantizerCfgEntry(quantizer_name="*audio*", enable=False),
256+
QuantizerCfgEntry(quantizer_name="*image*", enable=False),
257+
QuantizerCfgEntry(quantizer_name="*vision*", enable=False),
258+
]
259+
)
256260

257261
return quant_cfg_obj
258262

examples/vllm_serve/vllm_ptq_utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# limitations under the License.
1515

1616
import dataclasses
17-
from collections.abc import Callable
17+
from collections.abc import Callable, Mapping
1818
from typing import Any
1919

2020
import torch
@@ -119,7 +119,11 @@ def update_kv_cfg_for_mla(model: torch.nn.Module, kv_quant_cfg: list) -> list:
119119
return kv_quant_cfg
120120

121121
kv_entry = next(
122-
(e for e in kv_quant_cfg if e.get("quantizer_name") == "*[kv]_bmm_quantizer"),
122+
(
123+
e
124+
for e in kv_quant_cfg
125+
if isinstance(e, Mapping) and e.get("quantizer_name") == "*[kv]_bmm_quantizer"
126+
),
123127
None,
124128
)
125129
if kv_entry is not None:

modelopt/torch/opt/config.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,10 @@ def __setitem__(self, key: str, value: Any) -> None:
119119

120120
def __delitem__(self, key: str) -> None:
121121
"""Unset the given key so exclude_unset dumps omit it."""
122-
field_name = self.get_field_name_from_key(key)
122+
try:
123+
field_name = self.get_field_name_from_key(key)
124+
except AttributeError as e:
125+
raise KeyError(key) from e
123126
if field_name in self._iterable_model_extra:
124127
assert self.model_extra is not None
125128
del self.model_extra[field_name]
@@ -129,8 +132,8 @@ def __delitem__(self, key: str) -> None:
129132
field_info = type(self).model_fields[field_name]
130133
default = field_info.get_default(call_default_factory=True)
131134
if default is PydanticUndefined:
132-
raise AttributeError(f"Key {key} cannot be unset because it has no default.")
133-
setattr(self, field_name, default)
135+
raise KeyError(f"Key {key} cannot be unset because it has no default.")
136+
self.__dict__[field_name] = default
134137
self.model_fields_set.discard(field_name)
135138

136139
def get(self, key: str, default: Any = None) -> Any:

modelopt/torch/quantization/config.py

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,10 @@ def validate_quantizer_cfg_entry(cls, values):
561561
"Each quant_cfg entry must specify 'cfg', 'enable', or both. "
562562
"An entry with only 'quantizer_name' has no effect."
563563
)
564+
if "cfg" in values and values["cfg"] is None:
565+
raise ValueError("cfg must be omitted or a valid mapping/list, not null.")
566+
if "enable" in values and values["enable"] is None:
567+
raise ValueError("enable must be a boolean when provided, not null.")
564568

565569
cfg = values.get("cfg")
566570
enable = values.get("enable", True)
@@ -1008,15 +1012,15 @@ class GPTQCalibConfig(QuantizeAlgorithmConfig):
10081012

10091013

10101014
QuantizerCfgListConfig = list[QuantizerCfgEntry]
1011-
QuantizeQuantCfgInputType = Sequence[QuantizerCfgEntry | Mapping[str, Any]]
1015+
QuantizeQuantCfgInputType = Mapping[str, Any] | Sequence[QuantizerCfgEntry | Mapping[str, Any]]
10121016

10131017
_QuantizeAlgoCfgType = str | dict | QuantizeAlgorithmConfig | None
10141018

10151019
QuantizeAlgoCfgType = _QuantizeAlgoCfgType | list[_QuantizeAlgoCfgType] | None
10161020

10171021

10181022
def normalize_quant_cfg_list(
1019-
v: Mapping[str, Any] | list[QuantizerCfgEntry | Mapping[str, Any]],
1023+
v: Mapping[str, Any] | Sequence[QuantizerCfgEntry | Mapping[str, Any]],
10201024
) -> list[QuantizerCfgEntry]:
10211025
"""Normalize a raw quant_cfg into a list of :class:`QuantizerCfgEntry` objects.
10221026
@@ -1099,15 +1103,19 @@ def _dict_to_entry(key: str, value: Any) -> list[dict[str, Any]]:
10991103
if isinstance(sub_cfg, QuantizerAttributeConfig):
11001104
enable = None
11011105
cfg = sub_cfg
1102-
else:
1106+
elif isinstance(sub_cfg, Mapping):
11031107
sub_cfg = dict(sub_cfg)
11041108
enable = sub_cfg.pop("enable", None)
11051109
cfg = sub_cfg or None
1110+
else:
1111+
enable = None
1112+
cfg = sub_cfg
11061113
entry: dict[str, Any] = {
11071114
"parent_class": key,
11081115
"quantizer_name": q_path,
1109-
"cfg": cfg,
11101116
}
1117+
if cfg is not None:
1118+
entry["cfg"] = cfg
11111119
if enable is not None:
11121120
entry["enable"] = enable
11131121
entries.append(entry)
@@ -1119,7 +1127,9 @@ def _dict_to_entry(key: str, value: Any) -> list[dict[str, Any]]:
11191127
else:
11201128
cfg = value
11211129
enable = None
1122-
entry = {"quantizer_name": key, "cfg": cfg}
1130+
entry = {"quantizer_name": key}
1131+
if cfg is not None:
1132+
entry["cfg"] = cfg
11231133
if enable is not None:
11241134
entry["enable"] = enable
11251135
return [entry]
@@ -1165,6 +1175,17 @@ def _dict_to_entry(key: str, value: Any) -> list[dict[str, Any]]:
11651175
# Validate: when cfg is present and enable=True, cfg must be a non-empty
11661176
# dict or list. An empty cfg would attempt to create a
11671177
# QuantizerAttributeConfig with no actual configuration.
1178+
if "cfg" in entry and entry["cfg"] is None:
1179+
raise ValueError(
1180+
f"Invalid quant_cfg entry: {raw!r} - 'cfg' must be omitted or a "
1181+
"valid mapping/list, not null."
1182+
)
1183+
if "enable" in entry and entry["enable"] is None:
1184+
raise ValueError(
1185+
f"Invalid quant_cfg entry: {raw!r} - 'enable' must be a boolean "
1186+
"when provided, not null."
1187+
)
1188+
11681189
cfg = entry.get("cfg")
11691190
enable = entry.get("enable", True)
11701191
if enable and cfg is not None:
@@ -1190,9 +1211,8 @@ def _dict_to_entry(key: str, value: Any) -> list[dict[str, Any]]:
11901211
"explicitly."
11911212
)
11921213

1193-
# Normalize: make enable and cfg always explicit.
1214+
# Normalize: make enable explicit. cfg remains omitted when it is intentionally unset.
11941215
entry.setdefault("enable", True)
1195-
entry.setdefault("cfg", None)
11961216

11971217
result.append(QuantizerCfgEntry.model_validate(entry))
11981218
return result
@@ -1201,6 +1221,18 @@ def _dict_to_entry(key: str, value: Any) -> list[dict[str, Any]]:
12011221
class QuantizeConfig(ModeloptBaseConfig):
12021222
"""Default configuration for ``quantize`` mode."""
12031223

1224+
def model_dump(self, **kwargs):
1225+
"""Dump quant_cfg entries without unset optional fields."""
1226+
data = super().model_dump(**kwargs)
1227+
if "quant_cfg" in data:
1228+
data["quant_cfg"] = [
1229+
entry.model_dump(exclude_unset=True)
1230+
if isinstance(entry, QuantizerCfgEntry)
1231+
else {k: v for k, v in entry.items() if v is not None}
1232+
for entry in self.quant_cfg
1233+
]
1234+
return data
1235+
12041236
quant_cfg: QuantizerCfgListConfig = ModeloptField(
12051237
default=[{"quantizer_name": "*", "cfg": {"num_bits": 8, "axis": None}}],
12061238
title="Quantization configuration",

modelopt/torch/quantization/conversion.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ def set_quantizer_by_cfg(quant_model: nn.Module, quant_cfg: QuantizeQuantCfgInpu
249249
250250
See :ref:`quant-cfg` for the full format reference and common patterns.
251251
"""
252-
quant_cfg = normalize_quant_cfg_list(list(quant_cfg))
252+
quant_cfg = normalize_quant_cfg_list(quant_cfg)
253253

254254
for entry in quant_cfg:
255255
quantizer_name: str = entry["quantizer_name"]
@@ -496,7 +496,7 @@ def set_quantizer_by_cfg_context(quant_model: nn.Module, quant_cfg: QuantizeQuan
496496
Yields:
497497
None — the context body runs with the new quantizer attributes active.
498498
"""
499-
quant_cfg = normalize_quant_cfg_list(list(quant_cfg))
499+
quant_cfg = normalize_quant_cfg_list(quant_cfg)
500500

501501
for entry in quant_cfg:
502502
if isinstance(entry.get("cfg"), list):

modelopt/torch/quantization/nn/modules/tensor_quantizer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1431,6 +1431,8 @@ def set_from_attribute_config(
14311431
if not isinstance(attributes, (list, tuple)):
14321432
assert isinstance(attributes, Mapping), "attributes must be a list or a mapping."
14331433
attributes = [attributes] * len(self)
1434+
elif len(attributes) != len(self):
1435+
raise ValueError(f"Expected {len(self)} attribute configs, but got {len(attributes)}.")
14341436

14351437
for attribute, quantizer in zip(attributes, self):
14361438
quantizer.set_from_attribute_config(attribute)

modelopt/torch/quantization/utils/core_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -935,7 +935,7 @@ def update_quant_cfg_with_kv_cache_quant(
935935
inner = list(
936936
updated_quant_cfg.get("quant_cfg") or [QuantizerCfgEntry(quantizer_name="*", enable=False)]
937937
)
938-
updated_quant_cfg["quant_cfg"] = inner + list(kv_cache_quant_cfg)
938+
updated_quant_cfg["quant_cfg"] = inner + copy.deepcopy(list(kv_cache_quant_cfg))
939939

940940
# Set default algorithm for kv cache quantization if not provided.
941941
if not updated_quant_cfg.get("algorithm"):

tests/unit/torch/quantization/test_config_validation.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,9 @@ def test_quantizer_cfg_entry_mutable_mapping_delitem_unsets_field():
126126
"enable": True,
127127
}
128128

129+
with pytest.raises(KeyError):
130+
del entry["missing"]
131+
129132

130133
def test_public_preset_quant_cfg_entries_are_typed_and_dict_like():
131134
"""Public preset constants are typed but keep dict-style entry access."""
@@ -177,6 +180,22 @@ def test_mixed_raw_dict_and_modelopt_config_entries_normalize_after_mutation():
177180
assert weight_entry["cfg"]["num_bits"] == "e4m3"
178181

179182

183+
@pytest.mark.parametrize(
184+
("raw", "match"),
185+
[
186+
({"quantizer_name": "*", "cfg": None}, "'?cfg'? must be omitted"),
187+
({"quantizer_name": "*", "enable": None}, "'?enable'? must be a boolean"),
188+
],
189+
)
190+
def test_quantizer_cfg_entry_rejects_explicit_null_values(raw, match):
191+
"""Explicit null cfg/enable values are rejected instead of treated as omitted."""
192+
with pytest.raises(ValidationError, match=match):
193+
QuantizerCfgEntry.model_validate(raw)
194+
195+
with pytest.raises(ValueError, match=match):
196+
normalize_quant_cfg_list([raw])
197+
198+
180199
def test_quantizer_cfg_entry_rejects_no_effect_entry():
181200
"""Direct QuantizerCfgEntry construction rejects entries with no cfg or enable."""
182201
with pytest.raises(ValidationError, match="must specify 'cfg', 'enable'"):
@@ -469,6 +488,26 @@ def test_legacy_nn_class_with_cfg(self):
469488
assert _cfg_to_dict(result[0]["cfg"]) == {"num_bits": 4, "axis": 0}
470489
assert result[0]["enable"] is True
471490

491+
def test_legacy_nn_class_with_list_valued_cfg(self):
492+
"""Legacy nn.* scoped format preserves list-valued SequentialQuantizer cfg."""
493+
raw = [
494+
{
495+
"nn.Linear": {
496+
"*weight_quantizer": [
497+
{"num_bits": 4, "block_sizes": {-1: 128, "type": "static"}},
498+
{"num_bits": 8, "axis": 0},
499+
]
500+
}
501+
}
502+
]
503+
result = normalize_quant_cfg_list(raw)
504+
assert len(result) == 1
505+
assert result[0]["parent_class"] == "nn.Linear"
506+
assert result[0]["quantizer_name"] == "*weight_quantizer"
507+
assert isinstance(result[0]["cfg"], list)
508+
assert _cfg_to_dict(result[0]["cfg"]) == raw[0]["nn.Linear"]["*weight_quantizer"]
509+
assert result[0]["enable"] is True
510+
472511
def test_legacy_list_valued_cfg(self):
473512
"""Legacy dict format with list-valued cfg (SequentialQuantizer) normalizes correctly."""
474513
raw = [

tests/unit/torch/quantization/test_quantize_cpu.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,12 @@ def test_list_attributes_creates_sequential_quantizer(self):
401401
assert isinstance(module, SequentialQuantizer)
402402
assert len(module) == 2
403403

404+
def test_sequential_quantizer_rejects_mismatched_attribute_list_length(self):
405+
"""SequentialQuantizer rejects partial list configs instead of silently zipping."""
406+
quantizer = SequentialQuantizer(TensorQuantizer(), TensorQuantizer())
407+
with pytest.raises(ValueError, match="Expected 2 attribute configs, but got 1"):
408+
quantizer.set_from_attribute_config([QuantizerAttributeConfig(num_bits=8)])
409+
404410

405411
def test_ordering_later_entry_overrides_earlier():
406412
"""Later entries in quant_cfg override earlier ones for the same quantizer."""

0 commit comments

Comments (0)