NVIDIA
diff --git a/examples/diffusers/quantization/config.py b/examples/diffusers/quantization/config.py
Lines changed: 14 additions & 75 deletions
diff --git a/examples/diffusers/quantization/quantize.py b/examples/diffusers/quantization/quantize.py
Lines changed: 15 additions & 9 deletions
diff --git a/examples/llm_autodeploy/run_auto_quantize.py b/examples/llm_autodeploy/run_auto_quantize.py
Lines changed: 3 additions & 2 deletions
diff --git a/modelopt/torch/opt/config_loader.py b/modelopt/torch/opt/config_loader.py
Lines changed: 19 additions & 1 deletion
@@ -16,82 +16,21 @@
 import torch.nn as nn
 from calib.plugin_calib import PercentileCalibrator
 
-FP8_DEFAULT_CONFIG = {
-    "quant_cfg": [
-        {"quantizer_name": "*", "enable": False},
-        {"quantizer_name": "*weight_quantizer", "cfg": {"num_bits": (4, 3), "axis": None}},
-        {"quantizer_name": "*input_quantizer", "cfg": {"num_bits": (4, 3), "axis": None}},
-        {"quantizer_name": "*output_quantizer", "enable": False},
-        {"quantizer_name": "*softmax_quantizer", "cfg": {"num_bits": (4, 3), "axis": None}},
-    ],
-    "algorithm": "max",
-}
+from modelopt.torch.opt.config_loader import load_config
+from modelopt.torch.quantization.config import QuantizeConfig
 
-INT8_DEFAULT_CONFIG = {
-    "quant_cfg": [
-        {"quantizer_name": "*", "enable": False},
-        {"quantizer_name": "*weight_quantizer", "cfg": {"num_bits": 8, "axis": 0}},
-        {"quantizer_name": "*input_quantizer", "cfg": {"num_bits": 8, "axis": None}},
-        {"quantizer_name": "*output_quantizer", "enable": False},
-    ],
-    "algorithm": "max",
-}
-
-NVFP4_DEFAULT_CONFIG = {
-    "quant_cfg": [
-        {"quantizer_name": "*", "enable": False},
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 16, "type": "dynamic", "scale_bits": (4, 3)},
-                "axis": None,
-            },
-            "enable": True,
-        },
-        {
-            "quantizer_name": "*input_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 16, "type": "dynamic", "scale_bits": (4, 3)},
-                "axis": None,
-            },
-            "enable": True,
-        },
-        {"quantizer_name": "*output_quantizer", "enable": False},
-        {"quantizer_name": "*softmax_quantizer", "cfg": {"num_bits": (4, 3), "axis": None}},
-    ],
-    "algorithm": "max",
-}
-
-NVFP4_FP8_MHA_CONFIG = {
-    "quant_cfg": [
-        {"quantizer_name": "*", "enable": False},
-        {
-            "quantizer_name": "**weight_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 16, "type": "dynamic", "scale_bits": (4, 3)},
-                "axis": None,
-            },
-            "enable": True,
-        },
-        {
-            "quantizer_name": "**input_quantizer",
-            "cfg": {
-                "num_bits": (2, 1),
-                "block_sizes": {-1: 16, "type": "dynamic", "scale_bits": (4, 3)},
-                "axis": None,
-            },
-            "enable": True,
-        },
-        {"quantizer_name": "*output_quantizer", "enable": False},
-        {"quantizer_name": "*[qkv]_bmm_quantizer", "cfg": {"num_bits": (4, 3), "axis": None}},
-        {"quantizer_name": "*softmax_quantizer", "cfg": {"num_bits": (4, 3), "axis": None}},
-        {"quantizer_name": "*bmm2_output_quantizer", "cfg": {"num_bits": (4, 3), "axis": None}},
-    ],
-    "algorithm": {"method": "svdquant", "lowrank": 32},
-}
+FP8_DEFAULT_CONFIG = load_config(
+    "configs/ptq/presets/diffusers/fp8", schema_type=QuantizeConfig
+).model_dump(exclude_unset=True)
+INT8_DEFAULT_CONFIG = load_config(
+    "configs/ptq/presets/diffusers/int8", schema_type=QuantizeConfig
+).model_dump(exclude_unset=True)
+NVFP4_DEFAULT_CONFIG = load_config(
+    "configs/ptq/presets/diffusers/nvfp4", schema_type=QuantizeConfig
+).model_dump(exclude_unset=True)
+NVFP4_FP8_MHA_CONFIG = load_config(
+    "configs/ptq/presets/diffusers/nvfp4_fp8_mha", schema_type=QuantizeConfig
+).model_dump(exclude_unset=True)
 
 
 def set_quant_config_attr(quant_config, trt_high_precision_dtype, quant_algo, **kwargs):
 
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import argparse
+import copy
 import logging
 import sys
 import time as time
@@ -114,19 +115,13 @@ def get_quant_config(self, n_steps: int, backbone: torch.nn.Module) -> Any:
         """
         self.logger.info(f"Building quantization config for {self.config.format.value}")
 
+        apply_int8_percentile_calibrator = False
         if self.config.format == QuantFormat.INT8:
             if self.config.algo == QuantAlgo.SMOOTHQUANT:
                 base_cfg = mtq.INT8_SMOOTHQUANT_CFG
             else:
                 base_cfg = INT8_DEFAULT_CONFIG
-            if self.config.collect_method != CollectMethod.DEFAULT:
-                reset_set_int8_config(
-                    base_cfg,
-                    self.config.percentile,
-                    n_steps,
-                    collect_method=self.config.collect_method.value,
-                    backbone=backbone,
-                )
+            apply_int8_percentile_calibrator = self.config.collect_method != CollectMethod.DEFAULT
         elif self.config.format == QuantFormat.FP8:
             base_cfg = FP8_DEFAULT_CONFIG
         elif self.config.format == QuantFormat.FP4:
@@ -137,7 +132,18 @@ def get_quant_config(self, n_steps: int, backbone: torch.nn.Module) -> Any:
         else:
             raise NotImplementedError(f"Unknown format {self.config.format}")
 
-        # Build a fresh config dict so we never mutate the global constants.
+        # Build a fresh config dict so runtime overrides never mutate the global constants.
+        base_cfg = copy.deepcopy(base_cfg)
+
+        if apply_int8_percentile_calibrator:
+            reset_set_int8_config(
+                base_cfg,
+                self.config.percentile,
+                n_steps,
+                collect_method=self.config.collect_method.value,
+                backbone=backbone,
+            )
+
         quant_cfg_list = list(base_cfg["quant_cfg"])
 
         if self.config.format == QuantFormat.FP4:
 
@@ -15,6 +15,7 @@
 
 import argparse
 from collections import defaultdict
+from typing import Any
 
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -24,7 +25,7 @@
 from modelopt.torch.utils import create_forward_loop
 from modelopt.torch.utils.dataset_utils import get_dataset_dataloader
 
-SUPPORT_QUANT_FORMAT = {
+SUPPORT_QUANT_FORMAT: dict[str, dict[str, Any]] = {
     "fp8": mtq.FP8_DEFAULT_CFG,
     "nvfp4": mtq.NVFP4_DEFAULT_CFG,
 }
@@ -87,7 +88,7 @@ def loss_func(output, data):
         data_loader=calib_dataloader,
         forward_step=lambda model, batch: model(**batch),
         loss_func=loss_func,
-        quantization_formats=[SUPPORT_QUANT_FORMAT[format] for format in qformat_list],
+        quantization_formats=[SUPPORT_QUANT_FORMAT[quant_format] for quant_format in qformat_list],
         num_calib_steps=len(calib_dataloader),
         num_score_steps=min(
             len(calib_dataloader), 128 // batch_size
 
@@ -336,7 +336,19 @@ def _schema_equal(left: Any | None, right: Any | None) -> bool:
 def _list_element_schema(schema_type: Any | None) -> Any | None:
     """Return the element schema for a typed ``list[T]`` annotation."""
     schema_type = _unwrap_schema_type(schema_type)
-    if get_origin(schema_type) is not list:
+    origin = get_origin(schema_type)
+    if origin in (UnionType, Union):
+        element_schemas = []
+        for arg in get_args(schema_type):
+            if arg is NoneType:
+                continue
+            element_schema = _list_element_schema(arg)
+            if element_schema is None:
+                continue
+            if not any(_schema_equal(element_schema, seen) for seen in element_schemas):
+                element_schemas.append(element_schema)
+        return element_schemas[0] if len(element_schemas) == 1 else None
+    if origin is not list:
         return None
     args = get_args(schema_type)
     if len(args) != 1 or args[0] is Any:
@@ -510,6 +522,12 @@ def _resolve_list_import(
         if _schema_equal(imported.schema_type, element_schema):
             return [imported.data]
 
+        element_schema_unwrapped = _unwrap_schema_type(element_schema)
+        if isinstance(imported.data, dict) and (
+            element_schema_unwrapped is dict or get_origin(element_schema_unwrapped) is dict
+        ):
+            return [imported.data]
+
         raise ValueError(
             f"$import {ref_name!r} in list at {context} has schema "
             f"{_schema_label(imported.schema_type, imported.schema)!r}; expected either "