Skip to content

Commit 5baba0b

Browse files
committed
quant config
Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
1 parent dc67001 commit 5baba0b

5 files changed

Lines changed: 173 additions & 148 deletions

File tree

modelopt/recipe/_config_loader.py

Lines changed: 126 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@ def _parse_exmy(s: str) -> tuple[int, int] | str:
6262
return s
6363

6464

65-
def load_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[Any]:
66-
"""Load a config yaml.
65+
def _load_raw_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[Any]:
66+
"""Load a config YAML without resolving ``$import`` references.
6767
6868
config_file: Path to a config yaml file. The path suffix can be omitted.
6969
"""
@@ -141,3 +141,127 @@ def load_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[
141141
f"Config file {config_path} must contain a YAML mapping or list, got {type(_raw).__name__}"
142142
)
143143
return _parse_exmy_num_bits(_raw)
144+
145+
146+
# ---------------------------------------------------------------------------
147+
# $import resolution
148+
# ---------------------------------------------------------------------------
149+
150+
_IMPORT_KEY = "$import"
151+
152+
153+
def _resolve_imports(
154+
data: dict[str, Any], _loading: frozenset[str] | None = None
155+
) -> dict[str, Any]:
156+
"""Resolve the ``imports`` section and ``$import`` references.
157+
158+
See ``modelopt.recipe.loader`` module docstring for the full specification.
159+
This function lives in ``_config_loader`` (not ``loader``) so that it can be
160+
used from ``modelopt.torch.quantization.config`` without circular imports.
161+
"""
162+
imports_dict = data.pop("imports", None)
163+
if not imports_dict:
164+
return data
165+
166+
if not isinstance(imports_dict, dict):
167+
raise ValueError(
168+
f"'imports' must be a dict mapping names to config paths, got: {type(imports_dict).__name__}"
169+
)
170+
171+
if _loading is None:
172+
_loading = frozenset()
173+
174+
# Build name → config mapping (recursively resolve nested imports)
175+
import_map: dict[str, Any] = {}
176+
for name, config_path in imports_dict.items():
177+
if not config_path:
178+
raise ValueError(f"Import {name!r} has an empty config path.")
179+
if config_path in _loading:
180+
raise ValueError(
181+
f"Circular import detected: {config_path!r} is already being loaded. "
182+
f"Import chain: {sorted(_loading)}"
183+
)
184+
snippet = _load_raw_config(config_path)
185+
if isinstance(snippet, dict) and "imports" in snippet:
186+
snippet = _resolve_imports(snippet, _loading | {config_path})
187+
# Unwrap _list_content (multi-document YAML: imports + list content)
188+
if isinstance(snippet, dict) and "_list_content" in snippet:
189+
snippet = snippet["_list_content"]
190+
import_map[name] = snippet
191+
192+
def _lookup(ref_name: str, context: str) -> Any:
193+
if ref_name not in import_map:
194+
raise ValueError(
195+
f"Unknown $import reference {ref_name!r} in {context}. "
196+
f"Available imports: {list(import_map.keys())}"
197+
)
198+
return import_map[ref_name]
199+
200+
def _resolve_list(entries: list[Any]) -> list[Any]:
201+
"""Resolve $import markers in a list of entries."""
202+
resolved: list[Any] = []
203+
for entry in entries:
204+
if isinstance(entry, dict) and _IMPORT_KEY in entry:
205+
if len(entry) > 1:
206+
raise ValueError(
207+
f"$import must be the only key in the dict, got extra keys: "
208+
f"{sorted(k for k in entry if k != _IMPORT_KEY)}"
209+
)
210+
imported = _lookup(entry[_IMPORT_KEY], "list entry")
211+
if not isinstance(imported, list):
212+
raise ValueError(
213+
f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a "
214+
f"list, got {type(imported).__name__}."
215+
)
216+
resolved.extend(imported)
217+
elif (
218+
isinstance(entry, dict)
219+
and isinstance(entry.get("cfg"), dict)
220+
and _IMPORT_KEY in entry["cfg"]
221+
):
222+
ref = entry["cfg"].pop(_IMPORT_KEY)
223+
inline_keys = dict(entry["cfg"])
224+
ref_names = ref if isinstance(ref, list) else [ref]
225+
226+
merged: dict[str, Any] = {}
227+
for rname in ref_names:
228+
snippet = _lookup(rname, f"cfg of {entry}")
229+
if not isinstance(snippet, dict):
230+
raise ValueError(
231+
f"$import {rname!r} in cfg must resolve to a dict, "
232+
f"got {type(snippet).__name__}."
233+
)
234+
merged.update(snippet)
235+
236+
merged.update(inline_keys)
237+
entry["cfg"] = merged
238+
resolved.append(entry)
239+
else:
240+
resolved.append(entry)
241+
return resolved
242+
243+
# Resolve in quant_cfg (top-level or nested under quantize)
244+
for container in [data, data.get("quantize", {})]:
245+
if isinstance(container, dict):
246+
quant_cfg = container.get("quant_cfg")
247+
if isinstance(quant_cfg, list):
248+
container["quant_cfg"] = _resolve_list(quant_cfg)
249+
250+
# Resolve in _list_content (multi-document snippets)
251+
if "_list_content" in data:
252+
data["_list_content"] = _resolve_list(data["_list_content"])
253+
254+
return data
255+
256+
257+
def load_config(config_path: str | Path | Traversable) -> dict[str, Any] | list[Any]:
    """Load a YAML config and resolve all ``$import`` references.

    This is the primary config loading entry point: it reads the YAML file
    and, when the result is a mapping that declares an ``imports`` section,
    resolves every ``imports`` / ``$import`` directive before returning the
    final config dict or list.
    """
    data = _load_raw_config(config_path)
    has_imports = isinstance(data, dict) and "imports" in data
    return _resolve_imports(data) if has_imports else data

modelopt/recipe/loader.py

Lines changed: 4 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -22,139 +22,12 @@
2222
from pathlib import Path
2323
from typing import Any
2424

25-
from ._config_loader import BUILTIN_RECIPES_LIB, load_config
25+
from ._config_loader import BUILTIN_RECIPES_LIB, _load_raw_config, _resolve_imports, load_config
2626
from .config import ModelOptPTQRecipe, ModelOptRecipeBase, RecipeType
2727

2828
__all__ = ["load_config", "load_recipe"]
2929

3030

31-
_IMPORT_KEY = "$import"
32-
33-
34-
def _resolve_imports(
35-
data: dict[str, Any], _loading: frozenset[str] | None = None
36-
) -> dict[str, Any]:
37-
"""Resolve the ``imports`` section and ``$import`` references in a recipe.
38-
39-
An ``imports`` block is a dict mapping short names to config file paths::
40-
41-
imports:
42-
fp8: configs/numerics/fp8
43-
nvfp4: configs/numerics/nvfp4_dynamic
44-
45-
References use the explicit ``$import`` marker so they are never confused
46-
with literal string values::
47-
48-
quant_cfg:
49-
- $import: base_disable_all # entire entry replaced (or list spliced)
50-
- quantizer_name: '*weight_quantizer'
51-
cfg:
52-
$import: fp8 # cfg value replaced
53-
54-
Resolution is **recursive**: an imported snippet may itself contain an
55-
``imports`` section. Circular imports are detected and raise ``ValueError``.
56-
"""
57-
imports_dict = data.pop("imports", None)
58-
if not imports_dict:
59-
return data
60-
61-
if not isinstance(imports_dict, dict):
62-
raise ValueError(
63-
f"'imports' must be a dict mapping names to config paths, got: {type(imports_dict).__name__}"
64-
)
65-
66-
if _loading is None:
67-
_loading = frozenset()
68-
69-
# Build name → config mapping (recursively resolve nested imports)
70-
import_map: dict[str, Any] = {}
71-
for name, config_path in imports_dict.items():
72-
if not config_path:
73-
raise ValueError(f"Import {name!r} has an empty config path.")
74-
if config_path in _loading:
75-
raise ValueError(
76-
f"Circular import detected: {config_path!r} is already being loaded. "
77-
f"Import chain: {sorted(_loading)}"
78-
)
79-
snippet = load_config(config_path)
80-
if isinstance(snippet, dict) and "imports" in snippet:
81-
snippet = _resolve_imports(snippet, _loading | {config_path})
82-
# Unwrap _list_content (multi-document YAML: imports + list content)
83-
if isinstance(snippet, dict) and "_list_content" in snippet:
84-
snippet = snippet["_list_content"]
85-
import_map[name] = snippet
86-
87-
def _lookup(ref_name: str, context: str) -> Any:
88-
if ref_name not in import_map:
89-
raise ValueError(
90-
f"Unknown $import reference {ref_name!r} in {context}. "
91-
f"Available imports: {list(import_map.keys())}"
92-
)
93-
return import_map[ref_name]
94-
95-
def _resolve_list(entries: list[Any]) -> list[Any]:
96-
"""Resolve $import markers in a list of quant_cfg-style entries."""
97-
resolved: list[Any] = []
98-
for entry in entries:
99-
if isinstance(entry, dict) and _IMPORT_KEY in entry:
100-
# {$import: name} → splice imported list
101-
if len(entry) > 1:
102-
raise ValueError(
103-
f"$import must be the only key in the dict, got extra keys: "
104-
f"{sorted(k for k in entry if k != _IMPORT_KEY)}"
105-
)
106-
imported = _lookup(entry[_IMPORT_KEY], "list entry")
107-
if not isinstance(imported, list):
108-
raise ValueError(
109-
f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a "
110-
f"list, got {type(imported).__name__}."
111-
)
112-
resolved.extend(imported)
113-
elif (
114-
isinstance(entry, dict)
115-
and isinstance(entry.get("cfg"), dict)
116-
and _IMPORT_KEY in entry["cfg"]
117-
):
118-
# cfg: {$import: name_or_list, ...inline} → import then override
119-
#
120-
# Precedence (lowest → highest):
121-
# 1. Imports in list order (later imports override earlier)
122-
# 2. Inline keys (override all imports)
123-
ref = entry["cfg"].pop(_IMPORT_KEY)
124-
inline_keys = dict(entry["cfg"])
125-
ref_names = ref if isinstance(ref, list) else [ref]
126-
127-
merged: dict[str, Any] = {}
128-
for name in ref_names:
129-
snippet = _lookup(name, f"cfg of {entry}")
130-
if not isinstance(snippet, dict):
131-
raise ValueError(
132-
f"$import {name!r} in cfg must resolve to a dict, "
133-
f"got {type(snippet).__name__}."
134-
)
135-
merged.update(snippet)
136-
137-
merged.update(inline_keys)
138-
entry["cfg"] = merged
139-
resolved.append(entry)
140-
else:
141-
resolved.append(entry)
142-
return resolved
143-
144-
# Resolve $import references in quant_cfg entries
145-
quantize = data.get("quantize")
146-
if isinstance(quantize, dict):
147-
quant_cfg = quantize.get("quant_cfg")
148-
if isinstance(quant_cfg, list):
149-
quantize["quant_cfg"] = _resolve_list(quant_cfg)
150-
151-
# Resolve $import references in _list_content (multi-document snippets)
152-
if "_list_content" in data:
153-
data["_list_content"] = _resolve_list(data["_list_content"])
154-
155-
return data
156-
157-
15831
def _resolve_recipe_path(recipe_path: str | Path | Traversable) -> Path | Traversable:
15932
"""Resolve a recipe path, checking the built-in library first then the filesystem.
16033
@@ -214,7 +87,7 @@ def _load_recipe_from_file(recipe_file: Path | Traversable) -> ModelOptRecipeBas
21487
The file must contain a ``metadata`` section with at least ``recipe_type``,
21588
plus a ``quant_cfg`` mapping and an optional ``algorithm`` for PTQ recipes.
21689
"""
217-
raw = load_config(recipe_file)
90+
raw = _load_raw_config(recipe_file)
21891
assert isinstance(raw, dict), f"Recipe file {recipe_file} must be a YAML mapping."
21992
data = _resolve_imports(raw)
22093

@@ -247,7 +120,7 @@ def _load_recipe_from_dir(recipe_dir: Path | Traversable) -> ModelOptRecipeBase:
247120
f"Cannot find a recipe descriptor in {recipe_dir}. Looked for: recipe.yml, recipe.yaml"
248121
)
249122

250-
recipe_data = load_config(recipe_file)
123+
recipe_data = _load_raw_config(recipe_file)
251124
assert isinstance(recipe_data, dict), f"Recipe file {recipe_file} must be a YAML mapping."
252125
metadata = recipe_data.get("metadata", {})
253126
recipe_type = metadata.get("recipe_type")
@@ -266,7 +139,7 @@ def _load_recipe_from_dir(recipe_dir: Path | Traversable) -> ModelOptRecipeBase:
266139
f"Cannot find quantize in {recipe_dir}. Looked for: quantize.yml, quantize.yaml"
267140
)
268141
# Resolve imports: imports are in recipe.yml, quantize data is separate
269-
quantize_data = load_config(quantize_file)
142+
quantize_data = _load_raw_config(quantize_file)
270143
assert isinstance(quantize_data, dict), f"{quantize_file} must be a YAML mapping."
271144
combined: dict[str, Any] = {"quantize": quantize_data}
272145
imports = recipe_data.get("imports")

modelopt/torch/quantization/config.py

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@
157157
from pydantic import ValidationInfo, field_validator, model_validator
158158
from typing_extensions import Required, TypedDict
159159

160+
from modelopt.recipe._config_loader import load_config
160161
from modelopt.torch.opt.config import ModeloptBaseConfig, ModeloptField
161162
from modelopt.torch.utils.network import ConstructorLike
162163

@@ -272,21 +273,7 @@ def find_quant_cfg_entry_by_path(
272273
"algorithm": "max",
273274
}
274275

275-
FP8_DEFAULT_CFG = {
276-
"quant_cfg": [
277-
*_base_disable_all,
278-
{
279-
"quantizer_name": "*weight_quantizer",
280-
"cfg": {"num_bits": (4, 3), "axis": None},
281-
},
282-
{
283-
"quantizer_name": "*input_quantizer",
284-
"cfg": {"num_bits": (4, 3), "axis": None},
285-
},
286-
*_default_disabled_quantizer_cfg,
287-
],
288-
"algorithm": "max",
289-
}
276+
FP8_DEFAULT_CFG: dict[str, Any] = load_config("configs/ptq/presets/fp8_default")
290277

291278
MAMBA_MOE_FP8_AGGRESSIVE_CFG = {
292279
"quant_cfg": [
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# PTQ Preset Configs
2+
3+
This directory holds preset quantization configurations that serve as the
4+
single source of truth for the hardcoded `*_CFG` dicts in
5+
`modelopt.torch.quantization.config` (e.g., `FP8_DEFAULT_CFG`).
6+
7+
Each preset is a complete, self-contained config with `algorithm` and
8+
`quant_cfg` — ready to pass directly to `mtq.quantize()`. Presets compose
9+
from the reusable snippets in `configs/numerics/` and `configs/ptq/` via
10+
the `$import` system.
11+
12+
When adding a new preset, use existing snippets where possible and keep
13+
the YAML as the authoritative definition — the Python config should load
14+
from here rather than hardcoding the dict.
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# FP8 per-tensor weight and activation (W8A8), max calibration.
# Equivalent to the hardcoded FP8_DEFAULT_CFG in config.py.
#
# The `imports` section maps short names to reusable config snippets; each
# `$import` entry under quant_cfg splices the named snippet's list in place.
imports:
  base_disable_all: configs/ptq/base_disable_all
  w8a8: configs/ptq/w8a8_fp8_fp8
  default_disabled: configs/ptq/default_disabled_quantizers

algorithm: max
# Spliced in order: disable-all baseline, FP8 W8A8 quantizer entries, then
# the default-disabled quantizer entries.
quant_cfg:
  - $import: base_disable_all
  - $import: w8a8
  - $import: default_disabled

0 commit comments

Comments
 (0)