From 1d8a30b06f6e46e8e98f2b3e0b49f3bed5e9699a Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Mon, 13 Apr 2026 15:59:29 -0700
Subject: [PATCH 01/30] Add import system for composable YAML configs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit description:

Introduce an import mechanism that lets recipe YAML files reference reusable
config snippets by name, reducing duplication across recipes.

Syntax:
  imports:
    fp8: configs/numerics/fp8
    base_disable_all: configs/ptq/base_disable_all

  quant_cfg:
    # string entry → replaced with the imported dict, or spliced in if it is a list
    - base_disable_all
    - quantizer_name: '*weight_quantizer'
      cfg: fp8 # string cfg → replaced with imported dict

Features:
- Dict-based imports (keys are names, values are config paths) — no name conflicts
- Three resolution modes: string cfg value, string list entry (dict),
  string list entry (list splice)
- Recursive resolution with circular import detection
- Path resolution via load_config (built-in library first, then filesystem)
- Works with both single-file and directory recipe formats

New reusable config snippets (modelopt_recipes/configs/):
- numerics/fp8.yml, nvfp4_dynamic.yml, nvfp4_static.yml
- ptq/base_disable_all.yaml, default_disabled_quantizers.yaml

All 6 built-in PTQ recipes converted to use imports, reducing each by ~30 lines.

Pre-commit hook updated to skip configs/ directory and allow string entries in
quant_cfg. load_config() now accepts YAML lists for list-valued snippets.

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 .pre-commit-config.yaml                       |   1 +
 modelopt/recipe/_config_loader.py             |   6 +-
 modelopt/recipe/loader.py                     | 100 ++++++-
 modelopt_recipes/configs/numerics/fp8.yml     |   2 +
 .../configs/numerics/nvfp4_dynamic.yml        |   6 +
 .../configs/numerics/nvfp4_static.yml         |   6 +
 .../configs/ptq/base_disable_all.yaml         |   3 +
 .../ptq/default_disabled_quantizers.yaml      |  33 +++
 .../general/ptq/fp8_default-fp8_kv.yaml       |  52 +---
 .../general/ptq/nvfp4_default-fp8_kv.yaml     |  59 +---
 .../ptq/nvfp4_default-none_kv_gptq.yaml       |  56 +---
 .../ptq/nvfp4_experts_only-fp8_kv.yaml        |  73 +----
 .../general/ptq/nvfp4_mlp_only-fp8_kv.yaml    |  73 +----
 .../general/ptq/nvfp4_omlp_only-fp8_kv.yaml   |  87 +-----
 tests/unit/recipe/test_loader.py              | 276 ++++++++++++++++++
 tools/precommit/check_modelopt_recipes.py     |   3 +
 16 files changed, 502 insertions(+), 334 deletions(-)
 create mode 100644 modelopt_recipes/configs/numerics/fp8.yml
 create mode 100644 modelopt_recipes/configs/numerics/nvfp4_dynamic.yml
 create mode 100644 modelopt_recipes/configs/numerics/nvfp4_static.yml
 create mode 100644 modelopt_recipes/configs/ptq/base_disable_all.yaml
 create mode 100644 modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3c4c11a090..0fc5c8eeaa 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -68,6 +68,7 @@ repos:
         entry: python tools/precommit/check_modelopt_recipes.py
         language: system
         files: ^modelopt_recipes/
+        exclude: ^modelopt_recipes/configs/
 
   # Instructions to change license file if ever needed:
   # https://github.com/Lucas-C/pre-commit-hooks#removing-old-license-and-replacing-it-with-a-new-one
diff --git a/modelopt/recipe/_config_loader.py b/modelopt/recipe/_config_loader.py
index 188dcf236f..da6f4b7640 100644
--- a/modelopt/recipe/_config_loader.py
+++ b/modelopt/recipe/_config_loader.py
@@ -62,7 +62,7 @@ def _parse_exmy(s: str) -> tuple[int, int] | str:
     return s
 
 
-def load_config(config_file: str | Path | Traversable) -> dict[str, Any]:
+def load_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[Any]:
     """Load a config yaml.
 
     config_file: Path to a config yaml file. The path suffix can be omitted.
@@ -106,8 +106,8 @@ def load_config(config_file: str | Path | Traversable) -> dict[str, Any]:
     _raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
     if _raw is None:
         return {}
-    if not isinstance(_raw, dict):
+    if not isinstance(_raw, (dict, list)):
         raise ValueError(
-            f"Config file {config_path} must contain a YAML mapping, got {type(_raw).__name__}"
+            f"Config file {config_path} must contain a YAML mapping or list, got {type(_raw).__name__}"
         )
     return _parse_exmy_num_bits(_raw)
diff --git a/modelopt/recipe/loader.py b/modelopt/recipe/loader.py
index 3a9c66fb22..342e615d06 100644
--- a/modelopt/recipe/loader.py
+++ b/modelopt/recipe/loader.py
@@ -20,6 +20,7 @@
 except ImportError:  # Python < 3.11
     from importlib.abc import Traversable
 from pathlib import Path
+from typing import Any
 
 from ._config_loader import BUILTIN_RECIPES_LIB, load_config
 from .config import ModelOptPTQRecipe, ModelOptRecipeBase, RecipeType
@@ -27,6 +28,87 @@
 __all__ = ["load_config", "load_recipe"]
 
 
+def _resolve_imports(
+    data: dict[str, Any], _loading: frozenset[str] | None = None
+) -> dict[str, Any]:
+    """Resolve the ``imports`` section in a recipe and substitute named references.
+
+    ``imports`` maps short names to config paths, each read with ``load_config``::
+
+        imports:
+          fp8: configs/numerics/fp8
+
+    In ``quantize.quant_cfg``, a string ``cfg`` value is replaced with the named
+    import; a bare string entry is replaced by it, or spliced in when the import
+    is a list. Unknown names raise ``ValueError``. ``data`` is mutated in place.
+
+    A dict snippet that has its own ``imports`` is passed back through this
+    function; a path already in ``_loading`` raises ``ValueError`` (circular).
+    """
+    imports_dict = data.pop("imports", None)
+    if not imports_dict:
+        return data
+
+    if not isinstance(imports_dict, dict):
+        raise ValueError(
+            f"'imports' must be a dict mapping names to config paths, got: {type(imports_dict).__name__}"
+        )
+
+    if _loading is None:
+        _loading = frozenset()
+
+    # Build name → config mapping (recursively resolve nested imports)
+    import_map: dict[str, Any] = {}
+    for name, config_path in imports_dict.items():
+        if not config_path:
+            raise ValueError(f"Import {name!r} has an empty config path.")
+        if config_path in _loading:
+            raise ValueError(
+                f"Circular import detected: {config_path!r} is already being loaded. "
+                f"Import chain: {sorted(_loading)}"
+            )
+        snippet = load_config(config_path)
+        if isinstance(snippet, dict) and "imports" in snippet:
+            snippet = _resolve_imports(snippet, _loading | {config_path})
+        import_map[name] = snippet
+
+    # Resolve string references in quant_cfg entries
+    quantize = data.get("quantize")
+    if isinstance(quantize, dict):
+        quant_cfg = quantize.get("quant_cfg")
+        if isinstance(quant_cfg, list):
+            resolved_cfg: list[Any] = []
+            for entry in quant_cfg:
+                if isinstance(entry, str):
+                    # Entire entry is a string → replace with the imported value
+                    if entry not in import_map:
+                        raise ValueError(
+                            f"Unknown import reference {entry!r} in quant_cfg list. "
+                            f"Available imports: {list(import_map.keys())}"
+                        )
+                    imported = import_map[entry]
+                    if isinstance(imported, list):
+                        # List import → splice all entries in place
+                        resolved_cfg.extend(imported)
+                    else:
+                        resolved_cfg.append(imported)
+                elif isinstance(entry, dict) and isinstance(entry.get("cfg"), str):
+                    # cfg is a string → swap in the import (same object, not a copy)
+                    ref_name = entry["cfg"]
+                    if ref_name not in import_map:
+                        raise ValueError(
+                            f"Unknown import reference {ref_name!r} in quant_cfg entry "
+                            f"{entry!r}. Available imports: {list(import_map.keys())}"
+                        )
+                    entry["cfg"] = import_map[ref_name]
+                    resolved_cfg.append(entry)
+                else:
+                    resolved_cfg.append(entry)
+            quantize["quant_cfg"] = resolved_cfg
+
+    return data
+
+
 def _resolve_recipe_path(recipe_path: str | Path | Traversable) -> Path | Traversable:
     """Resolve a recipe path, checking the built-in library first then the filesystem.
 
@@ -86,7 +168,9 @@ def _load_recipe_from_file(recipe_file: Path | Traversable) -> ModelOptRecipeBas
     The file must contain a ``metadata`` section with at least ``recipe_type``,
     plus a ``quant_cfg`` mapping and an optional ``algorithm`` for PTQ recipes.
     """
-    data = load_config(recipe_file)
+    raw = load_config(recipe_file)
+    assert isinstance(raw, dict), f"Recipe file {recipe_file} must be a YAML mapping."
+    data = _resolve_imports(raw)
 
     metadata = data.get("metadata", {})
     recipe_type = metadata.get("recipe_type")
@@ -117,7 +201,9 @@ def _load_recipe_from_dir(recipe_dir: Path | Traversable) -> ModelOptRecipeBase:
             f"Cannot find a recipe descriptor in {recipe_dir}. Looked for: recipe.yml, recipe.yaml"
         )
 
-    metadata = load_config(recipe_file).get("metadata", {})
+    recipe_data = load_config(recipe_file)
+    assert isinstance(recipe_data, dict), f"Recipe file {recipe_file} must be a YAML mapping."
+    metadata = recipe_data.get("metadata", {})
     recipe_type = metadata.get("recipe_type")
     if recipe_type is None:
         raise ValueError(f"Recipe file {recipe_file} must contain a 'metadata.recipe_type' field.")
@@ -133,9 +219,17 @@ def _load_recipe_from_dir(recipe_dir: Path | Traversable) -> ModelOptRecipeBase:
             raise ValueError(
                 f"Cannot find quantize in {recipe_dir}. Looked for: quantize.yml, quantize.yaml"
             )
+        # Resolve imports: imports are in recipe.yml, quantize data is separate
+        quantize_data = load_config(quantize_file)
+        assert isinstance(quantize_data, dict), f"{quantize_file} must be a YAML mapping."
+        combined: dict[str, Any] = {"quantize": quantize_data}
+        imports = recipe_data.get("imports")
+        if imports:
+            combined["imports"] = imports
+        combined = _resolve_imports(combined)
         return ModelOptPTQRecipe(
             recipe_type=RecipeType.PTQ,
             description=metadata.get("description", "PTQ recipe."),
-            quantize=load_config(quantize_file),
+            quantize=combined["quantize"],
         )
     raise ValueError(f"Unsupported recipe type: {recipe_type!r}")
diff --git a/modelopt_recipes/configs/numerics/fp8.yml b/modelopt_recipes/configs/numerics/fp8.yml
new file mode 100644
index 0000000000..e84779c8f4
--- /dev/null
+++ b/modelopt_recipes/configs/numerics/fp8.yml
@@ -0,0 +1,2 @@
+# FP8 E4M3 quantizer attributes (no axis — used for KV cache, etc.).
+num_bits: e4m3
diff --git a/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml b/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml
new file mode 100644
index 0000000000..335e357a7f
--- /dev/null
+++ b/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml
@@ -0,0 +1,6 @@
+# NVFP4 E2M1 blockwise with dynamic calibration and FP8 E4M3 scales.
+num_bits: e2m1
+block_sizes:
+  -1: 16
+  type: dynamic
+  scale_bits: e4m3
diff --git a/modelopt_recipes/configs/numerics/nvfp4_static.yml b/modelopt_recipes/configs/numerics/nvfp4_static.yml
new file mode 100644
index 0000000000..90d15bf489
--- /dev/null
+++ b/modelopt_recipes/configs/numerics/nvfp4_static.yml
@@ -0,0 +1,6 @@
+# NVFP4 E2M1 blockwise with static calibration and FP8 E4M3 scales.
+num_bits: e2m1
+block_sizes:
+  -1: 16
+  type: static
+  scale_bits: e4m3
diff --git a/modelopt_recipes/configs/ptq/base_disable_all.yaml b/modelopt_recipes/configs/ptq/base_disable_all.yaml
new file mode 100644
index 0000000000..7035b55c5f
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/base_disable_all.yaml
@@ -0,0 +1,3 @@
+# Disable all quantizers by default (deny-all-then-configure pattern).
+quantizer_name: '*'
+enable: false
diff --git a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml
new file mode 100644
index 0000000000..98934ae725
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml
@@ -0,0 +1,33 @@
+# Standard quantizer exclusions: layers that should not be quantized.
+  - quantizer_name: '*block_sparse_moe.gate*'
+    enable: false
+  - quantizer_name: '*linear_attn.conv1d*'
+    enable: false
+  - quantizer_name: '*lm_head*'
+    enable: false
+  - quantizer_name: '*mixer.conv1d*'
+    enable: false
+  - quantizer_name: '*mlp.gate.*'
+    enable: false
+  - quantizer_name: '*mlp.shared_expert_gate.*'
+    enable: false
+  - quantizer_name: '*output_layer*'
+    enable: false
+  - quantizer_name: '*proj_out.*'
+    enable: false
+  - quantizer_name: '*router*'
+    enable: false
+  - quantizer_name: 'output.*'
+    enable: false
+  - parent_class: 'nn.BatchNorm1d'
+    quantizer_name: '*'
+    enable: false
+  - parent_class: 'nn.BatchNorm2d'
+    quantizer_name: '*'
+    enable: false
+  - parent_class: 'nn.BatchNorm3d'
+    quantizer_name: '*'
+    enable: false
+  - parent_class: 'nn.LeakyReLU'
+    quantizer_name: '*'
+    enable: false
diff --git a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
index c80904e8eb..cfe6f1269b 100644
--- a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
@@ -13,55 +13,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+imports:
+  base_disable_all: configs/ptq/base_disable_all
+  default_disabled: configs/ptq/default_disabled_quantizers
+  fp8: configs/numerics/fp8
+
 metadata:
   recipe_type: ptq
   description: FP8 per-tensor weight and activation (W8A8), FP8 KV cache, max calibration.
 quantize:
   algorithm: max
   quant_cfg:
-    - quantizer_name: '*'
-      enable: false
+    - base_disable_all
     - quantizer_name: '*input_quantizer'
-      cfg:
-        num_bits: e4m3
-        axis:
+      cfg: fp8
     - quantizer_name: '*weight_quantizer'
-      cfg:
-        num_bits: e4m3
-        axis:
+      cfg: fp8
     - quantizer_name: '*[kv]_bmm_quantizer'
       enable: true
-      cfg:
-        num_bits: e4m3
-    - quantizer_name: '*block_sparse_moe.gate*'
-      enable: false
-    - quantizer_name: '*linear_attn.conv1d*'
-      enable: false
-    - quantizer_name: '*lm_head*'
-      enable: false
-    - quantizer_name: '*mixer.conv1d*'
-      enable: false
-    - quantizer_name: '*mlp.gate.*'
-      enable: false
-    - quantizer_name: '*mlp.shared_expert_gate.*'
-      enable: false
-    - quantizer_name: '*output_layer*'
-      enable: false
-    - quantizer_name: '*proj_out.*'
-      enable: false
-    - quantizer_name: '*router*'
-      enable: false
-    - quantizer_name: 'output.*'
-      enable: false
-    - parent_class: 'nn.BatchNorm1d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.BatchNorm2d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.BatchNorm3d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.LeakyReLU'
-      quantizer_name: '*'
-      enable: false
+      cfg: fp8
+    - default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
index 6fe4a8c3d1..37e46bef56 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
@@ -13,63 +13,26 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+imports:
+  base_disable_all: configs/ptq/base_disable_all
+  default_disabled: configs/ptq/default_disabled_quantizers
+  nvfp4: configs/numerics/nvfp4_dynamic
+  fp8: configs/numerics/fp8
+
 metadata:
   recipe_type: ptq
   description: NVFP4 MLP/MoE weight only (W4A16), FP8 KV cache, max calibration.
 quantize:
   algorithm: max
   quant_cfg:
-    - quantizer_name: '*'
-      enable: false
+    - base_disable_all
     - quantizer_name: '*weight_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*input_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*[kv]_bmm_quantizer'
       enable: true
-      cfg:
-        num_bits: e4m3
-    - quantizer_name: '*block_sparse_moe.gate*'
-      enable: false
-    - quantizer_name: '*linear_attn.conv1d*'
-      enable: false
-    - quantizer_name: '*lm_head*'
-      enable: false
-    - quantizer_name: '*mixer.conv1d*'
-      enable: false
-    - quantizer_name: '*mlp.gate.*'
-      enable: false
-    - quantizer_name: '*mlp.shared_expert_gate.*'
-      enable: false
-    - quantizer_name: '*output_layer*'
-      enable: false
-    - quantizer_name: '*proj_out.*'
-      enable: false
-    - quantizer_name: '*router*'
-      enable: false
-    - quantizer_name: 'output.*'
-      enable: false
-    - parent_class: 'nn.BatchNorm1d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.BatchNorm2d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.BatchNorm3d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.LeakyReLU'
-      quantizer_name: '*'
-      enable: false
+      cfg: fp8
+    - default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml
index a62051b659..73b3fada7e 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml
@@ -13,6 +13,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+imports:
+  base_disable_all: configs/ptq/base_disable_all
+  default_disabled: configs/ptq/default_disabled_quantizers
+  nvfp4_static: configs/numerics/nvfp4_static
+  nvfp4_dynamic: configs/numerics/nvfp4_dynamic
+
 metadata:
   recipe_type: ptq
   description: NVFP4 weight and activation (W4A4), gptq sequential calibration.
@@ -21,53 +27,11 @@ quantize:
     method: gptq
     use_sequential: true
   quant_cfg:
-    - quantizer_name: '*'
-      enable: false
+    - base_disable_all
     - quantizer_name: '*weight_quantizer'
-      cfg:
-        block_sizes:
-          -1: 16
-          type: static
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4_static
     - quantizer_name: '*input_quantizer'
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4_dynamic
     - quantizer_name: '*[kv]_bmm_quantizer'
       enable: false
-    - quantizer_name: '*block_sparse_moe.gate*'
-      enable: false
-    - quantizer_name: '*linear_attn.conv1d*'
-      enable: false
-    - quantizer_name: '*lm_head*'
-      enable: false
-    - quantizer_name: '*mixer.conv1d*'
-      enable: false
-    - quantizer_name: '*mlp.gate.*'
-      enable: false
-    - quantizer_name: '*mlp.shared_expert_gate.*'
-      enable: false
-    - quantizer_name: '*output_layer*'
-      enable: false
-    - quantizer_name: '*proj_out.*'
-      enable: false
-    - quantizer_name: '*router*'
-      enable: false
-    - quantizer_name: 'output.*'
-      enable: false
-    - parent_class: 'nn.BatchNorm1d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.BatchNorm2d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.BatchNorm3d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.LeakyReLU'
-      quantizer_name: '*'
-      enable: false
+    - default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
index cc332733a0..7177a6d8aa 100644
--- a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
@@ -13,79 +13,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+imports:
+  base_disable_all: configs/ptq/base_disable_all
+  default_disabled: configs/ptq/default_disabled_quantizers
+  nvfp4: configs/numerics/nvfp4_dynamic
+  fp8: configs/numerics/fp8
+
 metadata:
   recipe_type: ptq
   description: NVFP4 static weight and dynamic activation for expert layers only (W4A4), FP8 KV cache, max calibration.
 quantize:
   algorithm: max
   quant_cfg:
-    - quantizer_name: '*'
-      enable: false
+    - base_disable_all
     - quantizer_name: '*mlp.experts*weight_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*mlp.experts*input_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*block_sparse_moe*weight_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*block_sparse_moe*input_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*[kv]_bmm_quantizer'
       enable: true
-      cfg:
-        num_bits: e4m3
-    - quantizer_name: '*block_sparse_moe.gate*'
-      enable: false
-    - quantizer_name: '*linear_attn.conv1d*'
-      enable: false
-    - quantizer_name: '*lm_head*'
-      enable: false
-    - quantizer_name: '*mixer.conv1d*'
-      enable: false
-    - quantizer_name: '*mlp.gate.*'
-      enable: false
-    - quantizer_name: '*mlp.shared_expert_gate.*'
-      enable: false
-    - quantizer_name: '*output_layer*'
-      enable: false
-    - quantizer_name: '*proj_out.*'
-      enable: false
-    - quantizer_name: '*router*'
-      enable: false
-    - quantizer_name: 'output.*'
-      enable: false
-    - parent_class: 'nn.BatchNorm1d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.BatchNorm2d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.BatchNorm3d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.LeakyReLU'
-      quantizer_name: '*'
-      enable: false
+      cfg: fp8
+    - default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
index 0222274af0..990d686d5a 100644
--- a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
@@ -13,79 +13,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+imports:
+  base_disable_all: configs/ptq/base_disable_all
+  default_disabled: configs/ptq/default_disabled_quantizers
+  nvfp4: configs/numerics/nvfp4_dynamic
+  fp8: configs/numerics/fp8
+
 metadata:
   recipe_type: ptq
   description: NVFP4 static weight and dynamic activation for all linear layers (W4A4), FP8 KV cache, max calibration.
 quantize:
   algorithm: max
   quant_cfg:
-    - quantizer_name: '*'
-      enable: false
+    - base_disable_all
     - quantizer_name: '*mlp*weight_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*mlp*input_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*block_sparse_moe*weight_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*block_sparse_moe*input_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*[kv]_bmm_quantizer'
       enable: true
-      cfg:
-        num_bits: e4m3
-    - quantizer_name: '*block_sparse_moe.gate*'
-      enable: false
-    - quantizer_name: '*linear_attn.conv1d*'
-      enable: false
-    - quantizer_name: '*lm_head*'
-      enable: false
-    - quantizer_name: '*mixer.conv1d*'
-      enable: false
-    - quantizer_name: '*mlp.gate.*'
-      enable: false
-    - quantizer_name: '*mlp.shared_expert_gate.*'
-      enable: false
-    - quantizer_name: '*output_layer*'
-      enable: false
-    - quantizer_name: '*proj_out.*'
-      enable: false
-    - quantizer_name: '*router*'
-      enable: false
-    - quantizer_name: 'output.*'
-      enable: false
-    - parent_class: 'nn.BatchNorm1d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.BatchNorm2d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.BatchNorm3d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.LeakyReLU'
-      quantizer_name: '*'
-      enable: false
+      cfg: fp8
+    - default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
index 3fdd79888d..5b92c97714 100644
--- a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
@@ -13,95 +13,38 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+imports:
+  base_disable_all: configs/ptq/base_disable_all
+  default_disabled: configs/ptq/default_disabled_quantizers
+  nvfp4: configs/numerics/nvfp4_dynamic
+  fp8: configs/numerics/fp8
+
 metadata:
   recipe_type: ptq
   description: NVFP4 static weight and dynamic activation for all linear layers including output projections, FP8 KV cache, max calibration.
 quantize:
   algorithm: max
   quant_cfg:
-    - quantizer_name: '*'
-      enable: false
+    - base_disable_all
     - quantizer_name: '*mlp*weight_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*mlp*input_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*block_sparse_moe*weight_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*block_sparse_moe*input_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*o_proj*weight_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*o_proj*input_quantizer'
       enable: true
-      cfg:
-        block_sizes:
-          -1: 16
-          type: dynamic
-          scale_bits: e4m3
-        num_bits: e2m1
+      cfg: nvfp4
     - quantizer_name: '*[kv]_bmm_quantizer'
       enable: true
-      cfg:
-        num_bits: e4m3
-    - quantizer_name: '*block_sparse_moe.gate*'
-      enable: false
-    - quantizer_name: '*linear_attn.conv1d*'
-      enable: false
-    - quantizer_name: '*lm_head*'
-      enable: false
-    - quantizer_name: '*mixer.conv1d*'
-      enable: false
-    - quantizer_name: '*mlp.gate.*'
-      enable: false
-    - quantizer_name: '*mlp.shared_expert_gate.*'
-      enable: false
-    - quantizer_name: '*output_layer*'
-      enable: false
-    - quantizer_name: '*proj_out.*'
-      enable: false
-    - quantizer_name: '*router*'
-      enable: false
-    - quantizer_name: 'output.*'
-      enable: false
-    - parent_class: 'nn.BatchNorm1d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.BatchNorm2d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.BatchNorm3d'
-      quantizer_name: '*'
-      enable: false
-    - parent_class: 'nn.LeakyReLU'
-      quantizer_name: '*'
-      enable: false
+      cfg: fp8
+    - default_disabled
diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py
index b8da2d140f..706d99d023 100644
--- a/tests/unit/recipe/test_loader.py
+++ b/tests/unit/recipe/test_loader.py
@@ -107,6 +107,8 @@ def test_load_recipe_builtin_description():
 _BUILTIN_PTQ_RECIPES = [
     "general/ptq/fp8_default-fp8_kv",
     "general/ptq/nvfp4_default-fp8_kv",
+    "general/ptq/nvfp4_default-none_kv_gptq",
+    "general/ptq/nvfp4_experts_only-fp8_kv",
     "general/ptq/nvfp4_mlp_only-fp8_kv",
     "general/ptq/nvfp4_omlp_only-fp8_kv",
 ]
@@ -249,3 +251,277 @@ def _sort_key(entry):
 
     assert sorted(python_entries, key=_sort_key) == sorted(yaml_entries, key=_sort_key)
     assert model_cfg["algorithm"] == yaml_data["quantize"]["algorithm"]
+
+
+# ---------------------------------------------------------------------------
+# imports — named config snippet resolution
+# ---------------------------------------------------------------------------
+
+
+def test_import_resolves_cfg_reference(tmp_path):
+    """String cfg values are replaced with the imported config dict."""
+    (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n")
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  fp8: {tmp_path / 'fp8.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - quantizer_name: '*weight_quantizer'\n"
+        f"      cfg: fp8\n"
+    )
+    recipe = load_recipe(recipe_file)
+    entry = recipe.quantize["quant_cfg"][0]
+    assert entry["cfg"] == {"num_bits": (4, 3), "axis": None}
+
+
+def test_import_same_name_used_twice(tmp_path):
+    """The same import can be referenced in multiple quant_cfg entries."""
+    (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n")
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  fp8: {tmp_path / 'fp8.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - quantizer_name: '*weight_quantizer'\n"
+        f"      cfg: fp8\n"
+        f"    - quantizer_name: '*input_quantizer'\n"
+        f"      cfg: fp8\n"
+    )
+    recipe = load_recipe(recipe_file)
+    assert recipe.quantize["quant_cfg"][0]["cfg"] == recipe.quantize["quant_cfg"][1]["cfg"]
+
+
+def test_import_multiple_snippets(tmp_path):
+    """Multiple imports with different names resolve independently."""
+    (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n")
+    (tmp_path / "nvfp4.yml").write_text("num_bits: e2m1\nblock_sizes:\n  -1: 16\n")
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  fp8: {tmp_path / 'fp8.yml'}\n"
+        f"  nvfp4: {tmp_path / 'nvfp4.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - quantizer_name: '*weight_quantizer'\n"
+        f"      cfg: nvfp4\n"
+        f"    - quantizer_name: '*[kv]_bmm_quantizer'\n"
+        f"      cfg: fp8\n"
+    )
+    recipe = load_recipe(recipe_file)
+    assert recipe.quantize["quant_cfg"][0]["cfg"]["num_bits"] == (2, 1)
+    assert recipe.quantize["quant_cfg"][1]["cfg"]["num_bits"] == (4, 3)
+
+
+def test_import_inline_cfg_not_affected(tmp_path):
+    """Inline dict cfg entries are not touched by import resolution."""
+    (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n")
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  fp8: {tmp_path / 'fp8.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - quantizer_name: '*weight_quantizer'\n"
+        f"      cfg: fp8\n"
+        f"    - quantizer_name: '*input_quantizer'\n"
+        f"      cfg:\n"
+        f"        num_bits: 8\n"
+        f"        axis: 0\n"
+    )
+    recipe = load_recipe(recipe_file)
+    assert recipe.quantize["quant_cfg"][1]["cfg"] == {"num_bits": 8, "axis": 0}
+
+
+def test_import_unknown_reference_raises(tmp_path):
+    """Referencing an undefined import name raises ValueError."""
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        "imports:\n"
+        "  fp8: configs/numerics/fp8\n"
+        "metadata:\n"
+        "  recipe_type: ptq\n"
+        "quantize:\n"
+        "  algorithm: max\n"
+        "  quant_cfg:\n"
+        "    - quantizer_name: '*weight_quantizer'\n"
+        "      cfg: nonexistent\n"
+    )
+    with pytest.raises(ValueError, match="Unknown import reference"):
+        load_recipe(recipe_file)
+
+
+def test_import_empty_path_raises(tmp_path):
+    """Import with empty config path raises ValueError."""
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        "imports:\n"
+        "  fp8:\n"
+        "metadata:\n"
+        "  recipe_type: ptq\n"
+        "quantize:\n"
+        "  algorithm: max\n"
+        "  quant_cfg: []\n"
+    )
+    with pytest.raises(ValueError, match="empty config path"):
+        load_recipe(recipe_file)
+
+
+def test_import_not_a_dict_raises(tmp_path):
+    """Import section that is not a dict raises ValueError."""
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        "imports:\n"
+        "  - configs/numerics/fp8\n"
+        "metadata:\n"
+        "  recipe_type: ptq\n"
+        "quantize:\n"
+        "  algorithm: max\n"
+        "  quant_cfg: []\n"
+    )
+    with pytest.raises(ValueError, match="must be a dict"):
+        load_recipe(recipe_file)
+
+
+def test_import_no_imports_section(tmp_path):
+    """Recipes without imports load normally."""
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        "metadata:\n"
+        "  recipe_type: ptq\n"
+        "quantize:\n"
+        "  algorithm: max\n"
+        "  quant_cfg:\n"
+        "    - quantizer_name: '*'\n"
+        "      enable: false\n"
+    )
+    recipe = load_recipe(recipe_file)
+    assert recipe.quantize["quant_cfg"][0]["enable"] is False
+
+
+def test_import_builtin_config_snippet():
+    """Imports can reference built-in config snippets by relative path."""
+    recipe = load_recipe("general/ptq/fp8_default-fp8_kv")
+    # This recipe doesn't use imports, but verify it still loads fine
+    assert recipe.quantize
+
+
+def test_import_dir_format(tmp_path):
+    """Imports in recipe.yml work with the directory recipe format."""
+    (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n")
+    (tmp_path / "recipe.yml").write_text(
+        f"imports:\n"
+        f"  fp8: {tmp_path / 'fp8.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"  description: Dir with imports.\n"
+    )
+    (tmp_path / "quantize.yml").write_text(
+        "algorithm: max\nquant_cfg:\n  - quantizer_name: '*weight_quantizer'\n    cfg: fp8\n"
+    )
+    recipe = load_recipe(tmp_path)
+    assert recipe.quantize["quant_cfg"][0]["cfg"] == {"num_bits": (4, 3), "axis": None}
+
+
+# ---------------------------------------------------------------------------
+# imports — recursive resolution and cycle detection
+# ---------------------------------------------------------------------------
+
+
+def test_import_recursive(tmp_path):
+    """A snippet can itself import other snippets."""
+    # base snippet — no imports
+    (tmp_path / "base.yml").write_text("num_bits: e4m3\n")
+    # mid-level snippet imports base
+    (tmp_path / "mid.yml").write_text(
+        f"imports:\n  base: {tmp_path / 'base.yml'}\nnum_bits: base\n"
+    )
+    # recipe imports mid
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  mid: {tmp_path / 'mid.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - quantizer_name: '*weight_quantizer'\n"
+        f"      cfg: mid\n"
+    )
+    recipe = load_recipe(recipe_file)
+    # mid.yml's "num_bits: base" should have been resolved to the base snippet's content
+    cfg = recipe.quantize["quant_cfg"][0]["cfg"]
+    assert cfg["num_bits"] == {"num_bits": (4, 3)}
+
+
+def test_import_circular_raises(tmp_path):
+    """Circular imports are detected and raise ValueError."""
+    (tmp_path / "a.yml").write_text(f"imports:\n  b: {tmp_path / 'b.yml'}\nnum_bits: 8\n")
+    (tmp_path / "b.yml").write_text(f"imports:\n  a: {tmp_path / 'a.yml'}\nnum_bits: 4\n")
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  a: {tmp_path / 'a.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg: []\n"
+    )
+    with pytest.raises(ValueError, match="Circular import"):
+        load_recipe(recipe_file)
+
+
+def test_import_cross_file_same_name_no_conflict(tmp_path):
+    """Same import name in parent and child files resolve independently (no conflict).
+
+    recipe.yml imports ``fmt`` → fp8.yml (num_bits: e4m3)
+    recipe.yml also imports ``child`` → child.yml
+    child.yml  imports ``fmt`` → nvfp4.yml (num_bits: e2m1, block_sizes: ...)
+
+    The parent's ``fmt`` and the child's ``fmt`` are different configs.
+    The parent should get fp8 for its own ``fmt`` reference, and the child's
+    ``fmt`` should be resolved within the child's scope only.
+    """
+    # Two different snippets, both will be imported under the name "fmt"
+    (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n")
+    (tmp_path / "nvfp4.yml").write_text("num_bits: e2m1\nblock_sizes:\n  -1: 16\n")
+
+    # Child snippet imports "fmt" → nvfp4
+    (tmp_path / "child.yml").write_text(
+        f"imports:\n  fmt: {tmp_path / 'nvfp4.yml'}\nweight_format: fmt\n"
+    )
+
+    # Parent recipe imports "fmt" → fp8, and also imports "child"
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  fmt: {tmp_path / 'fp8.yml'}\n"
+        f"  child: {tmp_path / 'child.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - quantizer_name: '*weight_quantizer'\n"
+        f"      cfg: fmt\n"
+    )
+    recipe = load_recipe(recipe_file)
+
+    # Parent's "fmt" should resolve to fp8 (e4m3), not nvfp4
+    cfg = recipe.quantize["quant_cfg"][0]["cfg"]
+    assert cfg == {"num_bits": (4, 3)}
diff --git a/tools/precommit/check_modelopt_recipes.py b/tools/precommit/check_modelopt_recipes.py
index b964b4b040..f31145bc50 100644
--- a/tools/precommit/check_modelopt_recipes.py
+++ b/tools/precommit/check_modelopt_recipes.py
@@ -48,6 +48,9 @@ def _check_quant_cfg(quant_cfg, label: str) -> list[str]:
         )
     elif isinstance(quant_cfg, list):
         for i, entry in enumerate(quant_cfg):
+            if isinstance(entry, str):
+                # String entries are import references — resolved at load time
+                continue
             if not isinstance(entry, dict):
                 errors.append(
                     f"{label}: quant_cfg[{i}] must be a dict with "

From 99120f89faed981afdc65f46817211b7182f8129 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Mon, 13 Apr 2026 16:34:34 -0700
Subject: [PATCH 02/30] Reimplement import references using an explicit $import marker

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 .pre-commit-config.yaml                       |   1 +
 modelopt/recipe/loader.py                     |  53 ++++----
 .../general/ptq/fp8_default-fp8_kv.yaml       |  13 +-
 .../general/ptq/nvfp4_default-fp8_kv.yaml     |  13 +-
 .../ptq/nvfp4_default-none_kv_gptq.yaml       |  10 +-
 .../ptq/nvfp4_experts_only-fp8_kv.yaml        |  19 +--
 .../general/ptq/nvfp4_mlp_only-fp8_kv.yaml    |  19 +--
 .../general/ptq/nvfp4_omlp_only-fp8_kv.yaml   |  25 ++--
 tests/unit/recipe/test_loader.py              | 117 ++++++++++++------
 tools/precommit/check_modelopt_recipes.py     |  10 +-
 10 files changed, 178 insertions(+), 102 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0fc5c8eeaa..dd546394c3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -68,6 +68,7 @@ repos:
         entry: python tools/precommit/check_modelopt_recipes.py
         language: system
         files: ^modelopt_recipes/
+        # configs/ contains reusable snippets (not full recipes) — skip recipe validation
         exclude: ^modelopt_recipes/configs/
 
   # Instructions to change license file if ever needed:
diff --git a/modelopt/recipe/loader.py b/modelopt/recipe/loader.py
index 342e615d06..68d0f32c63 100644
--- a/modelopt/recipe/loader.py
+++ b/modelopt/recipe/loader.py
@@ -28,10 +28,13 @@
 __all__ = ["load_config", "load_recipe"]
 
 
+_IMPORT_KEY = "$import"
+
+
 def _resolve_imports(
     data: dict[str, Any], _loading: frozenset[str] | None = None
 ) -> dict[str, Any]:
-    """Resolve the ``imports`` section in a recipe and substitute named references.
+    """Resolve the ``imports`` section and ``$import`` references in a recipe.
 
     An ``imports`` block is a dict mapping short names to config file paths::
 
@@ -39,8 +42,14 @@ def _resolve_imports(
           fp8: configs/numerics/fp8
           nvfp4: configs/numerics/nvfp4_dynamic
 
-    ``cfg`` values in ``quant_cfg`` entries that are plain strings are looked up
-    against the imported names and replaced with the loaded config dict.
+    References use the explicit ``$import`` marker so they are never confused
+    with literal string values::
+
+        quant_cfg:
+          - $import: base_disable_all           # entire entry replaced (or list spliced)
+          - quantizer_name: '*weight_quantizer'
+            cfg:
+              $import: fp8                      # cfg value replaced
 
     Resolution is **recursive**: an imported snippet may itself contain an
     ``imports`` section.  Circular imports are detected and raise ``ValueError``.
@@ -72,35 +81,35 @@ def _resolve_imports(
             snippet = _resolve_imports(snippet, _loading | {config_path})
         import_map[name] = snippet
 
-    # Resolve string references in quant_cfg entries
+    def _lookup(ref_name: str, context: str) -> Any:
+        if ref_name not in import_map:
+            raise ValueError(
+                f"Unknown $import reference {ref_name!r} in {context}. "
+                f"Available imports: {list(import_map.keys())}"
+            )
+        return import_map[ref_name]
+
+    # Resolve $import references in quant_cfg entries
     quantize = data.get("quantize")
     if isinstance(quantize, dict):
         quant_cfg = quantize.get("quant_cfg")
         if isinstance(quant_cfg, list):
             resolved_cfg: list[Any] = []
             for entry in quant_cfg:
-                if isinstance(entry, str):
-                    # Entire entry is a string → replace with the imported value
-                    if entry not in import_map:
-                        raise ValueError(
-                            f"Unknown import reference {entry!r} in quant_cfg list. "
-                            f"Available imports: {list(import_map.keys())}"
-                        )
-                    imported = import_map[entry]
+                if isinstance(entry, dict) and _IMPORT_KEY in entry:
+                    # {$import: name} → replace entire entry (or splice list)
+                    imported = _lookup(entry[_IMPORT_KEY], "quant_cfg entry")
                     if isinstance(imported, list):
-                        # List import → splice all entries in place
                         resolved_cfg.extend(imported)
                     else:
                         resolved_cfg.append(imported)
-                elif isinstance(entry, dict) and isinstance(entry.get("cfg"), str):
-                    # cfg field is a string → replace cfg value
-                    ref_name = entry["cfg"]
-                    if ref_name not in import_map:
-                        raise ValueError(
-                            f"Unknown import reference {ref_name!r} in quant_cfg entry "
-                            f"{entry!r}. Available imports: {list(import_map.keys())}"
-                        )
-                    entry["cfg"] = import_map[ref_name]
+                elif (
+                    isinstance(entry, dict)
+                    and isinstance(entry.get("cfg"), dict)
+                    and _IMPORT_KEY in entry["cfg"]
+                ):
+                    # cfg: {$import: name} → replace cfg value
+                    entry["cfg"] = _lookup(entry["cfg"][_IMPORT_KEY], f"cfg of {entry}")
                     resolved_cfg.append(entry)
                 else:
                     resolved_cfg.append(entry)
diff --git a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
index cfe6f1269b..5048b4f74c 100644
--- a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
@@ -24,12 +24,15 @@ metadata:
 quantize:
   algorithm: max
   quant_cfg:
-    - base_disable_all
+    - $import: base_disable_all
     - quantizer_name: '*input_quantizer'
-      cfg: fp8
+      cfg:
+        $import: fp8
     - quantizer_name: '*weight_quantizer'
-      cfg: fp8
+      cfg:
+        $import: fp8
     - quantizer_name: '*[kv]_bmm_quantizer'
       enable: true
-      cfg: fp8
-    - default_disabled
+      cfg:
+        $import: fp8
+    - $import: default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
index 37e46bef56..d5ba09c44d 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
@@ -25,14 +25,17 @@ metadata:
 quantize:
   algorithm: max
   quant_cfg:
-    - base_disable_all
+    - $import: base_disable_all
     - quantizer_name: '*weight_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*input_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*[kv]_bmm_quantizer'
       enable: true
-      cfg: fp8
-    - default_disabled
+      cfg:
+        $import: fp8
+    - $import: default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml
index 73b3fada7e..45db9aa80c 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml
@@ -27,11 +27,13 @@ quantize:
     method: gptq
     use_sequential: true
   quant_cfg:
-    - base_disable_all
+    - $import: base_disable_all
     - quantizer_name: '*weight_quantizer'
-      cfg: nvfp4_static
+      cfg:
+        $import: nvfp4_static
     - quantizer_name: '*input_quantizer'
-      cfg: nvfp4_dynamic
+      cfg:
+        $import: nvfp4_dynamic
     - quantizer_name: '*[kv]_bmm_quantizer'
       enable: false
-    - default_disabled
+    - $import: default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
index 7177a6d8aa..c41005b8fd 100644
--- a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
@@ -25,20 +25,25 @@ metadata:
 quantize:
   algorithm: max
   quant_cfg:
-    - base_disable_all
+    - $import: base_disable_all
     - quantizer_name: '*mlp.experts*weight_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*mlp.experts*input_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*block_sparse_moe*weight_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*block_sparse_moe*input_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*[kv]_bmm_quantizer'
       enable: true
-      cfg: fp8
-    - default_disabled
+      cfg:
+        $import: fp8
+    - $import: default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
index 990d686d5a..86b8ba7fac 100644
--- a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
@@ -25,20 +25,25 @@ metadata:
 quantize:
   algorithm: max
   quant_cfg:
-    - base_disable_all
+    - $import: base_disable_all
     - quantizer_name: '*mlp*weight_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*mlp*input_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*block_sparse_moe*weight_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*block_sparse_moe*input_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*[kv]_bmm_quantizer'
       enable: true
-      cfg: fp8
-    - default_disabled
+      cfg:
+        $import: fp8
+    - $import: default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
index 5b92c97714..c329849ad0 100644
--- a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
@@ -25,26 +25,33 @@ metadata:
 quantize:
   algorithm: max
   quant_cfg:
-    - base_disable_all
+    - $import: base_disable_all
     - quantizer_name: '*mlp*weight_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*mlp*input_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*block_sparse_moe*weight_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*block_sparse_moe*input_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*o_proj*weight_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*o_proj*input_quantizer'
       enable: true
-      cfg: nvfp4
+      cfg:
+        $import: nvfp4
     - quantizer_name: '*[kv]_bmm_quantizer'
       enable: true
-      cfg: fp8
-    - default_disabled
+      cfg:
+        $import: fp8
+    - $import: default_disabled
diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py
index 706d99d023..d217518732 100644
--- a/tests/unit/recipe/test_loader.py
+++ b/tests/unit/recipe/test_loader.py
@@ -259,7 +259,7 @@ def _sort_key(entry):
 
 
 def test_import_resolves_cfg_reference(tmp_path):
-    """String cfg values are replaced with the imported config dict."""
+    """$import in cfg is replaced with the imported config dict."""
     (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n")
     recipe_file = tmp_path / "recipe.yml"
     recipe_file.write_text(
@@ -271,7 +271,8 @@ def test_import_resolves_cfg_reference(tmp_path):
         f"  algorithm: max\n"
         f"  quant_cfg:\n"
         f"    - quantizer_name: '*weight_quantizer'\n"
-        f"      cfg: fp8\n"
+        f"      cfg:\n"
+        f"        $import: fp8\n"
     )
     recipe = load_recipe(recipe_file)
     entry = recipe.quantize["quant_cfg"][0]
@@ -291,9 +292,11 @@ def test_import_same_name_used_twice(tmp_path):
         f"  algorithm: max\n"
         f"  quant_cfg:\n"
         f"    - quantizer_name: '*weight_quantizer'\n"
-        f"      cfg: fp8\n"
+        f"      cfg:\n"
+        f"        $import: fp8\n"
         f"    - quantizer_name: '*input_quantizer'\n"
-        f"      cfg: fp8\n"
+        f"      cfg:\n"
+        f"        $import: fp8\n"
     )
     recipe = load_recipe(recipe_file)
     assert recipe.quantize["quant_cfg"][0]["cfg"] == recipe.quantize["quant_cfg"][1]["cfg"]
@@ -314,9 +317,11 @@ def test_import_multiple_snippets(tmp_path):
         f"  algorithm: max\n"
         f"  quant_cfg:\n"
         f"    - quantizer_name: '*weight_quantizer'\n"
-        f"      cfg: nvfp4\n"
+        f"      cfg:\n"
+        f"        $import: nvfp4\n"
         f"    - quantizer_name: '*[kv]_bmm_quantizer'\n"
-        f"      cfg: fp8\n"
+        f"      cfg:\n"
+        f"        $import: fp8\n"
     )
     recipe = load_recipe(recipe_file)
     assert recipe.quantize["quant_cfg"][0]["cfg"]["num_bits"] == (2, 1)
@@ -324,7 +329,7 @@ def test_import_multiple_snippets(tmp_path):
 
 
 def test_import_inline_cfg_not_affected(tmp_path):
-    """Inline dict cfg entries are not touched by import resolution."""
+    """Inline dict cfg entries without $import are not touched."""
     (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n")
     recipe_file = tmp_path / "recipe.yml"
     recipe_file.write_text(
@@ -336,7 +341,8 @@ def test_import_inline_cfg_not_affected(tmp_path):
         f"  algorithm: max\n"
         f"  quant_cfg:\n"
         f"    - quantizer_name: '*weight_quantizer'\n"
-        f"      cfg: fp8\n"
+        f"      cfg:\n"
+        f"        $import: fp8\n"
         f"    - quantizer_name: '*input_quantizer'\n"
         f"      cfg:\n"
         f"        num_bits: 8\n"
@@ -358,9 +364,10 @@ def test_import_unknown_reference_raises(tmp_path):
         "  algorithm: max\n"
         "  quant_cfg:\n"
         "    - quantizer_name: '*weight_quantizer'\n"
-        "      cfg: nonexistent\n"
+        "      cfg:\n"
+        "        $import: nonexistent\n"
     )
-    with pytest.raises(ValueError, match="Unknown import reference"):
+    with pytest.raises(ValueError, match=r"Unknown \$import reference"):
         load_recipe(recipe_file)
 
 
@@ -412,11 +419,57 @@ def test_import_no_imports_section(tmp_path):
     assert recipe.quantize["quant_cfg"][0]["enable"] is False
 
 
-def test_import_builtin_config_snippet():
-    """Imports can reference built-in config snippets by relative path."""
+def test_import_builtin_recipe_with_imports():
+    """Built-in recipes using $import load and resolve correctly."""
     recipe = load_recipe("general/ptq/fp8_default-fp8_kv")
-    # This recipe doesn't use imports, but verify it still loads fine
     assert recipe.quantize
+    # Verify $import was resolved — cfg should be a dict, not a {$import: ...} marker
+    for entry in recipe.quantize["quant_cfg"]:
+        if "cfg" in entry and entry["cfg"] is not None:
+            assert "$import" not in entry["cfg"], f"Unresolved $import in {entry}"
+
+
+def test_import_entry_dict_replacement(tmp_path):
+    """$import as a quant_cfg list entry replaces with the imported dict."""
+    (tmp_path / "disable.yml").write_text("quantizer_name: '*'\nenable: false\n")
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  disable_all: {tmp_path / 'disable.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - $import: disable_all\n"
+    )
+    recipe = load_recipe(recipe_file)
+    assert recipe.quantize["quant_cfg"][0] == {"quantizer_name": "*", "enable": False}
+
+
+def test_import_entry_list_splice(tmp_path):
+    """$import as a quant_cfg list entry splices a list-valued snippet."""
+    (tmp_path / "disables.yml").write_text(
+        "- quantizer_name: '*lm_head*'\n  enable: false\n"
+        "- quantizer_name: '*router*'\n  enable: false\n"
+    )
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  disables: {tmp_path / 'disables.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - quantizer_name: '*'\n"
+        f"      enable: false\n"
+        f"    - $import: disables\n"
+    )
+    recipe = load_recipe(recipe_file)
+    assert len(recipe.quantize["quant_cfg"]) == 3
+    assert recipe.quantize["quant_cfg"][1]["quantizer_name"] == "*lm_head*"
+    assert recipe.quantize["quant_cfg"][2]["quantizer_name"] == "*router*"
 
 
 def test_import_dir_format(tmp_path):
@@ -430,7 +483,11 @@ def test_import_dir_format(tmp_path):
         f"  description: Dir with imports.\n"
     )
     (tmp_path / "quantize.yml").write_text(
-        "algorithm: max\nquant_cfg:\n  - quantizer_name: '*weight_quantizer'\n    cfg: fp8\n"
+        "algorithm: max\n"
+        "quant_cfg:\n"
+        "  - quantizer_name: '*weight_quantizer'\n"
+        "    cfg:\n"
+        "      $import: fp8\n"
     )
     recipe = load_recipe(tmp_path)
     assert recipe.quantize["quant_cfg"][0]["cfg"] == {"num_bits": (4, 3), "axis": None}
@@ -443,13 +500,10 @@ def test_import_dir_format(tmp_path):
 
 def test_import_recursive(tmp_path):
     """A snippet can itself import other snippets."""
-    # base snippet — no imports
     (tmp_path / "base.yml").write_text("num_bits: e4m3\n")
-    # mid-level snippet imports base
     (tmp_path / "mid.yml").write_text(
-        f"imports:\n  base: {tmp_path / 'base.yml'}\nnum_bits: base\n"
+        f"imports:\n  base: {tmp_path / 'base.yml'}\nnum_bits:\n  $import: base\n"
     )
-    # recipe imports mid
     recipe_file = tmp_path / "recipe.yml"
     recipe_file.write_text(
         f"imports:\n"
@@ -460,11 +514,12 @@ def test_import_recursive(tmp_path):
         f"  algorithm: max\n"
         f"  quant_cfg:\n"
         f"    - quantizer_name: '*weight_quantizer'\n"
-        f"      cfg: mid\n"
+        f"      cfg:\n"
+        f"        $import: mid\n"
     )
     recipe = load_recipe(recipe_file)
-    # mid.yml's "num_bits: base" should have been resolved to the base snippet's content
     cfg = recipe.quantize["quant_cfg"][0]["cfg"]
+    # mid.yml resolved "num_bits: {$import: base}" → base.yml content
     assert cfg["num_bits"] == {"num_bits": (4, 3)}
 
 
@@ -487,26 +542,12 @@ def test_import_circular_raises(tmp_path):
 
 
 def test_import_cross_file_same_name_no_conflict(tmp_path):
-    """Same import name in parent and child files resolve independently (no conflict).
-
-    recipe.yml imports ``fmt`` → fp8.yml (num_bits: e4m3)
-    recipe.yml also imports ``child`` → child.yml
-    child.yml  imports ``fmt`` → nvfp4.yml (num_bits: e2m1, block_sizes: ...)
-
-    The parent's ``fmt`` and the child's ``fmt`` are different configs.
-    The parent should get fp8 for its own ``fmt`` reference, and the child's
-    ``fmt`` should be resolved within the child's scope only.
-    """
-    # Two different snippets, both will be imported under the name "fmt"
+    """Same import name in parent and child resolve independently (scoped)."""
     (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n")
     (tmp_path / "nvfp4.yml").write_text("num_bits: e2m1\nblock_sizes:\n  -1: 16\n")
-
-    # Child snippet imports "fmt" → nvfp4
     (tmp_path / "child.yml").write_text(
         f"imports:\n  fmt: {tmp_path / 'nvfp4.yml'}\nweight_format: fmt\n"
     )
-
-    # Parent recipe imports "fmt" → fp8, and also imports "child"
     recipe_file = tmp_path / "recipe.yml"
     recipe_file.write_text(
         f"imports:\n"
@@ -518,10 +559,10 @@ def test_import_cross_file_same_name_no_conflict(tmp_path):
         f"  algorithm: max\n"
         f"  quant_cfg:\n"
         f"    - quantizer_name: '*weight_quantizer'\n"
-        f"      cfg: fmt\n"
+        f"      cfg:\n"
+        f"        $import: fmt\n"
     )
     recipe = load_recipe(recipe_file)
-
-    # Parent's "fmt" should resolve to fp8 (e4m3), not nvfp4
+    # Parent's "fmt" resolves to fp8 (e4m3), not child's nvfp4
     cfg = recipe.quantize["quant_cfg"][0]["cfg"]
     assert cfg == {"num_bits": (4, 3)}
diff --git a/tools/precommit/check_modelopt_recipes.py b/tools/precommit/check_modelopt_recipes.py
index f31145bc50..600de317b5 100644
--- a/tools/precommit/check_modelopt_recipes.py
+++ b/tools/precommit/check_modelopt_recipes.py
@@ -48,20 +48,20 @@ def _check_quant_cfg(quant_cfg, label: str) -> list[str]:
         )
     elif isinstance(quant_cfg, list):
         for i, entry in enumerate(quant_cfg):
-            if isinstance(entry, str):
-                # String entries are import references — resolved at load time
-                continue
             if not isinstance(entry, dict):
                 errors.append(
                     f"{label}: quant_cfg[{i}] must be a dict with "
-                    f"'quantizer_name', got {type(entry).__name__}. "
+                    f"'quantizer_name' or '$import', got {type(entry).__name__}. "
                     "See https://nvidia.github.io/Model-Optimizer/guides/_quant_cfg.html"
                 )
                 continue
+            # {$import: name} entries are resolved at load time
+            if "$import" in entry:
+                continue
             if "quantizer_name" not in entry:
                 errors.append(
                     f"{label}: quant_cfg[{i}] is missing 'quantizer_name'. "
-                    "Each entry must have an explicit 'quantizer_name' key. "
+                    "Each entry must have an explicit 'quantizer_name' or '$import' key. "
                     "See https://nvidia.github.io/Model-Optimizer/guides/_quant_cfg.html"
                 )
     return errors

From f3caa85f5c6716fd9ba7c4b43b7e845600adae13 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Mon, 13 Apr 2026 16:41:31 -0700
Subject: [PATCH 03/30] remove enable: true

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml       | 1 -
 modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml     | 3 ---
 .../general/ptq/nvfp4_experts_only-fp8_kv.yaml             | 5 -----
 modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml    | 5 -----
 modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml   | 7 -------
 5 files changed, 21 deletions(-)

diff --git a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
index 5048b4f74c..c6eedb824a 100644
--- a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
@@ -32,7 +32,6 @@ quantize:
       cfg:
         $import: fp8
     - quantizer_name: '*[kv]_bmm_quantizer'
-      enable: true
       cfg:
         $import: fp8
     - $import: default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
index d5ba09c44d..65b73f9d02 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
@@ -27,15 +27,12 @@ quantize:
   quant_cfg:
     - $import: base_disable_all
     - quantizer_name: '*weight_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*input_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*[kv]_bmm_quantizer'
-      enable: true
       cfg:
         $import: fp8
     - $import: default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
index c41005b8fd..9d17dbab5a 100644
--- a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
@@ -27,23 +27,18 @@ quantize:
   quant_cfg:
     - $import: base_disable_all
     - quantizer_name: '*mlp.experts*weight_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*mlp.experts*input_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*block_sparse_moe*weight_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*block_sparse_moe*input_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*[kv]_bmm_quantizer'
-      enable: true
       cfg:
         $import: fp8
     - $import: default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
index 86b8ba7fac..47bd5e62e6 100644
--- a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
@@ -27,23 +27,18 @@ quantize:
   quant_cfg:
     - $import: base_disable_all
     - quantizer_name: '*mlp*weight_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*mlp*input_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*block_sparse_moe*weight_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*block_sparse_moe*input_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*[kv]_bmm_quantizer'
-      enable: true
       cfg:
         $import: fp8
     - $import: default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
index c329849ad0..732255b0e9 100644
--- a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
@@ -27,31 +27,24 @@ quantize:
   quant_cfg:
     - $import: base_disable_all
     - quantizer_name: '*mlp*weight_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*mlp*input_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*block_sparse_moe*weight_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*block_sparse_moe*input_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*o_proj*weight_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*o_proj*input_quantizer'
-      enable: true
       cfg:
         $import: nvfp4
     - quantizer_name: '*[kv]_bmm_quantizer'
-      enable: true
       cfg:
         $import: fp8
     - $import: default_disabled

From f29aed84fe4b819aed6e9784b653cfc472022065 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Mon, 13 Apr 2026 16:59:35 -0700
Subject: [PATCH 04/30] remove incorrect indent

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 .pre-commit-config.yaml                       |  3 +-
 .../ptq/default_disabled_quantizers.yaml      | 65 ++++++++++---------
 2 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index dd546394c3..b48c9fdf2f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -53,7 +53,8 @@ repos:
     hooks:
       - id: yamlfmt
         args: [--mapping=2, --sequence=4, --offset=2, --implicit_start, --implicit_end, --preserve-quotes]
-        exclude: ^.github/workflows/
+        # configs/ contains reusable snippets that may be top-level YAML lists — yamlfmt misformats these
+        exclude: ^(.github/workflows/|modelopt_recipes/configs/)
 
   - repo: local
     hooks:
diff --git a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml
index 98934ae725..4b181e8f38 100644
--- a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml
+++ b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml
@@ -1,33 +1,34 @@
 # Standard quantizer exclusions: layers that should not be quantized.
-  - quantizer_name: '*block_sparse_moe.gate*'
-    enable: false
-  - quantizer_name: '*linear_attn.conv1d*'
-    enable: false
-  - quantizer_name: '*lm_head*'
-    enable: false
-  - quantizer_name: '*mixer.conv1d*'
-    enable: false
-  - quantizer_name: '*mlp.gate.*'
-    enable: false
-  - quantizer_name: '*mlp.shared_expert_gate.*'
-    enable: false
-  - quantizer_name: '*output_layer*'
-    enable: false
-  - quantizer_name: '*proj_out.*'
-    enable: false
-  - quantizer_name: '*router*'
-    enable: false
-  - quantizer_name: 'output.*'
-    enable: false
-  - parent_class: 'nn.BatchNorm1d'
-    quantizer_name: '*'
-    enable: false
-  - parent_class: 'nn.BatchNorm2d'
-    quantizer_name: '*'
-    enable: false
-  - parent_class: 'nn.BatchNorm3d'
-    quantizer_name: '*'
-    enable: false
-  - parent_class: 'nn.LeakyReLU'
-    quantizer_name: '*'
-    enable: false
+
+- quantizer_name: '*block_sparse_moe.gate*'
+  enable: false
+- quantizer_name: '*linear_attn.conv1d*'
+  enable: false
+- quantizer_name: '*lm_head*'
+  enable: false
+- quantizer_name: '*mixer.conv1d*'
+  enable: false
+- quantizer_name: '*mlp.gate.*'
+  enable: false
+- quantizer_name: '*mlp.shared_expert_gate.*'
+  enable: false
+- quantizer_name: '*output_layer*'
+  enable: false
+- quantizer_name: '*proj_out.*'
+  enable: false
+- quantizer_name: '*router*'
+  enable: false
+- quantizer_name: 'output.*'
+  enable: false
+- parent_class: 'nn.BatchNorm1d'
+  quantizer_name: '*'
+  enable: false
+- parent_class: 'nn.BatchNorm2d'
+  quantizer_name: '*'
+  enable: false
+- parent_class: 'nn.BatchNorm3d'
+  quantizer_name: '*'
+  enable: false
+- parent_class: 'nn.LeakyReLU'
+  quantizer_name: '*'
+  enable: false

From eb0842b439bfb137824f74fbb0e2fb4b1203ccc4 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Mon, 13 Apr 2026 17:01:01 -0700
Subject: [PATCH 05/30] remove filter

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 .pre-commit-config.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b48c9fdf2f..dd546394c3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -53,8 +53,7 @@ repos:
     hooks:
       - id: yamlfmt
         args: [--mapping=2, --sequence=4, --offset=2, --implicit_start, --implicit_end, --preserve-quotes]
-        # configs/ contains reusable snippets that may be top-level YAML lists — yamlfmt misformats these
-        exclude: ^(.github/workflows/|modelopt_recipes/configs/)
+        exclude: ^.github/workflows/
 
   - repo: local
     hooks:

From d69260605cdaf727cfead1d2318543a70f47df36 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Mon, 13 Apr 2026 17:12:49 -0700
Subject: [PATCH 06/30] simplify list import

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 modelopt/recipe/loader.py                     | 13 ++--
 .../configs/ptq/base_disable_all.yaml         |  5 +-
 .../ptq/default_disabled_quantizers.yaml      | 64 +++++++++----------
 tests/unit/recipe/test_loader.py              | 25 +++++++-
 4 files changed, 65 insertions(+), 42 deletions(-)

diff --git a/modelopt/recipe/loader.py b/modelopt/recipe/loader.py
index 68d0f32c63..0191266346 100644
--- a/modelopt/recipe/loader.py
+++ b/modelopt/recipe/loader.py
@@ -97,12 +97,15 @@ def _lookup(ref_name: str, context: str) -> Any:
             resolved_cfg: list[Any] = []
             for entry in quant_cfg:
                 if isinstance(entry, dict) and _IMPORT_KEY in entry:
-                    # {$import: name} → replace entire entry (or splice list)
+                    # {$import: name} → splice imported list into quant_cfg
                     imported = _lookup(entry[_IMPORT_KEY], "quant_cfg entry")
-                    if isinstance(imported, list):
-                        resolved_cfg.extend(imported)
-                    else:
-                        resolved_cfg.append(imported)
+                    if not isinstance(imported, list):
+                        raise ValueError(
+                            f"$import {entry[_IMPORT_KEY]!r} in quant_cfg must resolve to a "
+                            f"list, got {type(imported).__name__}. Config snippets used as "
+                            f"quant_cfg entries must be YAML lists."
+                        )
+                    resolved_cfg.extend(imported)
                 elif (
                     isinstance(entry, dict)
                     and isinstance(entry.get("cfg"), dict)
diff --git a/modelopt_recipes/configs/ptq/base_disable_all.yaml b/modelopt_recipes/configs/ptq/base_disable_all.yaml
index 7035b55c5f..fbe6cf514c 100644
--- a/modelopt_recipes/configs/ptq/base_disable_all.yaml
+++ b/modelopt_recipes/configs/ptq/base_disable_all.yaml
@@ -1,3 +1,4 @@
 # Disable all quantizers by default (deny-all-then-configure pattern).
-quantizer_name: '*'
-enable: false
+
+  - quantizer_name: '*'
+    enable: false
diff --git a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml
index 4b181e8f38..7c1cd532fb 100644
--- a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml
+++ b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml
@@ -1,34 +1,34 @@
 # Standard quantizer exclusions: layers that should not be quantized.
 
-- quantizer_name: '*block_sparse_moe.gate*'
-  enable: false
-- quantizer_name: '*linear_attn.conv1d*'
-  enable: false
-- quantizer_name: '*lm_head*'
-  enable: false
-- quantizer_name: '*mixer.conv1d*'
-  enable: false
-- quantizer_name: '*mlp.gate.*'
-  enable: false
-- quantizer_name: '*mlp.shared_expert_gate.*'
-  enable: false
-- quantizer_name: '*output_layer*'
-  enable: false
-- quantizer_name: '*proj_out.*'
-  enable: false
-- quantizer_name: '*router*'
-  enable: false
-- quantizer_name: 'output.*'
-  enable: false
-- parent_class: 'nn.BatchNorm1d'
-  quantizer_name: '*'
-  enable: false
-- parent_class: 'nn.BatchNorm2d'
-  quantizer_name: '*'
-  enable: false
-- parent_class: 'nn.BatchNorm3d'
-  quantizer_name: '*'
-  enable: false
-- parent_class: 'nn.LeakyReLU'
-  quantizer_name: '*'
-  enable: false
+  - quantizer_name: '*block_sparse_moe.gate*'
+    enable: false
+  - quantizer_name: '*linear_attn.conv1d*'
+    enable: false
+  - quantizer_name: '*lm_head*'
+    enable: false
+  - quantizer_name: '*mixer.conv1d*'
+    enable: false
+  - quantizer_name: '*mlp.gate.*'
+    enable: false
+  - quantizer_name: '*mlp.shared_expert_gate.*'
+    enable: false
+  - quantizer_name: '*output_layer*'
+    enable: false
+  - quantizer_name: '*proj_out.*'
+    enable: false
+  - quantizer_name: '*router*'
+    enable: false
+  - quantizer_name: 'output.*'
+    enable: false
+  - parent_class: 'nn.BatchNorm1d'
+    quantizer_name: '*'
+    enable: false
+  - parent_class: 'nn.BatchNorm2d'
+    quantizer_name: '*'
+    enable: false
+  - parent_class: 'nn.BatchNorm3d'
+    quantizer_name: '*'
+    enable: false
+  - parent_class: 'nn.LeakyReLU'
+    quantizer_name: '*'
+    enable: false
diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py
index d217518732..e6a5cdf90e 100644
--- a/tests/unit/recipe/test_loader.py
+++ b/tests/unit/recipe/test_loader.py
@@ -429,9 +429,9 @@ def test_import_builtin_recipe_with_imports():
             assert "$import" not in entry["cfg"], f"Unresolved $import in {entry}"
 
 
-def test_import_entry_dict_replacement(tmp_path):
-    """$import as a quant_cfg list entry replaces with the imported dict."""
-    (tmp_path / "disable.yml").write_text("quantizer_name: '*'\nenable: false\n")
+def test_import_entry_single_element_list(tmp_path):
+    """$import splices a single-element list snippet into quant_cfg."""
+    (tmp_path / "disable.yml").write_text("- quantizer_name: '*'\n  enable: false\n")
     recipe_file = tmp_path / "recipe.yml"
     recipe_file.write_text(
         f"imports:\n"
@@ -444,9 +444,28 @@ def test_import_entry_dict_replacement(tmp_path):
         f"    - $import: disable_all\n"
     )
     recipe = load_recipe(recipe_file)
+    assert len(recipe.quantize["quant_cfg"]) == 1
     assert recipe.quantize["quant_cfg"][0] == {"quantizer_name": "*", "enable": False}
 
 
+def test_import_entry_non_list_raises(tmp_path):
+    """$import in quant_cfg list position raises if snippet is not a list."""
+    (tmp_path / "disable.yml").write_text("quantizer_name: '*'\nenable: false\n")
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  disable_all: {tmp_path / 'disable.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - $import: disable_all\n"
+    )
+    with pytest.raises(ValueError, match="must resolve to a list"):
+        load_recipe(recipe_file)
+
+
 def test_import_entry_list_splice(tmp_path):
     """$import as a quant_cfg list entry splices a list-valued snippet."""
     (tmp_path / "disables.yml").write_text(

From e267edc9c6bd651b26dcc59905da670468913d40 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Mon, 13 Apr 2026 17:46:34 -0700
Subject: [PATCH 07/30] update docs

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 docs/source/guides/10_recipes.rst | 100 +++++++++++++++++++++++++++---
 1 file changed, 92 insertions(+), 8 deletions(-)

diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst
index 468a1d2d6c..d684efc197 100644
--- a/docs/source/guides/10_recipes.rst
+++ b/docs/source/guides/10_recipes.rst
@@ -125,6 +125,79 @@ example:
          axis:
 
 
+Composable imports
+------------------
+
+Recipes can import **reusable config snippets** via the ``imports`` section.
+This eliminates duplication — numeric format definitions and standard exclusion
+lists are authored once and referenced by name across recipes.
+
+The ``imports`` section is a dict mapping short names to config file paths.
+References use the explicit ``{$import: name}`` marker so they are never
+confused with literal values.  The marker can appear anywhere in the recipe:
+
+- As a **dict value** — the marker is replaced with the snippet content.
+- As a **list element** — the snippet (which must itself be a list) is spliced
+  into the surrounding list.
+
+.. code-block:: yaml
+
+   imports:
+     base_disable_all: configs/ptq/base_disable_all
+     default_disabled: configs/ptq/default_disabled_quantizers
+     fp8: configs/numerics/fp8
+
+   metadata:
+     recipe_type: ptq
+     description: FP8 W8A8, FP8 KV cache.
+
+   quantize:
+     algorithm: max
+     quant_cfg:
+       - $import: base_disable_all          # spliced from a single-element list snippet
+       - quantizer_name: '*weight_quantizer'
+         cfg:
+           $import: fp8                     # cfg value replaced with imported dict
+       - $import: default_disabled          # spliced from a multi-element list snippet
+
+In this example:
+
+- ``$import: base_disable_all`` and ``$import: default_disabled`` are **list elements**
+  — their snippets (YAML lists) are spliced into ``quant_cfg``.
+- ``$import: fp8`` under ``cfg`` is a **dict value** — the snippet (a YAML dict of
+  quantizer attributes) replaces the ``cfg`` field.
+
+Import paths are resolved via :func:`~modelopt.recipe.load_config` — the
+built-in ``modelopt_recipes/`` library is checked first, then the filesystem.
+
+**Recursive imports:** An imported snippet may itself contain an ``imports``
+section.  Each file's imports are scoped to that file — the same name can be
+used in different files without conflict.  Circular imports are detected and
+raise ``ValueError``.
+
+Built-in config snippets
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Reusable snippets are stored under ``modelopt_recipes/configs/``:
+
+.. list-table::
+   :header-rows: 1
+   :widths: 45 55
+
+   * - Snippet path
+     - Description
+   * - ``configs/numerics/fp8``
+     - FP8 E4M3 quantizer attributes
+   * - ``configs/numerics/nvfp4_dynamic``
+     - NVFP4 E2M1 blockwise, dynamic calibration, FP8 scales
+   * - ``configs/numerics/nvfp4_static``
+     - NVFP4 E2M1 blockwise, static calibration, FP8 scales
+   * - ``configs/ptq/base_disable_all``
+     - Disable all quantizers (deny-all-then-configure pattern)
+   * - ``configs/ptq/default_disabled_quantizers``
+     - Standard exclusions (LM head, routers, BatchNorm, etc.)
+
+
 Metadata section
 ================
 
@@ -355,11 +428,15 @@ To create a custom recipe:
 3. Update the ``metadata.description`` to describe your changes.
 4. Save the file (or directory) and pass its path to ``load_recipe()`` or ``--recipe``.
 
-Example -- creating a custom PTQ recipe (INT8 per-channel):
+Example -- creating a custom PTQ recipe using imports:
 
 .. code-block:: yaml
 
    # my_int8_recipe.yml
+   imports:
+     base_disable_all: configs/ptq/base_disable_all
+     default_disabled: configs/ptq/default_disabled_quantizers
+
    metadata:
      recipe_type: ptq
      description: INT8 per-channel weight, per-tensor activation.
@@ -367,8 +444,7 @@ Example -- creating a custom PTQ recipe (INT8 per-channel):
    quantize:
      algorithm: max
      quant_cfg:
-       - quantizer_name: '*'
-         enable: false
+       - $import: base_disable_all
        - quantizer_name: '*weight_quantizer'
          cfg:
            num_bits: 8
@@ -377,10 +453,11 @@ Example -- creating a custom PTQ recipe (INT8 per-channel):
          cfg:
            num_bits: 8
            axis:
-       - quantizer_name: '*lm_head*'
-         enable: false
-       - quantizer_name: '*output_layer*'
-         enable: false
+       - $import: default_disabled
+
+The built-in snippets (imported here under the names ``base_disable_all`` and
+``default_disabled``) supply the deny-all prefix and the standard exclusions,
+so only the format-specific entries need to be written inline.
 
 
 Recipe repository layout
@@ -402,7 +479,14 @@ The ``modelopt_recipes/`` package is organized as follows:
    +-- models/                     # Model-specific recipes
    |   +-- Step3.5-Flash/
    |       +-- nvfp4-mlp-only.yaml
-   +-- configs/                    # Shared configuration fragments
+   +-- configs/                    # Reusable config snippets (imported via $import)
+       +-- numerics/               # Numeric format definitions
+       |   +-- fp8.yml
+       |   +-- nvfp4_dynamic.yml
+       |   +-- nvfp4_static.yml
+       +-- ptq/                    # PTQ-specific entry snippets
+           +-- base_disable_all.yaml
+           +-- default_disabled_quantizers.yaml
 
 
 Recipe data model

From bc47154eee203d6abab2f47f45d2acf268d533d9 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Mon, 13 Apr 2026 18:23:27 -0700
Subject: [PATCH 08/30] add import override semantic

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 docs/source/guides/10_recipes.rst |  31 +++++++
 modelopt/recipe/loader.py         |  41 +++++++++-
 tests/unit/recipe/test_loader.py  | 131 ++++++++++++++++++++++++++++++
 3 files changed, 201 insertions(+), 2 deletions(-)

diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst
index d684efc197..5abbf0a5c6 100644
--- a/docs/source/guides/10_recipes.rst
+++ b/docs/source/guides/10_recipes.rst
@@ -140,6 +140,37 @@ confused with literal values.  The marker can appear anywhere in the recipe:
 - As a **list element** — the snippet (which must itself be a list) is spliced
   into the surrounding list.
 
+As a **dict value**, ``$import`` supports three composition modes:
+
+- **Single import:** ``$import: name`` — replaced with the snippet content.
+- **Multiple imports:** ``$import: [name1, name2]`` — snippets are merged into
+  one dict.  The snippets must not have overlapping keys.
+- **Import + extend:** extra keys alongside ``$import`` are merged in after the
+  import(s).  Extra keys must not conflict with any imported key.
+
+.. code-block:: yaml
+
+   # Single import
+   cfg:
+     $import: fp8
+
+   # Multiple imports — merge two non-overlapping snippets
+   cfg:
+     $import: [bits, scale]
+
+   # Import + extend — add axis on top of fp8 (valid only if fp8 does not define axis)
+   cfg:
+     $import: fp8
+     axis: 0          # result: {num_bits: e4m3, axis: 0}
+
+Key conflicts are never allowed — whether between imported snippets or between
+imports and inline keys.  If a key appears in more than one source, the loader
+raises an error.  This avoids ambiguous merge semantics.  If you need different
+values for an existing key, create a new snippet instead.
+
+As a **list element**, ``$import`` must be the only key — extra keys alongside
+a list splice are not supported.
+
 .. code-block:: yaml
 
    imports:
diff --git a/modelopt/recipe/loader.py b/modelopt/recipe/loader.py
index 0191266346..232a3c71ee 100644
--- a/modelopt/recipe/loader.py
+++ b/modelopt/recipe/loader.py
@@ -98,6 +98,11 @@ def _lookup(ref_name: str, context: str) -> Any:
             for entry in quant_cfg:
                 if isinstance(entry, dict) and _IMPORT_KEY in entry:
                     # {$import: name} → splice imported list into quant_cfg
+                    if len(entry) > 1:
+                        raise ValueError(
+                            f"$import must be the only key in the dict, got extra keys: "
+                            f"{sorted(k for k in entry if k != _IMPORT_KEY)}"
+                        )
                     imported = _lookup(entry[_IMPORT_KEY], "quant_cfg entry")
                     if not isinstance(imported, list):
                         raise ValueError(
@@ -111,8 +116,40 @@ def _lookup(ref_name: str, context: str) -> Any:
                     and isinstance(entry.get("cfg"), dict)
                     and _IMPORT_KEY in entry["cfg"]
                 ):
-                    # cfg: {$import: name} → replace cfg value
-                    entry["cfg"] = _lookup(entry["cfg"][_IMPORT_KEY], f"cfg of {entry}")
+                    # cfg: {$import: name_or_list, ...extra} → import, merge, extend
+                    ref = entry["cfg"].pop(_IMPORT_KEY)
+                    extra_keys = dict(entry["cfg"])  # remaining inline keys
+                    ref_names = ref if isinstance(ref, list) else [ref]
+
+                    # Merge all imported snippets, detecting conflicts between them
+                    merged: dict[str, Any] = {}
+                    for name in ref_names:
+                        snippet = _lookup(name, f"cfg of {entry}")
+                        if not isinstance(snippet, dict):
+                            raise ValueError(
+                                f"$import {name!r} in cfg must resolve to a dict, "
+                                f"got {type(snippet).__name__}."
+                            )
+                        conflicts = set(snippet) & set(merged)
+                        if conflicts:
+                            raise ValueError(
+                                f"$import {name!r} conflicts with keys from prior imports: "
+                                f"{sorted(conflicts)}. Imported snippets must not overlap."
+                            )
+                        merged.update(snippet)
+
+                    # Extend with inline keys, detecting conflicts with imports
+                    if extra_keys:
+                        conflicts = set(extra_keys) & set(merged)
+                        if conflicts:
+                            raise ValueError(
+                                f"Inline keys {sorted(conflicts)} conflict with imported "
+                                f"values. Cannot override imported values — create a new "
+                                f"snippet instead."
+                            )
+                        merged.update(extra_keys)
+
+                    entry["cfg"] = merged
                     resolved_cfg.append(entry)
                 else:
                     resolved_cfg.append(entry)
diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py
index e6a5cdf90e..d335f1fd47 100644
--- a/tests/unit/recipe/test_loader.py
+++ b/tests/unit/recipe/test_loader.py
@@ -491,6 +491,137 @@ def test_import_entry_list_splice(tmp_path):
     assert recipe.quantize["quant_cfg"][2]["quantizer_name"] == "*router*"
 
 
+def test_import_entry_sibling_keys_raises(tmp_path):
+    """$import as a list entry with sibling keys raises ValueError."""
+    (tmp_path / "disable.yml").write_text("- quantizer_name: '*'\n  enable: false\n")
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  disable_all: {tmp_path / 'disable.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - $import: disable_all\n"
+        f"      quantizer_name: '*extra*'\n"
+    )
+    with pytest.raises(ValueError, match="must be the only key"):
+        load_recipe(recipe_file)
+
+
+def test_import_cfg_extend(tmp_path):
+    """$import in cfg with extra non-conflicting keys extends the snippet."""
+    (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n")
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  fp8: {tmp_path / 'fp8.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - quantizer_name: '*weight_quantizer'\n"
+        f"      cfg:\n"
+        f"        $import: fp8\n"
+        f"        axis: 0\n"
+    )
+    recipe = load_recipe(recipe_file)
+    cfg = recipe.quantize["quant_cfg"][0]["cfg"]
+    assert cfg == {"num_bits": (4, 3), "axis": 0}
+
+
+def test_import_cfg_conflict_raises(tmp_path):
+    """$import in cfg with conflicting keys raises ValueError."""
+    (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n")
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  fp8: {tmp_path / 'fp8.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - quantizer_name: '*weight_quantizer'\n"
+        f"      cfg:\n"
+        f"        $import: fp8\n"
+        f"        num_bits: 8\n"
+    )
+    with pytest.raises(ValueError, match="conflict with imported"):
+        load_recipe(recipe_file)
+
+
+def test_import_cfg_multi_import(tmp_path):
+    """$import with a list of names merges non-overlapping snippets."""
+    (tmp_path / "bits.yml").write_text("num_bits: e4m3\n")
+    (tmp_path / "axis.yml").write_text("axis: 0\n")
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  bits: {tmp_path / 'bits.yml'}\n"
+        f"  axis: {tmp_path / 'axis.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - quantizer_name: '*weight_quantizer'\n"
+        f"      cfg:\n"
+        f"        $import: [bits, axis]\n"
+    )
+    recipe = load_recipe(recipe_file)
+    cfg = recipe.quantize["quant_cfg"][0]["cfg"]
+    assert cfg == {"num_bits": (4, 3), "axis": 0}
+
+
+def test_import_cfg_multi_import_conflict_raises(tmp_path):
+    """$import with a list of names raises when snippets have overlapping keys."""
+    (tmp_path / "a.yml").write_text("num_bits: e4m3\n")
+    (tmp_path / "b.yml").write_text("num_bits: 8\n")
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  a: {tmp_path / 'a.yml'}\n"
+        f"  b: {tmp_path / 'b.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - quantizer_name: '*weight_quantizer'\n"
+        f"      cfg:\n"
+        f"        $import: [a, b]\n"
+    )
+    with pytest.raises(ValueError, match="conflicts with keys from prior imports"):
+        load_recipe(recipe_file)
+
+
+def test_import_cfg_multi_import_with_extend(tmp_path):
+    """$import list + inline keys all merge without conflicts."""
+    (tmp_path / "bits.yml").write_text("num_bits: e4m3\n")
+    (tmp_path / "scale.yml").write_text("scale_bits: e8m0\n")
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  bits: {tmp_path / 'bits.yml'}\n"
+        f"  scale: {tmp_path / 'scale.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - quantizer_name: '*weight_quantizer'\n"
+        f"      cfg:\n"
+        f"        $import: [bits, scale]\n"
+        f"        axis: 0\n"
+    )
+    recipe = load_recipe(recipe_file)
+    cfg = recipe.quantize["quant_cfg"][0]["cfg"]
+    assert cfg == {"num_bits": (4, 3), "scale_bits": (8, 0), "axis": 0}
+
+
 def test_import_dir_format(tmp_path):
     """Imports in recipe.yml work with the directory recipe format."""
     (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n")

From dbb524d9026e7ac2aebe18253e15ae357259cf01 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Mon, 13 Apr 2026 18:32:27 -0700
Subject: [PATCH 09/30] more clear docs

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 docs/source/guides/10_recipes.rst | 49 +++++++++++++++++++++++++++----
 1 file changed, 44 insertions(+), 5 deletions(-)

diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst
index 5abbf0a5c6..fe99a22e1a 100644
--- a/docs/source/guides/10_recipes.rst
+++ b/docs/source/guides/10_recipes.rst
@@ -54,14 +54,18 @@ A recipe contains two top-level sections: ``metadata`` and a type-specific
 configuration section (for example, ``quantize`` for PTQ recipes).  These can live
 in a single YAML file or be split across files in a directory.
 
+Recipes support two authoring styles: **inline** (all values written directly)
+and **import-based** (reusable snippets referenced via ``$import``).  Both
+styles can be used in a single-file or directory layout.
+
 Single-file format
 ------------------
 
-The simplest form is a single ``.yml`` or ``.yaml`` file.  Here is a PTQ example:
+The simplest form is a single ``.yml`` or ``.yaml`` file.
 
-.. code-block:: yaml
+**Inline style** — all config values are written directly:
 
-   # modelopt_recipes/general/ptq/fp8_default-fp8_kv.yml
+.. code-block:: yaml
 
    metadata:
      recipe_type: ptq
@@ -81,11 +85,42 @@ The simplest form is a single ``.yml`` or ``.yaml`` file.  Here is a PTQ example
            num_bits: e4m3
            axis:
        - quantizer_name: '*[kv]_bmm_quantizer'
-         enable: true
          cfg:
            num_bits: e4m3
        # ... standard exclusions omitted for brevity
 
+**Import style** — the same recipe using reusable config snippets:
+
+.. code-block:: yaml
+
+   imports:
+     base_disable_all: configs/ptq/base_disable_all
+     default_disabled: configs/ptq/default_disabled_quantizers
+     fp8: configs/numerics/fp8
+
+   metadata:
+     recipe_type: ptq
+     description: FP8 per-tensor weight and activation (W8A8), FP8 KV cache, max calibration.
+
+   quantize:
+     algorithm: max
+     quant_cfg:
+       - $import: base_disable_all
+       - quantizer_name: '*input_quantizer'
+         cfg:
+           $import: fp8
+       - quantizer_name: '*weight_quantizer'
+         cfg:
+           $import: fp8
+       - quantizer_name: '*[kv]_bmm_quantizer'
+         cfg:
+           $import: fp8
+       - $import: default_disabled
+
+Both styles produce identical results at load time.  The import style reduces
+duplication when multiple recipes share the same numeric formats or exclusion
+lists.  See :ref:`composable-imports` below for the full ``$import`` specification.
+
 Directory format
 ----------------
 
@@ -96,7 +131,7 @@ example:
 .. code-block:: text
 
    my_recipe/
-     recipe.yml      # metadata section
+     recipe.yml      # metadata section (+ optional imports)
      quantize.yml    # quantize section (quant_cfg + algorithm)
 
 ``recipe.yml``:
@@ -124,6 +159,10 @@ example:
          num_bits: e4m3
          axis:
 
+Both inline and import styles work with the directory format.  When using
+imports in a directory recipe, place the ``imports`` section in ``recipe.yml``.
+
+.. _composable-imports:
 
 Composable imports
 ------------------

From 99414905b929e2422fda0ecd339f49874df31036 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Mon, 13 Apr 2026 18:38:13 -0700
Subject: [PATCH 10/30] changelog

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 CHANGELOG.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index fdd738590a..3712d505f9 100755
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -15,6 +15,7 @@ Changelog
 - Enable PTQ workflow for the Step3.5-Flash MoE model with NVFP4 W4A4 + FP8 KV cache quantization. See `modelopt_recipes/models/Step3.5-Flash/nvfp4-mlp-only.yaml <https://github.com/NVIDIA/Model-Optimizer/blob/main/modelopt_recipes/models/Step3.5-Flash/nvfp4-mlp-only.yaml>`_ for more details.
 - Add support for vLLM fakequant reload using ModelOpt state for HF models. See `examples/vllm_serve/README.md <https://github.com/NVIDIA/Model-Optimizer/tree/main/examples/vllm_serve#load-qatptq-model-and-serve-in-vllm-wip>`_ for more details.
 - [Early Testing] Add Claude Code PTQ skill (``.claude/skills/ptq/``) for agent-assisted post-training quantization. The skill guides the agent through environment detection, model support checking, format selection, and execution via the launcher or manual SLURM/Docker/bare GPU paths. Includes handling for unlisted models with custom module patching. This feature is in early testing — use with caution.
+- Add composable ``$import`` system for recipe YAML configs. Recipes can now declare an ``imports`` section mapping names to reusable config snippet files. The ``{$import: name}`` marker resolves at load time — as a dict value it replaces the content (with optional extend and multi-import via ``$import: [a, b]``), as a list element it splices the snippet entries. Key conflicts between imports or inline keys raise errors. Resolution is recursive with circular import detection. All built-in PTQ recipes converted to use imports with shared snippets under ``modelopt_recipes/configs/``. See :ref:`composable-imports` for the full specification.
 
 **Backward Breaking Changes**
 

From 74235a9e246898c982ae27f4e3cdfa8f71039680 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Tue, 14 Apr 2026 10:13:41 -0700
Subject: [PATCH 11/30] new conflict semantic

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 CHANGELOG.rst                     |  2 +-
 docs/source/guides/10_recipes.rst | 37 ++++++++++++++++++-------------
 modelopt/recipe/loader.py         | 27 ++++++----------------
 tests/unit/recipe/test_loader.py  | 27 +++++++++++++---------
 4 files changed, 46 insertions(+), 47 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 3712d505f9..e2c4b2a7c0 100755
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -15,7 +15,7 @@ Changelog
 - Enable PTQ workflow for the Step3.5-Flash MoE model with NVFP4 W4A4 + FP8 KV cache quantization. See `modelopt_recipes/models/Step3.5-Flash/nvfp4-mlp-only.yaml <https://github.com/NVIDIA/Model-Optimizer/blob/main/modelopt_recipes/models/Step3.5-Flash/nvfp4-mlp-only.yaml>`_ for more details.
 - Add support for vLLM fakequant reload using ModelOpt state for HF models. See `examples/vllm_serve/README.md <https://github.com/NVIDIA/Model-Optimizer/tree/main/examples/vllm_serve#load-qatptq-model-and-serve-in-vllm-wip>`_ for more details.
 - [Early Testing] Add Claude Code PTQ skill (``.claude/skills/ptq/``) for agent-assisted post-training quantization. The skill guides the agent through environment detection, model support checking, format selection, and execution via the launcher or manual SLURM/Docker/bare GPU paths. Includes handling for unlisted models with custom module patching. This feature is in early testing — use with caution.
-- Add composable ``$import`` system for recipe YAML configs. Recipes can now declare an ``imports`` section mapping names to reusable config snippet files. The ``{$import: name}`` marker resolves at load time — as a dict value it replaces the content (with optional extend and multi-import via ``$import: [a, b]``), as a list element it splices the snippet entries. Key conflicts between imports or inline keys raise errors. Resolution is recursive with circular import detection. All built-in PTQ recipes converted to use imports with shared snippets under ``modelopt_recipes/configs/``. See :ref:`composable-imports` for the full specification.
+- Add composable ``$import`` system for recipe YAML configs. Recipes can now declare an ``imports`` section mapping names to reusable config snippet files. The ``{$import: name}`` marker resolves at load time — as a dict value it replaces the content with ordered override precedence (later imports override earlier, inline keys override all), as a list element it splices the snippet entries. Supports multi-import (``$import: [a, b]``) and inline extension/override. Resolution is recursive with circular import detection. All built-in PTQ recipes converted to use imports with shared snippets under ``modelopt_recipes/configs/``. See :ref:`composable-imports` for the full specification.
 
 **Backward Breaking Changes**
 
diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst
index fe99a22e1a..d26c5d3671 100644
--- a/docs/source/guides/10_recipes.rst
+++ b/docs/source/guides/10_recipes.rst
@@ -179,33 +179,38 @@ confused with literal values.  The marker can appear anywhere in the recipe:
 - As a **list element** — the snippet (which must itself be a list) is spliced
   into the surrounding list.
 
-As a **dict value**, ``$import`` supports three composition modes:
+As a **dict value**, ``$import`` supports composition with clear override
+precedence (lowest to highest):
 
-- **Single import:** ``$import: name`` — replaced with the snippet content.
-- **Multiple imports:** ``$import: [name1, name2]`` — snippets are merged into
-  one dict.  The snippets must not have overlapping keys.
-- **Import + extend:** extra keys alongside ``$import`` are merged in after the
-  import(s).  Extra keys must not conflict with any imported key.
+1. **Imports in list order** — ``$import: [base, override]``: later snippets
+   override earlier ones on key conflicts.
+2. **Inline keys** — extra keys alongside ``$import`` override all imported
+   values.
+
+This is equivalent to calling ``dict.update()`` in order: imports first (in
+list order), then inline keys last.  The merge is shallow: a nested dict is replaced as a whole, not merged key by key.
 
 .. code-block:: yaml
 
    # Single import
    cfg:
-     $import: fp8
+     $import: nvfp4
 
-   # Multiple imports — merge two non-overlapping snippets
+   # Import + override — import the nvfp4 snippet, then override block_sizes inline
    cfg:
-     $import: [bits, scale]
+     $import: nvfp4    # imports {num_bits: e2m1, block_sizes: {-1: 16, type: dynamic, ...}}
+     block_sizes:
+       -1: 16
+       type: static    # inline block_sizes replaces the imported one wholesale (dynamic → static); keys not restated here are dropped
 
-   # Import + extend — add axis on top of imported fp8
+   # Multiple imports — later snippet overrides earlier on conflict
    cfg:
-     $import: fp8
-     axis: 0          # result: {num_bits: e4m3, axis: 0}
+     $import: [base_format, kv_tweaks]   # kv_tweaks wins on shared keys
 
-Key conflicts are never allowed — whether between imported snippets or between
-imports and inline keys.  If a key appears in more than one source, the loader
-raises an error.  This avoids ambiguous merge semantics.  If you need different
-values for an existing key, create a new snippet instead.
+   # Combined: multi-import + inline key
+   cfg:
+     $import: [bits, scale]
+     axis: 0            # highest precedence
 
 As a **list element**, ``$import`` must be the only key — extra keys alongside
 a list splice are not supported.
diff --git a/modelopt/recipe/loader.py b/modelopt/recipe/loader.py
index 232a3c71ee..32f610e12b 100644
--- a/modelopt/recipe/loader.py
+++ b/modelopt/recipe/loader.py
@@ -116,12 +116,15 @@ def _lookup(ref_name: str, context: str) -> Any:
                     and isinstance(entry.get("cfg"), dict)
                     and _IMPORT_KEY in entry["cfg"]
                 ):
-                    # cfg: {$import: name_or_list, ...extra} → import, merge, extend
+                    # cfg: {$import: name_or_list, ...inline} → import then override
+                    #
+                    # Precedence (lowest → highest):
+                    #   1. Imports in list order (later imports override earlier)
+                    #   2. Inline keys (override all imports)
                     ref = entry["cfg"].pop(_IMPORT_KEY)
-                    extra_keys = dict(entry["cfg"])  # remaining inline keys
+                    inline_keys = dict(entry["cfg"])  # remaining inline keys
                     ref_names = ref if isinstance(ref, list) else [ref]
 
-                    # Merge all imported snippets, detecting conflicts between them
                     merged: dict[str, Any] = {}
                     for name in ref_names:
                         snippet = _lookup(name, f"cfg of {entry}")
@@ -130,25 +133,9 @@ def _lookup(ref_name: str, context: str) -> Any:
                                 f"$import {name!r} in cfg must resolve to a dict, "
                                 f"got {type(snippet).__name__}."
                             )
-                        conflicts = set(snippet) & set(merged)
-                        if conflicts:
-                            raise ValueError(
-                                f"$import {name!r} conflicts with keys from prior imports: "
-                                f"{sorted(conflicts)}. Imported snippets must not overlap."
-                            )
                         merged.update(snippet)
 
-                    # Extend with inline keys, detecting conflicts with imports
-                    if extra_keys:
-                        conflicts = set(extra_keys) & set(merged)
-                        if conflicts:
-                            raise ValueError(
-                                f"Inline keys {sorted(conflicts)} conflict with imported "
-                                f"values. Cannot override imported values — create a new "
-                                f"snippet instead."
-                            )
-                        merged.update(extra_keys)
-
+                    merged.update(inline_keys)
                     entry["cfg"] = merged
                     resolved_cfg.append(entry)
                 else:
diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py
index d335f1fd47..723fbdcd35 100644
--- a/tests/unit/recipe/test_loader.py
+++ b/tests/unit/recipe/test_loader.py
@@ -532,9 +532,9 @@ def test_import_cfg_extend(tmp_path):
     assert cfg == {"num_bits": (4, 3), "axis": 0}
 
 
-def test_import_cfg_conflict_raises(tmp_path):
-    """$import in cfg with conflicting keys raises ValueError."""
-    (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n")
+def test_import_cfg_inline_overrides_import(tmp_path):
+    """Inline keys override imported values (highest precedence)."""
+    (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n")
     recipe_file = tmp_path / "recipe.yml"
     recipe_file.write_text(
         f"imports:\n"
@@ -549,8 +549,12 @@ def test_import_cfg_conflict_raises(tmp_path):
         f"        $import: fp8\n"
         f"        num_bits: 8\n"
     )
-    with pytest.raises(ValueError, match="conflict with imported"):
-        load_recipe(recipe_file)
+    recipe = load_recipe(recipe_file)
+    cfg = recipe.quantize["quant_cfg"][0]["cfg"]
+    # inline num_bits: 8 overrides imported num_bits: e4m3 → (4,3)
+    assert cfg["num_bits"] == 8
+    # imported axis: None is preserved (no inline override)
+    assert cfg["axis"] is None
 
 
 def test_import_cfg_multi_import(tmp_path):
@@ -576,9 +580,9 @@ def test_import_cfg_multi_import(tmp_path):
     assert cfg == {"num_bits": (4, 3), "axis": 0}
 
 
-def test_import_cfg_multi_import_conflict_raises(tmp_path):
-    """$import with a list of names raises when snippets have overlapping keys."""
-    (tmp_path / "a.yml").write_text("num_bits: e4m3\n")
+def test_import_cfg_multi_import_later_overrides_earlier(tmp_path):
+    """In $import list, later snippets override earlier ones on key conflicts."""
+    (tmp_path / "a.yml").write_text("num_bits: e4m3\naxis: 0\n")
     (tmp_path / "b.yml").write_text("num_bits: 8\n")
     recipe_file = tmp_path / "recipe.yml"
     recipe_file.write_text(
@@ -594,8 +598,11 @@ def test_import_cfg_multi_import_conflict_raises(tmp_path):
         f"      cfg:\n"
         f"        $import: [a, b]\n"
     )
-    with pytest.raises(ValueError, match="conflicts with keys from prior imports"):
-        load_recipe(recipe_file)
+    recipe = load_recipe(recipe_file)
+    cfg = recipe.quantize["quant_cfg"][0]["cfg"]
+    # b overrides a's num_bits; a's axis is preserved
+    assert cfg["num_bits"] == 8
+    assert cfg["axis"] == 0
 
 
 def test_import_cfg_multi_import_with_extend(tmp_path):

From 8182b74d06e67b9547e91ee36dc3844fa95faaa5 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Tue, 14 Apr 2026 15:00:31 -0700
Subject: [PATCH 12/30] support import for recipe snippets

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 modelopt/recipe/_config_loader.py             |  34 +++++-
 modelopt/recipe/loader.py                     | 104 ++++++++++--------
 modelopt_recipes/configs/ptq/fp8_kv.yaml      |   7 ++
 .../general/ptq/fp8_default-fp8_kv.yaml       |   5 +-
 .../general/ptq/nvfp4_default-fp8_kv.yaml     |   6 +-
 .../ptq/nvfp4_experts_only-fp8_kv.yaml        |   6 +-
 .../general/ptq/nvfp4_mlp_only-fp8_kv.yaml    |   6 +-
 .../general/ptq/nvfp4_omlp_only-fp8_kv.yaml   |   6 +-
 tests/unit/recipe/test_loader.py              |  43 ++++++++
 9 files changed, 149 insertions(+), 68 deletions(-)
 create mode 100644 modelopt_recipes/configs/ptq/fp8_kv.yaml

diff --git a/modelopt/recipe/_config_loader.py b/modelopt/recipe/_config_loader.py
index da6f4b7640..a94b67edb2 100644
--- a/modelopt/recipe/_config_loader.py
+++ b/modelopt/recipe/_config_loader.py
@@ -103,9 +103,39 @@ def load_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[
             f"Cannot find config file of {config_file}, paths checked: {paths_to_check}"
         )
 
-    _raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
-    if _raw is None:
+    text = config_path.read_text(encoding="utf-8")
+    docs = list(yaml.safe_load_all(text))
+
+    if len(docs) == 0 or docs[0] is None:
         return {}
+    if len(docs) == 1:
+        _raw = docs[0]
+    elif len(docs) == 2:
+        # Multi-document: first doc is imports/metadata, second is content.
+        # Merge the imports into the content for downstream resolution.
+        header, content = docs[0], docs[1]
+        if not isinstance(header, dict):
+            raise ValueError(
+                f"Config file {config_path}: first YAML document must be a mapping, "
+                f"got {type(header).__name__}"
+            )
+        if content is None:
+            content = {}
+        if isinstance(content, dict):
+            _raw = {**header, **content}
+        elif isinstance(content, list):
+            # List content with a header dict — attach imports via wrapper
+            _raw = {**header, "_list_content": content}
+        else:
+            raise ValueError(
+                f"Config file {config_path}: second YAML document must be a mapping or list, "
+                f"got {type(content).__name__}"
+            )
+    else:
+        raise ValueError(
+            f"Config file {config_path}: expected 1 or 2 YAML documents, got {len(docs)}"
+        )
+
     if not isinstance(_raw, (dict, list)):
         raise ValueError(
             f"Config file {config_path} must contain a YAML mapping or list, got {type(_raw).__name__}"
diff --git a/modelopt/recipe/loader.py b/modelopt/recipe/loader.py
index 32f610e12b..da62b17b63 100644
--- a/modelopt/recipe/loader.py
+++ b/modelopt/recipe/loader.py
@@ -79,6 +79,9 @@ def _resolve_imports(
         snippet = load_config(config_path)
         if isinstance(snippet, dict) and "imports" in snippet:
             snippet = _resolve_imports(snippet, _loading | {config_path})
+        # Unwrap _list_content (multi-document YAML: imports + list content)
+        if isinstance(snippet, dict) and "_list_content" in snippet:
+            snippet = snippet["_list_content"]
         import_map[name] = snippet
 
     def _lookup(ref_name: str, context: str) -> Any:
@@ -89,58 +92,65 @@ def _lookup(ref_name: str, context: str) -> Any:
             )
         return import_map[ref_name]
 
+    def _resolve_list(entries: list[Any]) -> list[Any]:
+        """Resolve $import markers in a list of quant_cfg-style entries."""
+        resolved: list[Any] = []
+        for entry in entries:
+            if isinstance(entry, dict) and _IMPORT_KEY in entry:
+                # {$import: name} → splice imported list
+                if len(entry) > 1:
+                    raise ValueError(
+                        f"$import must be the only key in the dict, got extra keys: "
+                        f"{sorted(k for k in entry if k != _IMPORT_KEY)}"
+                    )
+                imported = _lookup(entry[_IMPORT_KEY], "list entry")
+                if not isinstance(imported, list):
+                    raise ValueError(
+                        f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a "
+                        f"list, got {type(imported).__name__}."
+                    )
+                resolved.extend(imported)
+            elif (
+                isinstance(entry, dict)
+                and isinstance(entry.get("cfg"), dict)
+                and _IMPORT_KEY in entry["cfg"]
+            ):
+                # cfg: {$import: name_or_list, ...inline} → import then override
+                #
+                # Precedence (lowest → highest):
+                #   1. Imports in list order (later imports override earlier)
+                #   2. Inline keys (override all imports)
+                ref = entry["cfg"].pop(_IMPORT_KEY)
+                inline_keys = dict(entry["cfg"])
+                ref_names = ref if isinstance(ref, list) else [ref]
+
+                merged: dict[str, Any] = {}
+                for name in ref_names:
+                    snippet = _lookup(name, f"cfg of {entry}")
+                    if not isinstance(snippet, dict):
+                        raise ValueError(
+                            f"$import {name!r} in cfg must resolve to a dict, "
+                            f"got {type(snippet).__name__}."
+                        )
+                    merged.update(snippet)
+
+                merged.update(inline_keys)
+                entry["cfg"] = merged
+                resolved.append(entry)
+            else:
+                resolved.append(entry)
+        return resolved
+
     # Resolve $import references in quant_cfg entries
     quantize = data.get("quantize")
     if isinstance(quantize, dict):
         quant_cfg = quantize.get("quant_cfg")
         if isinstance(quant_cfg, list):
-            resolved_cfg: list[Any] = []
-            for entry in quant_cfg:
-                if isinstance(entry, dict) and _IMPORT_KEY in entry:
-                    # {$import: name} → splice imported list into quant_cfg
-                    if len(entry) > 1:
-                        raise ValueError(
-                            f"$import must be the only key in the dict, got extra keys: "
-                            f"{sorted(k for k in entry if k != _IMPORT_KEY)}"
-                        )
-                    imported = _lookup(entry[_IMPORT_KEY], "quant_cfg entry")
-                    if not isinstance(imported, list):
-                        raise ValueError(
-                            f"$import {entry[_IMPORT_KEY]!r} in quant_cfg must resolve to a "
-                            f"list, got {type(imported).__name__}. Config snippets used as "
-                            f"quant_cfg entries must be YAML lists."
-                        )
-                    resolved_cfg.extend(imported)
-                elif (
-                    isinstance(entry, dict)
-                    and isinstance(entry.get("cfg"), dict)
-                    and _IMPORT_KEY in entry["cfg"]
-                ):
-                    # cfg: {$import: name_or_list, ...inline} → import then override
-                    #
-                    # Precedence (lowest → highest):
-                    #   1. Imports in list order (later imports override earlier)
-                    #   2. Inline keys (override all imports)
-                    ref = entry["cfg"].pop(_IMPORT_KEY)
-                    inline_keys = dict(entry["cfg"])  # remaining inline keys
-                    ref_names = ref if isinstance(ref, list) else [ref]
-
-                    merged: dict[str, Any] = {}
-                    for name in ref_names:
-                        snippet = _lookup(name, f"cfg of {entry}")
-                        if not isinstance(snippet, dict):
-                            raise ValueError(
-                                f"$import {name!r} in cfg must resolve to a dict, "
-                                f"got {type(snippet).__name__}."
-                            )
-                        merged.update(snippet)
-
-                    merged.update(inline_keys)
-                    entry["cfg"] = merged
-                    resolved_cfg.append(entry)
-                else:
-                    resolved_cfg.append(entry)
-            quantize["quant_cfg"] = resolved_cfg
+            quantize["quant_cfg"] = _resolve_list(quant_cfg)
+
+    # Resolve $import references in _list_content (multi-document snippets)
+    if "_list_content" in data:
+        data["_list_content"] = _resolve_list(data["_list_content"])
 
     return data
 
diff --git a/modelopt_recipes/configs/ptq/fp8_kv.yaml b/modelopt_recipes/configs/ptq/fp8_kv.yaml
new file mode 100644
index 0000000000..cb3ff3a009
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/fp8_kv.yaml
@@ -0,0 +1,7 @@
+# FP8 E4M3 KV cache quantization.
+imports:
+  fp8: configs/numerics/fp8
+---
+  - quantizer_name: '*[kv]_bmm_quantizer'
+    cfg:
+      $import: fp8
diff --git a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
index c6eedb824a..680677d607 100644
--- a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
@@ -17,6 +17,7 @@ imports:
   base_disable_all: configs/ptq/base_disable_all
   default_disabled: configs/ptq/default_disabled_quantizers
   fp8: configs/numerics/fp8
+  fp8_kv: configs/ptq/fp8_kv
 
 metadata:
   recipe_type: ptq
@@ -31,7 +32,5 @@ quantize:
     - quantizer_name: '*weight_quantizer'
       cfg:
         $import: fp8
-    - quantizer_name: '*[kv]_bmm_quantizer'
-      cfg:
-        $import: fp8
+    - $import: fp8_kv
     - $import: default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
index 65b73f9d02..9dc6da1ace 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
@@ -17,7 +17,7 @@ imports:
   base_disable_all: configs/ptq/base_disable_all
   default_disabled: configs/ptq/default_disabled_quantizers
   nvfp4: configs/numerics/nvfp4_dynamic
-  fp8: configs/numerics/fp8
+  fp8_kv: configs/ptq/fp8_kv
 
 metadata:
   recipe_type: ptq
@@ -32,7 +32,5 @@ quantize:
     - quantizer_name: '*input_quantizer'
       cfg:
         $import: nvfp4
-    - quantizer_name: '*[kv]_bmm_quantizer'
-      cfg:
-        $import: fp8
+    - $import: fp8_kv
     - $import: default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
index 9d17dbab5a..a3730f839e 100644
--- a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
@@ -17,7 +17,7 @@ imports:
   base_disable_all: configs/ptq/base_disable_all
   default_disabled: configs/ptq/default_disabled_quantizers
   nvfp4: configs/numerics/nvfp4_dynamic
-  fp8: configs/numerics/fp8
+  fp8_kv: configs/ptq/fp8_kv
 
 metadata:
   recipe_type: ptq
@@ -38,7 +38,5 @@ quantize:
     - quantizer_name: '*block_sparse_moe*input_quantizer'
       cfg:
         $import: nvfp4
-    - quantizer_name: '*[kv]_bmm_quantizer'
-      cfg:
-        $import: fp8
+    - $import: fp8_kv
     - $import: default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
index 47bd5e62e6..0d9d0861ca 100644
--- a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
@@ -17,7 +17,7 @@ imports:
   base_disable_all: configs/ptq/base_disable_all
   default_disabled: configs/ptq/default_disabled_quantizers
   nvfp4: configs/numerics/nvfp4_dynamic
-  fp8: configs/numerics/fp8
+  fp8_kv: configs/ptq/fp8_kv
 
 metadata:
   recipe_type: ptq
@@ -38,7 +38,5 @@ quantize:
     - quantizer_name: '*block_sparse_moe*input_quantizer'
       cfg:
         $import: nvfp4
-    - quantizer_name: '*[kv]_bmm_quantizer'
-      cfg:
-        $import: fp8
+    - $import: fp8_kv
     - $import: default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
index 732255b0e9..1a1fa63255 100644
--- a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
@@ -17,7 +17,7 @@ imports:
   base_disable_all: configs/ptq/base_disable_all
   default_disabled: configs/ptq/default_disabled_quantizers
   nvfp4: configs/numerics/nvfp4_dynamic
-  fp8: configs/numerics/fp8
+  fp8_kv: configs/ptq/fp8_kv
 
 metadata:
   recipe_type: ptq
@@ -44,7 +44,5 @@ quantize:
     - quantizer_name: '*o_proj*input_quantizer'
       cfg:
         $import: nvfp4
-    - quantizer_name: '*[kv]_bmm_quantizer'
-      cfg:
-        $import: fp8
+    - $import: fp8_kv
     - $import: default_disabled
diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py
index 723fbdcd35..4dd235a081 100644
--- a/tests/unit/recipe/test_loader.py
+++ b/tests/unit/recipe/test_loader.py
@@ -650,6 +650,49 @@ def test_import_dir_format(tmp_path):
     assert recipe.quantize["quant_cfg"][0]["cfg"] == {"num_bits": (4, 3), "axis": None}
 
 
+# ---------------------------------------------------------------------------
+# imports — multi-document snippets
+# ---------------------------------------------------------------------------
+
+
+def test_import_multi_document_list_snippet(tmp_path):
+    """List snippet using multi-document YAML (imports --- content) resolves $import."""
+    (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n")
+    (tmp_path / "kv.yaml").write_text(
+        f"imports:\n"
+        f"  fp8: {tmp_path / 'fp8.yml'}\n"
+        f"---\n"
+        f"- quantizer_name: '*[kv]_bmm_quantizer'\n"
+        f"  cfg:\n"
+        f"    $import: fp8\n"
+    )
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  kv: {tmp_path / 'kv.yaml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - $import: kv\n"
+    )
+    recipe = load_recipe(recipe_file)
+    assert len(recipe.quantize["quant_cfg"]) == 1
+    assert recipe.quantize["quant_cfg"][0]["quantizer_name"] == "*[kv]_bmm_quantizer"
+    assert recipe.quantize["quant_cfg"][0]["cfg"] == {"num_bits": (4, 3)}
+
+
+def test_import_builtin_fp8_kv_snippet():
+    """Built-in fp8_kv snippet uses multi-document format and resolves correctly."""
+    recipe = load_recipe("general/ptq/fp8_default-fp8_kv")
+    kv_entries = [
+        e for e in recipe.quantize["quant_cfg"] if e.get("quantizer_name") == "*[kv]_bmm_quantizer"
+    ]
+    assert len(kv_entries) == 1
+    assert kv_entries[0]["cfg"]["num_bits"] == (4, 3)
+
+
 # ---------------------------------------------------------------------------
 # imports — recursive resolution and cycle detection
 # ---------------------------------------------------------------------------

From fd13e6ed6d7489367ed5591a53184409a9d0a2d1 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Tue, 14 Apr 2026 15:19:46 -0700
Subject: [PATCH 13/30] license headers + more doc

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 docs/source/guides/10_recipes.rst             | 42 ++++++++++++++++++-
 modelopt_recipes/configs/numerics/fp8.yml     | 15 +++++++
 .../configs/numerics/nvfp4_dynamic.yml        | 15 +++++++
 .../configs/numerics/nvfp4_static.yml         | 15 +++++++
 .../configs/ptq/base_disable_all.yaml         | 15 +++++++
 .../ptq/default_disabled_quantizers.yaml      | 15 +++++++
 modelopt_recipes/configs/ptq/fp8_kv.yaml      | 21 ++++++++++
 7 files changed, 137 insertions(+), 1 deletion(-)

diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst
index d26c5d3671..aa59583446 100644
--- a/docs/source/guides/10_recipes.rst
+++ b/docs/source/guides/10_recipes.rst
@@ -173,7 +173,16 @@ lists are authored once and referenced by name across recipes.
 
 The ``imports`` section is a dict mapping short names to config file paths.
 References use the explicit ``{$import: name}`` marker so they are never
-confused with literal values.  The marker can appear anywhere in the recipe:
+confused with literal values.
+
+.. note::
+
+   ``imports`` (no ``$``) is a **top-level structural section**, a sibling of
+   ``metadata`` and ``quantize``, that declares the recipe's dependencies.
+   ``$import`` (with ``$``) is an **inline directive** that appears inside
+   data values and is resolved at load time.
+
+The ``$import`` marker can appear anywhere in the recipe:
 
 - As a **dict value** — the marker is replaced with the snippet content.
 - As a **list element** — the snippet (which must itself be a list) is spliced
@@ -250,6 +259,35 @@ section.  Each file's imports are scoped to that file — the same name can be
 used in different files without conflict.  Circular imports are detected and
 raise ``ValueError``.
 
+Multi-document snippets
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Dict-valued snippets (e.g., numeric format definitions) can use ``imports``
+directly because the ``imports`` key and the snippet content are both part of
+the same YAML mapping.  List-valued snippets have a problem: YAML only allows
+one root node per document, so a file cannot be both a mapping (for
+``imports``) and a list (for entries) at the same time.
+
+The solution is **multi-document YAML**: the first document holds the
+``imports``, and the second document (after ``---``) holds the list content.
+The loader parses both documents, resolves ``$import`` markers in the content,
+and returns the resolved list:
+
+.. code-block:: yaml
+
+   # configs/ptq/fp8_kv.yaml — list snippet that imports a dict snippet
+   imports:
+     fp8: configs/numerics/fp8
+   ---
+   - quantizer_name: '*[kv]_bmm_quantizer'
+     cfg:
+       $import: fp8
+
+This enables full composability — list snippets can reference dict snippets,
+dict snippets can reference other dict snippets, and recipes can reference
+any of them.  All import resolution happens at load time with the same
+precedence rules.
+
 Built-in config snippets
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -271,6 +309,8 @@ Reusable snippets are stored under ``modelopt_recipes/configs/``:
      - Disable all quantizers (deny-all-then-configure pattern)
    * - ``configs/ptq/default_disabled_quantizers``
      - Standard exclusions (LM head, routers, BatchNorm, etc.)
+   * - ``configs/ptq/fp8_kv``
+     - FP8 E4M3 KV cache quantization (multi-document, imports ``fp8``)
 
 
 Metadata section
diff --git a/modelopt_recipes/configs/numerics/fp8.yml b/modelopt_recipes/configs/numerics/fp8.yml
index e84779c8f4..2fd99627df 100644
--- a/modelopt_recipes/configs/numerics/fp8.yml
+++ b/modelopt_recipes/configs/numerics/fp8.yml
@@ -1,2 +1,17 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # FP8 E4M3 quantizer attributes (no axis — used for KV cache, etc.).
 num_bits: e4m3
diff --git a/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml b/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml
index 335e357a7f..e07ba9e19b 100644
--- a/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml
+++ b/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml
@@ -1,3 +1,18 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # NVFP4 E2M1 blockwise with dynamic calibration and FP8 E4M3 scales.
 num_bits: e2m1
 block_sizes:
diff --git a/modelopt_recipes/configs/numerics/nvfp4_static.yml b/modelopt_recipes/configs/numerics/nvfp4_static.yml
index 90d15bf489..758be89a30 100644
--- a/modelopt_recipes/configs/numerics/nvfp4_static.yml
+++ b/modelopt_recipes/configs/numerics/nvfp4_static.yml
@@ -1,3 +1,18 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # NVFP4 E2M1 blockwise with static calibration and FP8 E4M3 scales.
 num_bits: e2m1
 block_sizes:
diff --git a/modelopt_recipes/configs/ptq/base_disable_all.yaml b/modelopt_recipes/configs/ptq/base_disable_all.yaml
index fbe6cf514c..35bdf2c6a4 100644
--- a/modelopt_recipes/configs/ptq/base_disable_all.yaml
+++ b/modelopt_recipes/configs/ptq/base_disable_all.yaml
@@ -1,3 +1,18 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Disable all quantizers by default (deny-all-then-configure pattern).
 
   - quantizer_name: '*'
diff --git a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml
index 7c1cd532fb..a8c04357d7 100644
--- a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml
+++ b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml
@@ -1,3 +1,18 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Standard quantizer exclusions: layers that should not be quantized.
 
   - quantizer_name: '*block_sparse_moe.gate*'
diff --git a/modelopt_recipes/configs/ptq/fp8_kv.yaml b/modelopt_recipes/configs/ptq/fp8_kv.yaml
index cb3ff3a009..85ff617ead 100644
--- a/modelopt_recipes/configs/ptq/fp8_kv.yaml
+++ b/modelopt_recipes/configs/ptq/fp8_kv.yaml
@@ -1,4 +1,25 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # FP8 E4M3 KV cache quantization.
+#
+# This snippet uses multi-document YAML (separated by ---) because it is a
+# list-valued snippet that also needs to $import another snippet.  YAML only
+# allows one root node per document, so a file cannot be both a mapping
+# (for imports) and a list (for entries).  The first document holds the
+# imports, the second holds the list content that references them.
 imports:
   fp8: configs/numerics/fp8
 ---

From dcf10a6e7cf3546ebdd88b234a82880bd3fcd7f6 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Tue, 14 Apr 2026 15:54:09 -0700
Subject: [PATCH 14/30] more snippets

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 .../configs/ptq/w8a8_fp8_fp8.yaml             | 25 +++++++++++++++++++
 .../general/ptq/fp8_default-fp8_kv.yaml       |  9 ++-----
 2 files changed, 27 insertions(+), 7 deletions(-)
 create mode 100644 modelopt_recipes/configs/ptq/w8a8_fp8_fp8.yaml

diff --git a/modelopt_recipes/configs/ptq/w8a8_fp8_fp8.yaml b/modelopt_recipes/configs/ptq/w8a8_fp8_fp8.yaml
new file mode 100644
index 0000000000..c55cbf1d6b
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/w8a8_fp8_fp8.yaml
@@ -0,0 +1,25 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# W8A8 FP8: FP8 E4M3 weight and input (activation) quantizers; entries follow '---'.
+imports:
+  fp8: configs/numerics/fp8
+---
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: fp8
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: fp8
diff --git a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
index 680677d607..6b30a04022 100644
--- a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
@@ -16,7 +16,7 @@
 imports:
   base_disable_all: configs/ptq/base_disable_all
   default_disabled: configs/ptq/default_disabled_quantizers
-  fp8: configs/numerics/fp8
+  w8a8_fp8_fp8: configs/ptq/w8a8_fp8_fp8
   fp8_kv: configs/ptq/fp8_kv
 
 metadata:
@@ -26,11 +26,6 @@ quantize:
   algorithm: max
   quant_cfg:
     - $import: base_disable_all
-    - quantizer_name: '*input_quantizer'
-      cfg:
-        $import: fp8
-    - quantizer_name: '*weight_quantizer'
-      cfg:
-        $import: fp8
+    - $import: w8a8_fp8_fp8
     - $import: fp8_kv
     - $import: default_disabled

From dc670010bbf6086c7205a6d730db9ca6c0cee4b5 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Tue, 14 Apr 2026 17:16:16 -0700
Subject: [PATCH 15/30] nvfp4_dynamic is default

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 docs/source/guides/10_recipes.rst             |  8 +++---
 .../numerics/{nvfp4_dynamic.yml => nvfp4.yml} |  2 +-
 .../configs/numerics/nvfp4_static.yml         |  2 +-
 .../configs/ptq/w4a4_nvfp4_nvfp4.yaml         | 25 +++++++++++++++++++
 .../general/ptq/nvfp4_default-fp8_kv.yaml     |  9 ++-----
 .../ptq/nvfp4_default-none_kv_gptq.yaml       |  4 +--
 .../ptq/nvfp4_experts_only-fp8_kv.yaml        |  2 +-
 .../general/ptq/nvfp4_mlp_only-fp8_kv.yaml    |  2 +-
 .../general/ptq/nvfp4_omlp_only-fp8_kv.yaml   |  2 +-
 9 files changed, 38 insertions(+), 18 deletions(-)
 rename modelopt_recipes/configs/numerics/{nvfp4_dynamic.yml => nvfp4.yml} (88%)
 create mode 100644 modelopt_recipes/configs/ptq/w4a4_nvfp4_nvfp4.yaml

diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst
index aa59583446..9a4b2e8f4a 100644
--- a/docs/source/guides/10_recipes.rst
+++ b/docs/source/guides/10_recipes.rst
@@ -205,7 +205,7 @@ list order), then inline keys last.
    cfg:
      $import: nvfp4
 
-   # Import + override — import nvfp4_dynamic, then override type inline
+   # Import + override — import nvfp4, then override type inline
    cfg:
      $import: nvfp4    # imports {num_bits: e2m1, block_sizes: {-1: 16, type: dynamic, ...}}
      block_sizes:
@@ -301,8 +301,8 @@ Reusable snippets are stored under ``modelopt_recipes/configs/``:
      - Description
    * - ``configs/numerics/fp8``
      - FP8 E4M3 quantizer attributes
-   * - ``configs/numerics/nvfp4_dynamic``
-     - NVFP4 E2M1 blockwise, dynamic calibration, FP8 scales
+   * - ``configs/numerics/nvfp4``
+     - NVFP4 E2M1 blockwise, dynamic calibration, FP8 scales (default)
    * - ``configs/numerics/nvfp4_static``
      - NVFP4 E2M1 blockwise, static calibration, FP8 scales
    * - ``configs/ptq/base_disable_all``
@@ -597,8 +597,8 @@ The ``modelopt_recipes/`` package is organized as follows:
    +-- configs/                    # Reusable config snippets (imported via $import)
        +-- numerics/               # Numeric format definitions
        |   +-- fp8.yml
-       |   +-- nvfp4_dynamic.yml
        |   +-- nvfp4_static.yml
+       |   +-- nvfp4.yml
        +-- ptq/                    # PTQ-specific entry snippets
            +-- base_disable_all.yaml
            +-- default_disabled_quantizers.yaml
diff --git a/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml b/modelopt_recipes/configs/numerics/nvfp4.yml
similarity index 88%
rename from modelopt_recipes/configs/numerics/nvfp4_dynamic.yml
rename to modelopt_recipes/configs/numerics/nvfp4.yml
index e07ba9e19b..0639e51c14 100644
--- a/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml
+++ b/modelopt_recipes/configs/numerics/nvfp4.yml
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# NVFP4 E2M1 blockwise with dynamic calibration and FP8 E4M3 scales.
+# NVFP4 E2M1 blockwise quantizer attributes with FP8 E4M3 scales (dynamic calibration, the default).
 num_bits: e2m1
 block_sizes:
   -1: 16
diff --git a/modelopt_recipes/configs/numerics/nvfp4_static.yml b/modelopt_recipes/configs/numerics/nvfp4_static.yml
index 758be89a30..9dda0cae91 100644
--- a/modelopt_recipes/configs/numerics/nvfp4_static.yml
+++ b/modelopt_recipes/configs/numerics/nvfp4_static.yml
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# NVFP4 E2M1 blockwise with static calibration and FP8 E4M3 scales.
+# NVFP4 E2M1 blockwise quantizer attributes with FP8 E4M3 scales (static calibration).
 num_bits: e2m1
 block_sizes:
   -1: 16
diff --git a/modelopt_recipes/configs/ptq/w4a4_nvfp4_nvfp4.yaml b/modelopt_recipes/configs/ptq/w4a4_nvfp4_nvfp4.yaml
new file mode 100644
index 0000000000..2fc516e5dc
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/w4a4_nvfp4_nvfp4.yaml
@@ -0,0 +1,25 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# W4A4 NVFP4: dynamic NVFP4 E2M1 weight and input (activation) quantizers; entries follow '---'.
+imports:
+  nvfp4: configs/numerics/nvfp4
+---
+  - quantizer_name: '*weight_quantizer'
+    cfg:
+      $import: nvfp4
+  - quantizer_name: '*input_quantizer'
+    cfg:
+      $import: nvfp4
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
index 9dc6da1ace..f3c368a620 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
@@ -16,7 +16,7 @@
 imports:
   base_disable_all: configs/ptq/base_disable_all
   default_disabled: configs/ptq/default_disabled_quantizers
-  nvfp4: configs/numerics/nvfp4_dynamic
+  w4a4_nvfp4_nvfp4: configs/ptq/w4a4_nvfp4_nvfp4
   fp8_kv: configs/ptq/fp8_kv
 
 metadata:
@@ -26,11 +26,6 @@ quantize:
   algorithm: max
   quant_cfg:
     - $import: base_disable_all
-    - quantizer_name: '*weight_quantizer'
-      cfg:
-        $import: nvfp4
-    - quantizer_name: '*input_quantizer'
-      cfg:
-        $import: nvfp4
+    - $import: w4a4_nvfp4_nvfp4
     - $import: fp8_kv
     - $import: default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml
index 45db9aa80c..1754763f65 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml
@@ -17,7 +17,7 @@ imports:
   base_disable_all: configs/ptq/base_disable_all
   default_disabled: configs/ptq/default_disabled_quantizers
   nvfp4_static: configs/numerics/nvfp4_static
-  nvfp4_dynamic: configs/numerics/nvfp4_dynamic
+  nvfp4: configs/numerics/nvfp4
 
 metadata:
   recipe_type: ptq
@@ -33,7 +33,7 @@ quantize:
         $import: nvfp4_static
     - quantizer_name: '*input_quantizer'
       cfg:
-        $import: nvfp4_dynamic
+        $import: nvfp4
     - quantizer_name: '*[kv]_bmm_quantizer'
       enable: false
     - $import: default_disabled
diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
index a3730f839e..845f45b5f7 100644
--- a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
@@ -16,7 +16,7 @@
 imports:
   base_disable_all: configs/ptq/base_disable_all
   default_disabled: configs/ptq/default_disabled_quantizers
-  nvfp4: configs/numerics/nvfp4_dynamic
+  nvfp4: configs/numerics/nvfp4
   fp8_kv: configs/ptq/fp8_kv
 
 metadata:
diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
index 0d9d0861ca..f1ecd23acf 100644
--- a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
@@ -16,7 +16,7 @@
 imports:
   base_disable_all: configs/ptq/base_disable_all
   default_disabled: configs/ptq/default_disabled_quantizers
-  nvfp4: configs/numerics/nvfp4_dynamic
+  nvfp4: configs/numerics/nvfp4
   fp8_kv: configs/ptq/fp8_kv
 
 metadata:
diff --git a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
index 1a1fa63255..77cf8b2b76 100644
--- a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
@@ -16,7 +16,7 @@
 imports:
   base_disable_all: configs/ptq/base_disable_all
   default_disabled: configs/ptq/default_disabled_quantizers
-  nvfp4: configs/numerics/nvfp4_dynamic
+  nvfp4: configs/numerics/nvfp4
   fp8_kv: configs/ptq/fp8_kv
 
 metadata:

From 5baba0b532dfdfab950d2e6b70b6858a6b5dcd46 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Wed, 15 Apr 2026 12:59:11 -0700
Subject: [PATCH 16/30] quant config

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 modelopt/recipe/_config_loader.py             | 128 ++++++++++++++++-
 modelopt/recipe/loader.py                     | 135 +-----------------
 modelopt/torch/quantization/config.py         |  17 +--
 .../configs/ptq/presets/README.md             |  14 ++
 .../configs/ptq/presets/fp8_default.yaml      |  27 ++++
 5 files changed, 173 insertions(+), 148 deletions(-)
 create mode 100644 modelopt_recipes/configs/ptq/presets/README.md
 create mode 100644 modelopt_recipes/configs/ptq/presets/fp8_default.yaml

diff --git a/modelopt/recipe/_config_loader.py b/modelopt/recipe/_config_loader.py
index a94b67edb2..922875becb 100644
--- a/modelopt/recipe/_config_loader.py
+++ b/modelopt/recipe/_config_loader.py
@@ -62,8 +62,8 @@ def _parse_exmy(s: str) -> tuple[int, int] | str:
     return s
 
 
-def load_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[Any]:
-    """Load a config yaml.
+def _load_raw_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[Any]:
+    """Load a config YAML without resolving ``$import`` references.
 
     config_file: Path to a config yaml file. The path suffix can be omitted.
     """
@@ -141,3 +141,127 @@ def load_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[
             f"Config file {config_path} must contain a YAML mapping or list, got {type(_raw).__name__}"
         )
     return _parse_exmy_num_bits(_raw)
+
+
+# ---------------------------------------------------------------------------
+# $import resolution
+# ---------------------------------------------------------------------------
+
+_IMPORT_KEY = "$import"
+
+
+def _resolve_imports(
+    data: dict[str, Any], _loading: frozenset[str] | None = None
+) -> dict[str, Any]:
+    """Resolve the ``imports`` section and ``$import`` references in *data* (mutated in place).
+
+    See ``modelopt.recipe.loader`` module docstring for the full specification.
+    This function lives in ``_config_loader`` (not ``loader``) so that it can be
+    used from ``modelopt.torch.quantization.config`` without circular imports.
+    Raises ``ValueError`` for malformed, unknown, circular, or wrong-typed imports.
+    """
+    import copy  # function-scope import: only this resolver needs it
+
+    imports_dict = data.pop("imports", None)
+    if not imports_dict:
+        return data
+
+    if not isinstance(imports_dict, dict):
+        raise ValueError(
+            f"'imports' must be a dict mapping names to config paths, got: {type(imports_dict).__name__}"
+        )
+
+    _loading = frozenset() if _loading is None else _loading
+
+    # Build name → config mapping (recursively resolve nested imports)
+    import_map: dict[str, Any] = {}
+    for name, config_path in imports_dict.items():
+        if not config_path:
+            raise ValueError(f"Import {name!r} has an empty config path.")
+        if config_path in _loading:
+            raise ValueError(
+                f"Circular import detected: {config_path!r} is already being loaded. "
+                f"Import chain: {sorted(_loading)}"
+            )
+        snippet = _load_raw_config(config_path)
+        if isinstance(snippet, dict) and "imports" in snippet:
+            snippet = _resolve_imports(snippet, _loading | {config_path})
+        # Unwrap _list_content (multi-document YAML: imports + list content)
+        if isinstance(snippet, dict) and "_list_content" in snippet:
+            snippet = snippet["_list_content"]
+        import_map[name] = snippet
+
+    def _lookup(ref_name: str, context: str) -> Any:
+        """Return an independent deep copy of the snippet imported as *ref_name*."""
+        if ref_name not in import_map:
+            raise ValueError(
+                f"Unknown $import reference {ref_name!r} in {context}. "
+                f"Available imports: {list(import_map.keys())}"
+            )
+        # Copy per reference: otherwise every use of a snippet shares its nested dicts/lists.
+        return copy.deepcopy(import_map[ref_name])
+
+    def _resolve_list(entries: list[Any]) -> list[Any]:
+        """Resolve $import markers in a list of entries."""
+        resolved: list[Any] = []
+        for entry in entries:
+            if isinstance(entry, dict) and _IMPORT_KEY in entry:
+                # {$import: name} → splice the imported list in place of this entry
+                if len(entry) > 1:
+                    raise ValueError(
+                        f"$import must be the only key in the dict, got extra keys: "
+                        f"{sorted(k for k in entry if k != _IMPORT_KEY)}"
+                    )
+                imported = _lookup(entry[_IMPORT_KEY], "list entry")
+                if not isinstance(imported, list):
+                    raise ValueError(
+                        f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a "
+                        f"list, got {type(imported).__name__}."
+                    )
+                resolved.extend(imported)
+                continue
+            cfg = entry.get("cfg") if isinstance(entry, dict) else None
+            if isinstance(cfg, dict) and _IMPORT_KEY in cfg:
+                # cfg: {$import: name_or_list, ...inline} → imports merge in list order
+                # (later ones win), then the inline keys override every import.
+                ref = cfg.pop(_IMPORT_KEY)
+                ref_names = ref if isinstance(ref, list) else [ref]
+                merged: dict[str, Any] = {}
+                for rname in ref_names:
+                    snippet = _lookup(rname, f"cfg of {entry}")
+                    if not isinstance(snippet, dict):
+                        raise ValueError(
+                            f"$import {rname!r} in cfg must resolve to a dict, "
+                            f"got {type(snippet).__name__}."
+                        )
+                    merged.update(snippet)
+                merged.update(cfg)
+                entry["cfg"] = merged
+            resolved.append(entry)
+        return resolved
+
+    # Resolve in quant_cfg (top-level or nested under quantize)
+    for container in [data, data.get("quantize", {})]:
+        if isinstance(container, dict):
+            quant_cfg = container.get("quant_cfg")
+            if isinstance(quant_cfg, list):
+                container["quant_cfg"] = _resolve_list(quant_cfg)
+
+    # Resolve in _list_content (multi-document snippets)
+    if "_list_content" in data:
+        data["_list_content"] = _resolve_list(data["_list_content"])
+
+    return data
+
+
+def load_config(config_path: str | Path | Traversable) -> dict[str, Any] | list[Any]:
+    """Load a YAML config, resolve its ``$import`` references, and return the final dict or list.
+
+    Multi-document list snippets are returned as their resolved list, not the internal wrapper.
+    """
+    data = _load_raw_config(config_path)
+    if isinstance(data, dict) and "imports" in data:
+        data = _resolve_imports(data)
+    if isinstance(data, dict) and "_list_content" in data:
+        return data["_list_content"]  # unwrap, mirroring how _resolve_imports treats snippets
+    return data
diff --git a/modelopt/recipe/loader.py b/modelopt/recipe/loader.py
index da62b17b63..f91f6d9920 100644
--- a/modelopt/recipe/loader.py
+++ b/modelopt/recipe/loader.py
@@ -22,139 +22,12 @@
 from pathlib import Path
 from typing import Any
 
-from ._config_loader import BUILTIN_RECIPES_LIB, load_config
+from ._config_loader import BUILTIN_RECIPES_LIB, _load_raw_config, _resolve_imports, load_config
 from .config import ModelOptPTQRecipe, ModelOptRecipeBase, RecipeType
 
 __all__ = ["load_config", "load_recipe"]
 
 
-_IMPORT_KEY = "$import"
-
-
-def _resolve_imports(
-    data: dict[str, Any], _loading: frozenset[str] | None = None
-) -> dict[str, Any]:
-    """Resolve the ``imports`` section and ``$import`` references in a recipe.
-
-    An ``imports`` block is a dict mapping short names to config file paths::
-
-        imports:
-          fp8: configs/numerics/fp8
-          nvfp4: configs/numerics/nvfp4_dynamic
-
-    References use the explicit ``$import`` marker so they are never confused
-    with literal string values::
-
-        quant_cfg:
-          - $import: base_disable_all           # entire entry replaced (or list spliced)
-          - quantizer_name: '*weight_quantizer'
-            cfg:
-              $import: fp8                      # cfg value replaced
-
-    Resolution is **recursive**: an imported snippet may itself contain an
-    ``imports`` section.  Circular imports are detected and raise ``ValueError``.
-    """
-    imports_dict = data.pop("imports", None)
-    if not imports_dict:
-        return data
-
-    if not isinstance(imports_dict, dict):
-        raise ValueError(
-            f"'imports' must be a dict mapping names to config paths, got: {type(imports_dict).__name__}"
-        )
-
-    if _loading is None:
-        _loading = frozenset()
-
-    # Build name → config mapping (recursively resolve nested imports)
-    import_map: dict[str, Any] = {}
-    for name, config_path in imports_dict.items():
-        if not config_path:
-            raise ValueError(f"Import {name!r} has an empty config path.")
-        if config_path in _loading:
-            raise ValueError(
-                f"Circular import detected: {config_path!r} is already being loaded. "
-                f"Import chain: {sorted(_loading)}"
-            )
-        snippet = load_config(config_path)
-        if isinstance(snippet, dict) and "imports" in snippet:
-            snippet = _resolve_imports(snippet, _loading | {config_path})
-        # Unwrap _list_content (multi-document YAML: imports + list content)
-        if isinstance(snippet, dict) and "_list_content" in snippet:
-            snippet = snippet["_list_content"]
-        import_map[name] = snippet
-
-    def _lookup(ref_name: str, context: str) -> Any:
-        if ref_name not in import_map:
-            raise ValueError(
-                f"Unknown $import reference {ref_name!r} in {context}. "
-                f"Available imports: {list(import_map.keys())}"
-            )
-        return import_map[ref_name]
-
-    def _resolve_list(entries: list[Any]) -> list[Any]:
-        """Resolve $import markers in a list of quant_cfg-style entries."""
-        resolved: list[Any] = []
-        for entry in entries:
-            if isinstance(entry, dict) and _IMPORT_KEY in entry:
-                # {$import: name} → splice imported list
-                if len(entry) > 1:
-                    raise ValueError(
-                        f"$import must be the only key in the dict, got extra keys: "
-                        f"{sorted(k for k in entry if k != _IMPORT_KEY)}"
-                    )
-                imported = _lookup(entry[_IMPORT_KEY], "list entry")
-                if not isinstance(imported, list):
-                    raise ValueError(
-                        f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a "
-                        f"list, got {type(imported).__name__}."
-                    )
-                resolved.extend(imported)
-            elif (
-                isinstance(entry, dict)
-                and isinstance(entry.get("cfg"), dict)
-                and _IMPORT_KEY in entry["cfg"]
-            ):
-                # cfg: {$import: name_or_list, ...inline} → import then override
-                #
-                # Precedence (lowest → highest):
-                #   1. Imports in list order (later imports override earlier)
-                #   2. Inline keys (override all imports)
-                ref = entry["cfg"].pop(_IMPORT_KEY)
-                inline_keys = dict(entry["cfg"])
-                ref_names = ref if isinstance(ref, list) else [ref]
-
-                merged: dict[str, Any] = {}
-                for name in ref_names:
-                    snippet = _lookup(name, f"cfg of {entry}")
-                    if not isinstance(snippet, dict):
-                        raise ValueError(
-                            f"$import {name!r} in cfg must resolve to a dict, "
-                            f"got {type(snippet).__name__}."
-                        )
-                    merged.update(snippet)
-
-                merged.update(inline_keys)
-                entry["cfg"] = merged
-                resolved.append(entry)
-            else:
-                resolved.append(entry)
-        return resolved
-
-    # Resolve $import references in quant_cfg entries
-    quantize = data.get("quantize")
-    if isinstance(quantize, dict):
-        quant_cfg = quantize.get("quant_cfg")
-        if isinstance(quant_cfg, list):
-            quantize["quant_cfg"] = _resolve_list(quant_cfg)
-
-    # Resolve $import references in _list_content (multi-document snippets)
-    if "_list_content" in data:
-        data["_list_content"] = _resolve_list(data["_list_content"])
-
-    return data
-
-
 def _resolve_recipe_path(recipe_path: str | Path | Traversable) -> Path | Traversable:
     """Resolve a recipe path, checking the built-in library first then the filesystem.
 
@@ -214,7 +87,7 @@ def _load_recipe_from_file(recipe_file: Path | Traversable) -> ModelOptRecipeBas
     The file must contain a ``metadata`` section with at least ``recipe_type``,
     plus a ``quant_cfg`` mapping and an optional ``algorithm`` for PTQ recipes.
     """
-    raw = load_config(recipe_file)
+    raw = _load_raw_config(recipe_file)
     assert isinstance(raw, dict), f"Recipe file {recipe_file} must be a YAML mapping."
     data = _resolve_imports(raw)
 
@@ -247,7 +120,7 @@ def _load_recipe_from_dir(recipe_dir: Path | Traversable) -> ModelOptRecipeBase:
             f"Cannot find a recipe descriptor in {recipe_dir}. Looked for: recipe.yml, recipe.yaml"
         )
 
-    recipe_data = load_config(recipe_file)
+    recipe_data = _load_raw_config(recipe_file)
     assert isinstance(recipe_data, dict), f"Recipe file {recipe_file} must be a YAML mapping."
     metadata = recipe_data.get("metadata", {})
     recipe_type = metadata.get("recipe_type")
@@ -266,7 +139,7 @@ def _load_recipe_from_dir(recipe_dir: Path | Traversable) -> ModelOptRecipeBase:
                 f"Cannot find quantize in {recipe_dir}. Looked for: quantize.yml, quantize.yaml"
             )
         # Resolve imports: imports are in recipe.yml, quantize data is separate
-        quantize_data = load_config(quantize_file)
+        quantize_data = _load_raw_config(quantize_file)
         assert isinstance(quantize_data, dict), f"{quantize_file} must be a YAML mapping."
         combined: dict[str, Any] = {"quantize": quantize_data}
         imports = recipe_data.get("imports")
diff --git a/modelopt/torch/quantization/config.py b/modelopt/torch/quantization/config.py
index 99c729efbc..5535156438 100644
--- a/modelopt/torch/quantization/config.py
+++ b/modelopt/torch/quantization/config.py
@@ -157,6 +157,7 @@
 from pydantic import ValidationInfo, field_validator, model_validator
 from typing_extensions import Required, TypedDict
 
+from modelopt.recipe._config_loader import load_config
 from modelopt.torch.opt.config import ModeloptBaseConfig, ModeloptField
 from modelopt.torch.utils.network import ConstructorLike
 
@@ -272,21 +273,7 @@ def find_quant_cfg_entry_by_path(
     "algorithm": "max",
 }
 
-FP8_DEFAULT_CFG = {
-    "quant_cfg": [
-        *_base_disable_all,
-        {
-            "quantizer_name": "*weight_quantizer",
-            "cfg": {"num_bits": (4, 3), "axis": None},
-        },
-        {
-            "quantizer_name": "*input_quantizer",
-            "cfg": {"num_bits": (4, 3), "axis": None},
-        },
-        *_default_disabled_quantizer_cfg,
-    ],
-    "algorithm": "max",
-}
+FP8_DEFAULT_CFG: dict[str, Any] = load_config("configs/ptq/presets/fp8_default")
 
 MAMBA_MOE_FP8_AGGRESSIVE_CFG = {
     "quant_cfg": [
diff --git a/modelopt_recipes/configs/ptq/presets/README.md b/modelopt_recipes/configs/ptq/presets/README.md
new file mode 100644
index 0000000000..80d186d6fc
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/README.md
@@ -0,0 +1,14 @@
+# PTQ Preset Configs
+
+This directory holds preset quantization configurations that serve as the
+single source of truth for the hardcoded `*_CFG` dicts in
+`modelopt.torch.quantization.config` (e.g., `FP8_DEFAULT_CFG`).
+
+Each preset is a complete, self-contained config with `algorithm` and
+`quant_cfg` — ready to pass directly to `mtq.quantize()`. Presets compose
+from the reusable snippets in `configs/numerics/` and `configs/ptq/` via
+the `$import` system.
+
+When adding a new preset, use existing snippets where possible and keep
+the YAML as the authoritative definition — the Python config should load
+from here rather than hardcoding the dict.
diff --git a/modelopt_recipes/configs/ptq/presets/fp8_default.yaml b/modelopt_recipes/configs/ptq/presets/fp8_default.yaml
new file mode 100644
index 0000000000..21ce58f4e7
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/fp8_default.yaml
@@ -0,0 +1,27 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# FP8 per-tensor weight and activation (W8A8), max calibration.
+# Equivalent to the hardcoded FP8_DEFAULT_CFG in config.py.
+imports:
+  base_disable_all: configs/ptq/base_disable_all
+  w8a8: configs/ptq/w8a8_fp8_fp8
+  default_disabled: configs/ptq/default_disabled_quantizers
+
+algorithm: max
+quant_cfg:
+  - $import: base_disable_all
+  - $import: w8a8
+  - $import: default_disabled

From 82d5a12620e9ca057768daa1e6dad092df3ac508 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Wed, 15 Apr 2026 14:06:03 -0700
Subject: [PATCH 17/30] Reorganize PTQ presets into model/ and kv/; move snippets to configs/ptq/units/

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 docs/source/guides/10_recipes.rst             | 20 ++++++++--------
 modelopt/torch/quantization/config.py         | 11 ++-------
 .../configs/ptq/presets/README.md             | 18 +++++++++-----
 .../configs/ptq/presets/kv/fp8.yaml           | 24 +++++++++++++++++++
 .../{fp8_default.yaml => model/fp8.yaml}      |  6 ++---
 .../ptq/{ => units}/base_disable_all.yaml     |  0
 .../default_disabled_quantizers.yaml          |  0
 .../configs/ptq/{ => units}/fp8_kv.yaml       |  0
 .../ptq/{ => units}/w4a4_nvfp4_nvfp4.yaml     |  0
 .../configs/ptq/{ => units}/w8a8_fp8_fp8.yaml |  0
 .../general/ptq/fp8_default-fp8_kv.yaml       |  8 +++----
 .../general/ptq/nvfp4_default-fp8_kv.yaml     |  8 +++----
 .../ptq/nvfp4_default-none_kv_gptq.yaml       |  4 ++--
 .../ptq/nvfp4_experts_only-fp8_kv.yaml        |  6 ++---
 .../general/ptq/nvfp4_mlp_only-fp8_kv.yaml    |  6 ++---
 .../general/ptq/nvfp4_omlp_only-fp8_kv.yaml   |  6 ++---
 16 files changed, 70 insertions(+), 47 deletions(-)
 create mode 100644 modelopt_recipes/configs/ptq/presets/kv/fp8.yaml
 rename modelopt_recipes/configs/ptq/presets/{fp8_default.yaml => model/fp8.yaml} (85%)
 rename modelopt_recipes/configs/ptq/{ => units}/base_disable_all.yaml (100%)
 rename modelopt_recipes/configs/ptq/{ => units}/default_disabled_quantizers.yaml (100%)
 rename modelopt_recipes/configs/ptq/{ => units}/fp8_kv.yaml (100%)
 rename modelopt_recipes/configs/ptq/{ => units}/w4a4_nvfp4_nvfp4.yaml (100%)
 rename modelopt_recipes/configs/ptq/{ => units}/w8a8_fp8_fp8.yaml (100%)

diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst
index 9a4b2e8f4a..a8986312f0 100644
--- a/docs/source/guides/10_recipes.rst
+++ b/docs/source/guides/10_recipes.rst
@@ -94,8 +94,8 @@ The simplest form is a single ``.yml`` or ``.yaml`` file.
 .. code-block:: yaml
 
    imports:
-     base_disable_all: configs/ptq/base_disable_all
-     default_disabled: configs/ptq/default_disabled_quantizers
+     base_disable_all: configs/ptq/units/base_disable_all
+     default_disabled: configs/ptq/units/default_disabled_quantizers
      fp8: configs/numerics/fp8
 
    metadata:
@@ -227,8 +227,8 @@ a list splice are not supported.
 .. code-block:: yaml
 
    imports:
-     base_disable_all: configs/ptq/base_disable_all
-     default_disabled: configs/ptq/default_disabled_quantizers
+     base_disable_all: configs/ptq/units/base_disable_all
+     default_disabled: configs/ptq/units/default_disabled_quantizers
      fp8: configs/numerics/fp8
 
    metadata:
@@ -275,7 +275,7 @@ and returns the resolved list:
 
 .. code-block:: yaml
 
-   # configs/ptq/fp8_kv.yaml — list snippet that imports a dict snippet
+   # configs/ptq/units/fp8_kv.yaml — list snippet that imports a dict snippet
    imports:
      fp8: configs/numerics/fp8
    ---
@@ -305,11 +305,11 @@ Reusable snippets are stored under ``modelopt_recipes/configs/``:
      - NVFP4 E2M1 blockwise, dynamic calibration, FP8 scales (default)
    * - ``configs/numerics/nvfp4_static``
      - NVFP4 E2M1 blockwise, static calibration, FP8 scales
-   * - ``configs/ptq/base_disable_all``
+   * - ``configs/ptq/units/base_disable_all``
      - Disable all quantizers (deny-all-then-configure pattern)
-   * - ``configs/ptq/default_disabled_quantizers``
+   * - ``configs/ptq/units/default_disabled_quantizers``
      - Standard exclusions (LM head, routers, BatchNorm, etc.)
-   * - ``configs/ptq/fp8_kv``
+   * - ``configs/ptq/units/fp8_kv``
      - FP8 E4M3 KV cache quantization (multi-document, imports ``fp8``)
 
 
@@ -549,8 +549,8 @@ Example -- creating a custom PTQ recipe using imports:
 
    # my_int8_recipe.yml
    imports:
-     base_disable_all: configs/ptq/base_disable_all
-     default_disabled: configs/ptq/default_disabled_quantizers
+     base_disable_all: configs/ptq/units/base_disable_all
+     default_disabled: configs/ptq/units/default_disabled_quantizers
 
    metadata:
      recipe_type: ptq
diff --git a/modelopt/torch/quantization/config.py b/modelopt/torch/quantization/config.py
index 5535156438..5430391adb 100644
--- a/modelopt/torch/quantization/config.py
+++ b/modelopt/torch/quantization/config.py
@@ -273,7 +273,7 @@ def find_quant_cfg_entry_by_path(
     "algorithm": "max",
 }
 
-FP8_DEFAULT_CFG: dict[str, Any] = load_config("configs/ptq/presets/fp8_default")
+FP8_DEFAULT_CFG: dict[str, Any] = load_config("configs/ptq/presets/model/fp8")
 
 MAMBA_MOE_FP8_AGGRESSIVE_CFG = {
     "quant_cfg": [
@@ -518,14 +518,7 @@ def find_quant_cfg_entry_by_path(
 # KV-cache configs are designed to be merged with a primary quantization config (e.g.
 # FP8_DEFAULT_CFG) that already contains _base_disable_all.  They intentionally omit both
 # _base_disable_all and "algorithm" because these are provided by the primary config.
-FP8_KV_CFG = {
-    "quant_cfg": [
-        {
-            "quantizer_name": "*[kv]_bmm_quantizer",
-            "cfg": {"num_bits": (4, 3)},
-        },
-    ]
-}
+FP8_KV_CFG: dict[str, Any] = load_config("configs/ptq/presets/kv/fp8")
 
 FP8_AFFINE_KV_CFG = {
     "quant_cfg": [
diff --git a/modelopt_recipes/configs/ptq/presets/README.md b/modelopt_recipes/configs/ptq/presets/README.md
index 80d186d6fc..f8974fc78c 100644
--- a/modelopt_recipes/configs/ptq/presets/README.md
+++ b/modelopt_recipes/configs/ptq/presets/README.md
@@ -1,14 +1,20 @@
 # PTQ Preset Configs
 
 This directory holds preset quantization configurations that serve as the
-single source of truth for the hardcoded `*_CFG` dicts in
+YAML source of truth for the hardcoded `*_CFG` dicts in
 `modelopt.torch.quantization.config` (e.g., `FP8_DEFAULT_CFG`).
 
 Each preset is a complete, self-contained config with `algorithm` and
 `quant_cfg` — ready to pass directly to `mtq.quantize()`. Presets compose
-from the reusable snippets in `configs/numerics/` and `configs/ptq/` via
-the `$import` system.
+from the reusable snippets in `configs/numerics/` and `configs/ptq/units/`
+via the `$import` system.
 
-When adding a new preset, use existing snippets where possible and keep
-the YAML as the authoritative definition — the Python config should load
-from here rather than hardcoding the dict.
+**Note:** The main purpose of these presets is to support the existing
+`hf_ptq.py` script's `--qformat` / `--kv_cache_qformat` flags and other
+code paths that reference
+the hardcoded `*_CFG` dicts, maintaining backward compatibility during
+the transition to recipe-based workflows. Users are encouraged to use
+`load_recipe` with full recipe files under `general/` or `models/`
+instead. Some or all of these presets may be deprecated or removed in
+future releases as the recipe-based workflow becomes the standard entry
+point.
diff --git a/modelopt_recipes/configs/ptq/presets/kv/fp8.yaml b/modelopt_recipes/configs/ptq/presets/kv/fp8.yaml
new file mode 100644
index 0000000000..f23ba54145
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/presets/kv/fp8.yaml
@@ -0,0 +1,24 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# FP8 E4M3 KV cache quantization preset.
+# Equivalent to the hardcoded FP8_KV_CFG in config.py.
+# This is a partial config (no algorithm, no base_disable_all) — designed
+# to be merged with a primary model quantization config.
+imports:
+  fp8_kv: configs/ptq/units/fp8_kv
+
+quant_cfg:
+  - $import: fp8_kv
diff --git a/modelopt_recipes/configs/ptq/presets/fp8_default.yaml b/modelopt_recipes/configs/ptq/presets/model/fp8.yaml
similarity index 85%
rename from modelopt_recipes/configs/ptq/presets/fp8_default.yaml
rename to modelopt_recipes/configs/ptq/presets/model/fp8.yaml
index 21ce58f4e7..763fe8ee5f 100644
--- a/modelopt_recipes/configs/ptq/presets/fp8_default.yaml
+++ b/modelopt_recipes/configs/ptq/presets/model/fp8.yaml
@@ -16,9 +16,9 @@
 # FP8 per-tensor weight and activation (W8A8), max calibration.
 # Equivalent to the hardcoded FP8_DEFAULT_CFG in config.py.
 imports:
-  base_disable_all: configs/ptq/base_disable_all
-  w8a8: configs/ptq/w8a8_fp8_fp8
-  default_disabled: configs/ptq/default_disabled_quantizers
+  base_disable_all: configs/ptq/units/base_disable_all
+  w8a8: configs/ptq/units/w8a8_fp8_fp8
+  default_disabled: configs/ptq/units/default_disabled_quantizers
 
 algorithm: max
 quant_cfg:
diff --git a/modelopt_recipes/configs/ptq/base_disable_all.yaml b/modelopt_recipes/configs/ptq/units/base_disable_all.yaml
similarity index 100%
rename from modelopt_recipes/configs/ptq/base_disable_all.yaml
rename to modelopt_recipes/configs/ptq/units/base_disable_all.yaml
diff --git a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml b/modelopt_recipes/configs/ptq/units/default_disabled_quantizers.yaml
similarity index 100%
rename from modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml
rename to modelopt_recipes/configs/ptq/units/default_disabled_quantizers.yaml
diff --git a/modelopt_recipes/configs/ptq/fp8_kv.yaml b/modelopt_recipes/configs/ptq/units/fp8_kv.yaml
similarity index 100%
rename from modelopt_recipes/configs/ptq/fp8_kv.yaml
rename to modelopt_recipes/configs/ptq/units/fp8_kv.yaml
diff --git a/modelopt_recipes/configs/ptq/w4a4_nvfp4_nvfp4.yaml b/modelopt_recipes/configs/ptq/units/w4a4_nvfp4_nvfp4.yaml
similarity index 100%
rename from modelopt_recipes/configs/ptq/w4a4_nvfp4_nvfp4.yaml
rename to modelopt_recipes/configs/ptq/units/w4a4_nvfp4_nvfp4.yaml
diff --git a/modelopt_recipes/configs/ptq/w8a8_fp8_fp8.yaml b/modelopt_recipes/configs/ptq/units/w8a8_fp8_fp8.yaml
similarity index 100%
rename from modelopt_recipes/configs/ptq/w8a8_fp8_fp8.yaml
rename to modelopt_recipes/configs/ptq/units/w8a8_fp8_fp8.yaml
diff --git a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
index 6b30a04022..8fe8c121d2 100644
--- a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
@@ -14,10 +14,10 @@
 # limitations under the License.
 
 imports:
-  base_disable_all: configs/ptq/base_disable_all
-  default_disabled: configs/ptq/default_disabled_quantizers
-  w8a8_fp8_fp8: configs/ptq/w8a8_fp8_fp8
-  fp8_kv: configs/ptq/fp8_kv
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled: configs/ptq/units/default_disabled_quantizers
+  w8a8_fp8_fp8: configs/ptq/units/w8a8_fp8_fp8
+  fp8_kv: configs/ptq/units/fp8_kv
 
 metadata:
   recipe_type: ptq
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
index f3c368a620..8da3bebff1 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
@@ -14,10 +14,10 @@
 # limitations under the License.
 
 imports:
-  base_disable_all: configs/ptq/base_disable_all
-  default_disabled: configs/ptq/default_disabled_quantizers
-  w4a4_nvfp4_nvfp4: configs/ptq/w4a4_nvfp4_nvfp4
-  fp8_kv: configs/ptq/fp8_kv
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled: configs/ptq/units/default_disabled_quantizers
+  w4a4_nvfp4_nvfp4: configs/ptq/units/w4a4_nvfp4_nvfp4
+  fp8_kv: configs/ptq/units/fp8_kv
 
 metadata:
   recipe_type: ptq
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml
index 1754763f65..04cfcfa925 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml
@@ -14,8 +14,8 @@
 # limitations under the License.
 
 imports:
-  base_disable_all: configs/ptq/base_disable_all
-  default_disabled: configs/ptq/default_disabled_quantizers
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled: configs/ptq/units/default_disabled_quantizers
   nvfp4_static: configs/numerics/nvfp4_static
   nvfp4: configs/numerics/nvfp4
 
diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
index 845f45b5f7..689e981b34 100644
--- a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
@@ -14,10 +14,10 @@
 # limitations under the License.
 
 imports:
-  base_disable_all: configs/ptq/base_disable_all
-  default_disabled: configs/ptq/default_disabled_quantizers
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled: configs/ptq/units/default_disabled_quantizers
   nvfp4: configs/numerics/nvfp4
-  fp8_kv: configs/ptq/fp8_kv
+  fp8_kv: configs/ptq/units/fp8_kv
 
 metadata:
   recipe_type: ptq
diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
index f1ecd23acf..ee26898cd9 100644
--- a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
@@ -14,10 +14,10 @@
 # limitations under the License.
 
 imports:
-  base_disable_all: configs/ptq/base_disable_all
-  default_disabled: configs/ptq/default_disabled_quantizers
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled: configs/ptq/units/default_disabled_quantizers
   nvfp4: configs/numerics/nvfp4
-  fp8_kv: configs/ptq/fp8_kv
+  fp8_kv: configs/ptq/units/fp8_kv
 
 metadata:
   recipe_type: ptq
diff --git a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
index 77cf8b2b76..1075303f72 100644
--- a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
@@ -14,10 +14,10 @@
 # limitations under the License.
 
 imports:
-  base_disable_all: configs/ptq/base_disable_all
-  default_disabled: configs/ptq/default_disabled_quantizers
+  base_disable_all: configs/ptq/units/base_disable_all
+  default_disabled: configs/ptq/units/default_disabled_quantizers
   nvfp4: configs/numerics/nvfp4
-  fp8_kv: configs/ptq/fp8_kv
+  fp8_kv: configs/ptq/units/fp8_kv
 
 metadata:
   recipe_type: ptq

From cbf3f29975b1ffeb17f4b77d577105e302454e14 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Wed, 15 Apr 2026 14:11:42 -0700
Subject: [PATCH 18/30] Rename numerics config snippets from .yml to .yaml

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 modelopt_recipes/configs/numerics/{fp8.yml => fp8.yaml}           | 0
 modelopt_recipes/configs/numerics/{nvfp4.yml => nvfp4.yaml}       | 0
 .../configs/numerics/{nvfp4_static.yml => nvfp4_static.yaml}      | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename modelopt_recipes/configs/numerics/{fp8.yml => fp8.yaml} (100%)
 rename modelopt_recipes/configs/numerics/{nvfp4.yml => nvfp4.yaml} (100%)
 rename modelopt_recipes/configs/numerics/{nvfp4_static.yml => nvfp4_static.yaml} (100%)

diff --git a/modelopt_recipes/configs/numerics/fp8.yml b/modelopt_recipes/configs/numerics/fp8.yaml
similarity index 100%
rename from modelopt_recipes/configs/numerics/fp8.yml
rename to modelopt_recipes/configs/numerics/fp8.yaml
diff --git a/modelopt_recipes/configs/numerics/nvfp4.yml b/modelopt_recipes/configs/numerics/nvfp4.yaml
similarity index 100%
rename from modelopt_recipes/configs/numerics/nvfp4.yml
rename to modelopt_recipes/configs/numerics/nvfp4.yaml
diff --git a/modelopt_recipes/configs/numerics/nvfp4_static.yml b/modelopt_recipes/configs/numerics/nvfp4_static.yaml
similarity index 100%
rename from modelopt_recipes/configs/numerics/nvfp4_static.yml
rename to modelopt_recipes/configs/numerics/nvfp4_static.yaml

From ae9e24527fdb0170c84d40ae256e78d273efb6c9 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Wed, 15 Apr 2026 14:32:54 -0700
Subject: [PATCH 19/30] Move config loader to modelopt.torch.opt to remove circular dependency

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 modelopt/recipe/_config_loader.py     | 258 +------------------------
 modelopt/torch/opt/config_loader.py   | 268 ++++++++++++++++++++++++++
 modelopt/torch/quantization/config.py |   2 +-
 3 files changed, 277 insertions(+), 251 deletions(-)
 create mode 100644 modelopt/torch/opt/config_loader.py

diff --git a/modelopt/recipe/_config_loader.py b/modelopt/recipe/_config_loader.py
index 922875becb..5ed2c80361 100644
--- a/modelopt/recipe/_config_loader.py
+++ b/modelopt/recipe/_config_loader.py
@@ -13,255 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""YAML config loading utilities.
+"""Re-export config loading utilities from ``modelopt.torch.opt.config_loader``."""
 
-This module is intentionally free of ``modelopt.torch`` imports so that
-``modelopt.torch.quantization.config`` can import :func:`load_config` without
-triggering a circular import through ``modelopt.recipe.loader``.
-"""
+from modelopt.torch.opt.config_loader import (
+    BUILTIN_RECIPES_LIB,
+    _load_raw_config,
+    _resolve_imports,
+    load_config,
+)
 
-from importlib.resources import files
-
-try:
-    from importlib.resources.abc import Traversable
-except ImportError:  # Python < 3.11
-    from importlib.abc import Traversable
-import re
-from pathlib import Path
-from typing import Any
-
-import yaml
-
-# Root to all built-in recipes. Users can create own recipes.
-BUILTIN_RECIPES_LIB = files("modelopt_recipes")
-
-_EXMY_RE = re.compile(r"^[Ee](\d+)[Mm](\d+)$")
-_EXMY_KEYS = frozenset({"num_bits", "scale_bits"})
-
-
-def _parse_exmy_num_bits(obj: Any) -> Any:
-    """Recursively convert ``ExMy`` strings in ``num_bits`` / ``scale_bits`` to ``(x, y)`` tuples."""
-    if isinstance(obj, dict):
-        return {
-            k: (
-                _parse_exmy(v)
-                if k in _EXMY_KEYS and isinstance(v, str)
-                else _parse_exmy_num_bits(v)
-            )
-            for k, v in obj.items()
-        }
-    if isinstance(obj, list):
-        return [_parse_exmy_num_bits(item) for item in obj]
-    return obj
-
-
-def _parse_exmy(s: str) -> tuple[int, int] | str:
-    m = _EXMY_RE.match(s)
-    if m:
-        return (int(m.group(1)), int(m.group(2)))
-    return s
-
-
-def _load_raw_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[Any]:
-    """Load a config YAML without resolving ``$import`` references.
-
-    config_file: Path to a config yaml file. The path suffix can be omitted.
-    """
-    paths_to_check: list[Path | Traversable] = []
-    if isinstance(config_file, str):
-        if not config_file.endswith(".yml") and not config_file.endswith(".yaml"):
-            paths_to_check.append(Path(f"{config_file}.yml"))
-            paths_to_check.append(Path(f"{config_file}.yaml"))
-            paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yml"))
-            paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yaml"))
-        else:
-            paths_to_check.append(Path(config_file))
-            paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(config_file))
-    elif isinstance(config_file, Path):
-        if config_file.suffix in (".yml", ".yaml"):
-            paths_to_check.append(config_file)
-            if not config_file.is_absolute():
-                paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(str(config_file)))
-        else:
-            paths_to_check.append(Path(f"{config_file}.yml"))
-            paths_to_check.append(Path(f"{config_file}.yaml"))
-            if not config_file.is_absolute():
-                paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yml"))
-                paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yaml"))
-    elif isinstance(config_file, Traversable):
-        paths_to_check.append(config_file)
-    else:
-        raise ValueError(f"Invalid config file of {config_file}")
-
-    config_path = None
-    for path in paths_to_check:
-        if path.is_file():
-            config_path = path
-            break
-    if not config_path:
-        raise ValueError(
-            f"Cannot find config file of {config_file}, paths checked: {paths_to_check}"
-        )
-
-    text = config_path.read_text(encoding="utf-8")
-    docs = list(yaml.safe_load_all(text))
-
-    if len(docs) == 0 or docs[0] is None:
-        return {}
-    if len(docs) == 1:
-        _raw = docs[0]
-    elif len(docs) == 2:
-        # Multi-document: first doc is imports/metadata, second is content.
-        # Merge the imports into the content for downstream resolution.
-        header, content = docs[0], docs[1]
-        if not isinstance(header, dict):
-            raise ValueError(
-                f"Config file {config_path}: first YAML document must be a mapping, "
-                f"got {type(header).__name__}"
-            )
-        if content is None:
-            content = {}
-        if isinstance(content, dict):
-            _raw = {**header, **content}
-        elif isinstance(content, list):
-            # List content with a header dict — attach imports via wrapper
-            _raw = {**header, "_list_content": content}
-        else:
-            raise ValueError(
-                f"Config file {config_path}: second YAML document must be a mapping or list, "
-                f"got {type(content).__name__}"
-            )
-    else:
-        raise ValueError(
-            f"Config file {config_path}: expected 1 or 2 YAML documents, got {len(docs)}"
-        )
-
-    if not isinstance(_raw, (dict, list)):
-        raise ValueError(
-            f"Config file {config_path} must contain a YAML mapping or list, got {type(_raw).__name__}"
-        )
-    return _parse_exmy_num_bits(_raw)
-
-
-# ---------------------------------------------------------------------------
-# $import resolution
-# ---------------------------------------------------------------------------
-
-_IMPORT_KEY = "$import"
-
-
-def _resolve_imports(
-    data: dict[str, Any], _loading: frozenset[str] | None = None
-) -> dict[str, Any]:
-    """Resolve the ``imports`` section and ``$import`` references.
-
-    See ``modelopt.recipe.loader`` module docstring for the full specification.
-    This function lives in ``_config_loader`` (not ``loader``) so that it can be
-    used from ``modelopt.torch.quantization.config`` without circular imports.
-    """
-    imports_dict = data.pop("imports", None)
-    if not imports_dict:
-        return data
-
-    if not isinstance(imports_dict, dict):
-        raise ValueError(
-            f"'imports' must be a dict mapping names to config paths, got: {type(imports_dict).__name__}"
-        )
-
-    if _loading is None:
-        _loading = frozenset()
-
-    # Build name → config mapping (recursively resolve nested imports)
-    import_map: dict[str, Any] = {}
-    for name, config_path in imports_dict.items():
-        if not config_path:
-            raise ValueError(f"Import {name!r} has an empty config path.")
-        if config_path in _loading:
-            raise ValueError(
-                f"Circular import detected: {config_path!r} is already being loaded. "
-                f"Import chain: {sorted(_loading)}"
-            )
-        snippet = _load_raw_config(config_path)
-        if isinstance(snippet, dict) and "imports" in snippet:
-            snippet = _resolve_imports(snippet, _loading | {config_path})
-        # Unwrap _list_content (multi-document YAML: imports + list content)
-        if isinstance(snippet, dict) and "_list_content" in snippet:
-            snippet = snippet["_list_content"]
-        import_map[name] = snippet
-
-    def _lookup(ref_name: str, context: str) -> Any:
-        if ref_name not in import_map:
-            raise ValueError(
-                f"Unknown $import reference {ref_name!r} in {context}. "
-                f"Available imports: {list(import_map.keys())}"
-            )
-        return import_map[ref_name]
-
-    def _resolve_list(entries: list[Any]) -> list[Any]:
-        """Resolve $import markers in a list of entries."""
-        resolved: list[Any] = []
-        for entry in entries:
-            if isinstance(entry, dict) and _IMPORT_KEY in entry:
-                if len(entry) > 1:
-                    raise ValueError(
-                        f"$import must be the only key in the dict, got extra keys: "
-                        f"{sorted(k for k in entry if k != _IMPORT_KEY)}"
-                    )
-                imported = _lookup(entry[_IMPORT_KEY], "list entry")
-                if not isinstance(imported, list):
-                    raise ValueError(
-                        f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a "
-                        f"list, got {type(imported).__name__}."
-                    )
-                resolved.extend(imported)
-            elif (
-                isinstance(entry, dict)
-                and isinstance(entry.get("cfg"), dict)
-                and _IMPORT_KEY in entry["cfg"]
-            ):
-                ref = entry["cfg"].pop(_IMPORT_KEY)
-                inline_keys = dict(entry["cfg"])
-                ref_names = ref if isinstance(ref, list) else [ref]
-
-                merged: dict[str, Any] = {}
-                for rname in ref_names:
-                    snippet = _lookup(rname, f"cfg of {entry}")
-                    if not isinstance(snippet, dict):
-                        raise ValueError(
-                            f"$import {rname!r} in cfg must resolve to a dict, "
-                            f"got {type(snippet).__name__}."
-                        )
-                    merged.update(snippet)
-
-                merged.update(inline_keys)
-                entry["cfg"] = merged
-                resolved.append(entry)
-            else:
-                resolved.append(entry)
-        return resolved
-
-    # Resolve in quant_cfg (top-level or nested under quantize)
-    for container in [data, data.get("quantize", {})]:
-        if isinstance(container, dict):
-            quant_cfg = container.get("quant_cfg")
-            if isinstance(quant_cfg, list):
-                container["quant_cfg"] = _resolve_list(quant_cfg)
-
-    # Resolve in _list_content (multi-document snippets)
-    if "_list_content" in data:
-        data["_list_content"] = _resolve_list(data["_list_content"])
-
-    return data
-
-
-def load_config(config_path: str | Path | Traversable) -> dict[str, Any] | list[Any]:
-    """Load a YAML config and resolve all ``$import`` references.
-
-    This is the primary config loading entry point.  It loads the YAML file,
-    resolves any ``imports`` / ``$import`` directives, and returns the final
-    config dict or list.
-    """
-    data = _load_raw_config(config_path)
-    if isinstance(data, dict) and "imports" in data:
-        data = _resolve_imports(data)
-    return data
+__all__ = ["BUILTIN_RECIPES_LIB", "_load_raw_config", "_resolve_imports", "load_config"]
diff --git a/modelopt/torch/opt/config_loader.py b/modelopt/torch/opt/config_loader.py
new file mode 100644
index 0000000000..3c03f4b445
--- /dev/null
+++ b/modelopt/torch/opt/config_loader.py
@@ -0,0 +1,268 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""General-purpose YAML config loading with ``$import`` resolution.
+
+This module provides the config loading infrastructure used by both
+``modelopt.recipe`` and ``modelopt.torch.quantization.config``.  It lives
+in ``modelopt.torch.opt`` (the lowest dependency layer) to avoid circular
+imports.
+"""
+
+from importlib.resources import files
+
+try:
+    from importlib.resources.abc import Traversable
+except ImportError:  # Python < 3.11
+    from importlib.abc import Traversable
+import re
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+# Root to all built-in recipes. Users can create own recipes.
+BUILTIN_RECIPES_LIB = files("modelopt_recipes")
+
+_EXMY_RE = re.compile(r"^[Ee](\d+)[Mm](\d+)$")
+_EXMY_KEYS = frozenset({"num_bits", "scale_bits"})
+
+
+def _parse_exmy_num_bits(obj: Any) -> Any:
+    """Recursively convert ``ExMy`` strings in ``num_bits`` / ``scale_bits`` to ``(x, y)`` tuples."""
+    if isinstance(obj, dict):
+        return {
+            k: (
+                _parse_exmy(v)
+                if k in _EXMY_KEYS and isinstance(v, str)
+                else _parse_exmy_num_bits(v)
+            )
+            for k, v in obj.items()
+        }
+    if isinstance(obj, list):
+        return [_parse_exmy_num_bits(item) for item in obj]
+    return obj
+
+
+def _parse_exmy(s: str) -> tuple[int, int] | str:
+    m = _EXMY_RE.match(s)
+    if m:
+        return (int(m.group(1)), int(m.group(2)))
+    return s
+
+
+def _load_raw_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[Any]:
+    """Load a config YAML without resolving ``$import`` references.
+
+    config_file: Path to a config yaml file. The path suffix can be omitted.
+    """
+    paths_to_check: list[Path | Traversable] = []
+    if isinstance(config_file, str):
+        if not config_file.endswith(".yml") and not config_file.endswith(".yaml"):
+            paths_to_check.append(Path(f"{config_file}.yml"))
+            paths_to_check.append(Path(f"{config_file}.yaml"))
+            paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yml"))
+            paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yaml"))
+        else:
+            paths_to_check.append(Path(config_file))
+            paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(config_file))
+    elif isinstance(config_file, Path):
+        if config_file.suffix in (".yml", ".yaml"):
+            paths_to_check.append(config_file)
+            if not config_file.is_absolute():
+                paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(str(config_file)))
+        else:
+            paths_to_check.append(Path(f"{config_file}.yml"))
+            paths_to_check.append(Path(f"{config_file}.yaml"))
+            if not config_file.is_absolute():
+                paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yml"))
+                paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yaml"))
+    elif isinstance(config_file, Traversable):
+        paths_to_check.append(config_file)
+    else:
+        raise ValueError(f"Invalid config file of {config_file}")
+
+    config_path = None
+    for path in paths_to_check:
+        if path.is_file():
+            config_path = path
+            break
+    if not config_path:
+        raise ValueError(
+            f"Cannot find config file of {config_file}, paths checked: {paths_to_check}"
+        )
+
+    text = config_path.read_text(encoding="utf-8")
+    docs = list(yaml.safe_load_all(text))
+
+    if len(docs) == 0 or docs[0] is None:
+        return {}
+    if len(docs) == 1:
+        _raw = docs[0]
+    elif len(docs) == 2:
+        # Multi-document: first doc is imports/metadata, second is content.
+        # Merge the imports into the content for downstream resolution.
+        header, content = docs[0], docs[1]
+        if not isinstance(header, dict):
+            raise ValueError(
+                f"Config file {config_path}: first YAML document must be a mapping, "
+                f"got {type(header).__name__}"
+            )
+        if content is None:
+            content = {}
+        if isinstance(content, dict):
+            _raw = {**header, **content}
+        elif isinstance(content, list):
+            # List content with a header dict — attach imports via wrapper
+            _raw = {**header, "_list_content": content}
+        else:
+            raise ValueError(
+                f"Config file {config_path}: second YAML document must be a mapping or list, "
+                f"got {type(content).__name__}"
+            )
+    else:
+        raise ValueError(
+            f"Config file {config_path}: expected 1 or 2 YAML documents, got {len(docs)}"
+        )
+
+    if not isinstance(_raw, (dict, list)):
+        raise ValueError(
+            f"Config file {config_path} must contain a YAML mapping or list, got {type(_raw).__name__}"
+        )
+    return _parse_exmy_num_bits(_raw)
+
+
+# ---------------------------------------------------------------------------
+# $import resolution
+# ---------------------------------------------------------------------------
+
+_IMPORT_KEY = "$import"
+
+
+def _resolve_imports(
+    data: dict[str, Any], _loading: frozenset[str] | None = None
+) -> dict[str, Any]:
+    """Resolve the ``imports`` section and ``$import`` references.
+
+    See ``modelopt.recipe.loader`` module docstring for the full specification.
+    This function lives in ``_config_loader`` (not ``loader``) so that it can be
+    used from ``modelopt.torch.quantization.config`` without circular imports.
+    """
+    imports_dict = data.pop("imports", None)
+    if not imports_dict:
+        return data
+
+    if not isinstance(imports_dict, dict):
+        raise ValueError(
+            f"'imports' must be a dict mapping names to config paths, got: {type(imports_dict).__name__}"
+        )
+
+    if _loading is None:
+        _loading = frozenset()
+
+    # Build name → config mapping (recursively resolve nested imports)
+    import_map: dict[str, Any] = {}
+    for name, config_path in imports_dict.items():
+        if not config_path:
+            raise ValueError(f"Import {name!r} has an empty config path.")
+        if config_path in _loading:
+            raise ValueError(
+                f"Circular import detected: {config_path!r} is already being loaded. "
+                f"Import chain: {sorted(_loading)}"
+            )
+        snippet = _load_raw_config(config_path)
+        if isinstance(snippet, dict) and "imports" in snippet:
+            snippet = _resolve_imports(snippet, _loading | {config_path})
+        # Unwrap _list_content (multi-document YAML: imports + list content)
+        if isinstance(snippet, dict) and "_list_content" in snippet:
+            snippet = snippet["_list_content"]
+        import_map[name] = snippet
+
+    def _lookup(ref_name: str, context: str) -> Any:
+        if ref_name not in import_map:
+            raise ValueError(
+                f"Unknown $import reference {ref_name!r} in {context}. "
+                f"Available imports: {list(import_map.keys())}"
+            )
+        return import_map[ref_name]
+
+    def _resolve_list(entries: list[Any]) -> list[Any]:
+        """Resolve $import markers in a list of entries."""
+        resolved: list[Any] = []
+        for entry in entries:
+            if isinstance(entry, dict) and _IMPORT_KEY in entry:
+                if len(entry) > 1:
+                    raise ValueError(
+                        f"$import must be the only key in the dict, got extra keys: "
+                        f"{sorted(k for k in entry if k != _IMPORT_KEY)}"
+                    )
+                imported = _lookup(entry[_IMPORT_KEY], "list entry")
+                if not isinstance(imported, list):
+                    raise ValueError(
+                        f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a "
+                        f"list, got {type(imported).__name__}."
+                    )
+                resolved.extend(imported)
+            elif (
+                isinstance(entry, dict)
+                and isinstance(entry.get("cfg"), dict)
+                and _IMPORT_KEY in entry["cfg"]
+            ):
+                ref = entry["cfg"].pop(_IMPORT_KEY)
+                inline_keys = dict(entry["cfg"])
+                ref_names = ref if isinstance(ref, list) else [ref]
+
+                merged: dict[str, Any] = {}
+                for rname in ref_names:
+                    snippet = _lookup(rname, f"cfg of {entry}")
+                    if not isinstance(snippet, dict):
+                        raise ValueError(
+                            f"$import {rname!r} in cfg must resolve to a dict, "
+                            f"got {type(snippet).__name__}."
+                        )
+                    merged.update(snippet)
+
+                merged.update(inline_keys)
+                entry["cfg"] = merged
+                resolved.append(entry)
+            else:
+                resolved.append(entry)
+        return resolved
+
+    # Resolve in quant_cfg (top-level or nested under quantize)
+    for container in [data, data.get("quantize", {})]:
+        if isinstance(container, dict):
+            quant_cfg = container.get("quant_cfg")
+            if isinstance(quant_cfg, list):
+                container["quant_cfg"] = _resolve_list(quant_cfg)
+
+    # Resolve in _list_content (multi-document snippets)
+    if "_list_content" in data:
+        data["_list_content"] = _resolve_list(data["_list_content"])
+
+    return data
+
+
+def load_config(config_path: str | Path | Traversable) -> dict[str, Any] | list[Any]:
+    """Load a YAML config and resolve all ``$import`` references.
+
+    This is the primary config loading entry point.  It loads the YAML file,
+    resolves any ``imports`` / ``$import`` directives, and returns the final
+    config dict or list.
+    """
+    data = _load_raw_config(config_path)
+    if isinstance(data, dict) and "imports" in data:
+        data = _resolve_imports(data)
+    return data
diff --git a/modelopt/torch/quantization/config.py b/modelopt/torch/quantization/config.py
index 5430391adb..c6f49b2de9 100644
--- a/modelopt/torch/quantization/config.py
+++ b/modelopt/torch/quantization/config.py
@@ -157,8 +157,8 @@
 from pydantic import ValidationInfo, field_validator, model_validator
 from typing_extensions import Required, TypedDict
 
-from modelopt.recipe._config_loader import load_config
 from modelopt.torch.opt.config import ModeloptBaseConfig, ModeloptField
+from modelopt.torch.opt.config_loader import load_config
 from modelopt.torch.utils.network import ConstructorLike
 
 

From 65b291d2dd5e1c20c11e02d3834197b1c8bd514e Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Wed, 15 Apr 2026 14:40:32 -0700
Subject: [PATCH 20/30] make config_root so it is logically independent of
 recipe

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 modelopt/recipe/_config_loader.py   | 12 ++++++++++--
 modelopt/torch/opt/config_loader.py | 16 ++++++++--------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/modelopt/recipe/_config_loader.py b/modelopt/recipe/_config_loader.py
index 5ed2c80361..1abbd36c98 100644
--- a/modelopt/recipe/_config_loader.py
+++ b/modelopt/recipe/_config_loader.py
@@ -16,10 +16,18 @@
 """Re-export config loading utilities from ``modelopt.torch.opt.config_loader``."""
 
 from modelopt.torch.opt.config_loader import (
-    BUILTIN_RECIPES_LIB,
+    BUILTIN_CONFIG_ROOT,
     _load_raw_config,
     _resolve_imports,
     load_config,
 )
 
-__all__ = ["BUILTIN_RECIPES_LIB", "_load_raw_config", "_resolve_imports", "load_config"]
+BUILTIN_RECIPES_LIB = BUILTIN_CONFIG_ROOT
+
+__all__ = [
+    "BUILTIN_CONFIG_ROOT",
+    "BUILTIN_RECIPES_LIB",
+    "_load_raw_config",
+    "_resolve_imports",
+    "load_config",
+]
diff --git a/modelopt/torch/opt/config_loader.py b/modelopt/torch/opt/config_loader.py
index 3c03f4b445..25d4daa7b8 100644
--- a/modelopt/torch/opt/config_loader.py
+++ b/modelopt/torch/opt/config_loader.py
@@ -33,8 +33,8 @@
 
 import yaml
 
-# Root to all built-in recipes. Users can create own recipes.
-BUILTIN_RECIPES_LIB = files("modelopt_recipes")
+# Root to all built-in configs and recipes.
+BUILTIN_CONFIG_ROOT = files("modelopt_recipes")
 
 _EXMY_RE = re.compile(r"^[Ee](\d+)[Mm](\d+)$")
 _EXMY_KEYS = frozenset({"num_bits", "scale_bits"})
@@ -73,22 +73,22 @@ def _load_raw_config(config_file: str | Path | Traversable) -> dict[str, Any] |
         if not config_file.endswith(".yml") and not config_file.endswith(".yaml"):
             paths_to_check.append(Path(f"{config_file}.yml"))
             paths_to_check.append(Path(f"{config_file}.yaml"))
-            paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yml"))
-            paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yaml"))
+            paths_to_check.append(BUILTIN_CONFIG_ROOT.joinpath(f"{config_file}.yml"))
+            paths_to_check.append(BUILTIN_CONFIG_ROOT.joinpath(f"{config_file}.yaml"))
         else:
             paths_to_check.append(Path(config_file))
-            paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(config_file))
+            paths_to_check.append(BUILTIN_CONFIG_ROOT.joinpath(config_file))
     elif isinstance(config_file, Path):
         if config_file.suffix in (".yml", ".yaml"):
             paths_to_check.append(config_file)
             if not config_file.is_absolute():
-                paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(str(config_file)))
+                paths_to_check.append(BUILTIN_CONFIG_ROOT.joinpath(str(config_file)))
         else:
             paths_to_check.append(Path(f"{config_file}.yml"))
             paths_to_check.append(Path(f"{config_file}.yaml"))
             if not config_file.is_absolute():
-                paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yml"))
-                paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yaml"))
+                paths_to_check.append(BUILTIN_CONFIG_ROOT.joinpath(f"{config_file}.yml"))
+                paths_to_check.append(BUILTIN_CONFIG_ROOT.joinpath(f"{config_file}.yaml"))
     elif isinstance(config_file, Traversable):
         paths_to_check.append(config_file)
     else:

From 9f69cd05ecc7530011d02845647075adadfc0922 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Wed, 15 Apr 2026 15:12:02 -0700
Subject: [PATCH 21/30] README

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 modelopt_recipes/configs/ptq/units/README.md | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 modelopt_recipes/configs/ptq/units/README.md

diff --git a/modelopt_recipes/configs/ptq/units/README.md b/modelopt_recipes/configs/ptq/units/README.md
new file mode 100644
index 0000000000..50cf028c15
--- /dev/null
+++ b/modelopt_recipes/configs/ptq/units/README.md
@@ -0,0 +1,17 @@
+# PTQ Config Units
+
+Reusable building blocks for composing PTQ quantization configurations.
+Each file defines one or more `quant_cfg` entries that can be imported
+into recipes or presets via `$import`.
+
+Units are **not** standalone configs — they don't have `algorithm` or
+`metadata`. They are meant to be composed into complete configs by
+recipes (under `general/` or `models/`) or presets (under `presets/`).
+
+| File | Description |
+|------|-------------|
+| `base_disable_all.yaml` | Deny-all entry: disables all quantizers as the first step |
+| `default_disabled_quantizers.yaml` | Standard exclusions (LM head, routers, BatchNorm, etc.) |
+| `fp8_kv.yaml` | FP8 E4M3 KV cache quantizer entry |
+| `w8a8_fp8_fp8.yaml` | FP8 weight + activation quantizer entries (W8A8) |
+| `w4a4_nvfp4_nvfp4.yaml` | NVFP4 weight + activation quantizer entries (W4A4) |

From 0b79b9fc2bf80d22f301faf1e85e27d068acee97 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Wed, 15 Apr 2026 15:24:57 -0700
Subject: [PATCH 22/30] Change Log

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 CHANGELOG.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index e2c4b2a7c0..f22ec42256 100755
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -15,7 +15,7 @@ Changelog
 - Enable PTQ workflow for the Step3.5-Flash MoE model with NVFP4 W4A4 + FP8 KV cache quantization. See `modelopt_recipes/models/Step3.5-Flash/nvfp4-mlp-only.yaml <https://github.com/NVIDIA/Model-Optimizer/blob/main/modelopt_recipes/models/Step3.5-Flash/nvfp4-mlp-only.yaml>`_ for more details.
 - Add support for vLLM fakequant reload using ModelOpt state for HF models. See `examples/vllm_serve/README.md <https://github.com/NVIDIA/Model-Optimizer/tree/main/examples/vllm_serve#load-qatptq-model-and-serve-in-vllm-wip>`_ for more details.
 - [Early Testing] Add Claude Code PTQ skill (``.claude/skills/ptq/``) for agent-assisted post-training quantization. The skill guides the agent through environment detection, model support checking, format selection, and execution via the launcher or manual SLURM/Docker/bare GPU paths. Includes handling for unlisted models with custom module patching. This feature is in early testing — use with caution.
-- Add composable ``$import`` system for recipe YAML configs. Recipes can now declare an ``imports`` section mapping names to reusable config snippet files. The ``{$import: name}`` marker resolves at load time — as a dict value it replaces the content with ordered override precedence (later imports override earlier, inline keys override all), as a list element it splices the snippet entries. Supports multi-import (``$import: [a, b]``) and inline extension/override. Resolution is recursive with circular import detection. All built-in PTQ recipes converted to use imports with shared snippets under ``modelopt_recipes/configs/``. See :ref:`composable-imports` for the full specification.
+- Add composable ``$import`` system for recipe YAML configs, enabling reusable config snippets referenced via ``{$import: name}`` markers. All built-in PTQ recipes converted to use imports with shared snippets under ``modelopt_recipes/configs/`` (numeric formats, quant_cfg building blocks, presets). See :ref:`composable-imports`.
 
 **Backward Breaking Changes**
 

From e3c9e5003901a088e5ff59ed933e3cbd3a4081a8 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Wed, 15 Apr 2026 15:58:17 -0700
Subject: [PATCH 23/30] use full names, do not shorten

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 docs/source/guides/10_recipes.rst             | 37 ++++++++++---------
 .../configs/ptq/presets/model/fp8.yaml        |  8 ++--
 .../general/ptq/fp8_default-fp8_kv.yaml       |  4 +-
 .../general/ptq/nvfp4_default-fp8_kv.yaml     |  4 +-
 .../ptq/nvfp4_default-none_kv_gptq.yaml       |  4 +-
 .../ptq/nvfp4_experts_only-fp8_kv.yaml        |  4 +-
 .../general/ptq/nvfp4_mlp_only-fp8_kv.yaml    |  4 +-
 .../general/ptq/nvfp4_omlp_only-fp8_kv.yaml   |  4 +-
 8 files changed, 35 insertions(+), 34 deletions(-)

diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst
index a8986312f0..ed783dc93c 100644
--- a/docs/source/guides/10_recipes.rst
+++ b/docs/source/guides/10_recipes.rst
@@ -61,7 +61,7 @@ styles can be used in a single-file or directory layout.
 Single-file format
 ------------------
 
-The simplest form is a single ``.yml`` or ``.yaml`` file.
+The simplest form is a single ``.yaml`` file.
 
 **Inline style** — all config values are written directly:
 
@@ -131,10 +131,10 @@ example:
 .. code-block:: text
 
    my_recipe/
-     recipe.yml      # metadata section (+ optional imports)
-     quantize.yml    # quantize section (quant_cfg + algorithm)
+     recipe.yaml      # metadata section (+ optional imports)
+     quantize.yaml    # quantize section (+ optional imports)
 
-``recipe.yml``:
+``recipe.yaml``:
 
 .. code-block:: yaml
 
@@ -142,7 +142,7 @@ example:
      recipe_type: ptq
      description: My custom NVFP4 recipe.
 
-``quantize.yml``:
+``quantize.yaml``:
 
 .. code-block:: yaml
 
@@ -159,8 +159,9 @@ example:
          num_bits: e4m3
          axis:
 
-Both inline and import styles work with the directory format.  When using
-imports in a directory recipe, place the ``imports`` section in ``recipe.yml``.
+Both inline and import styles work with the directory format.  Any YAML file
+in the directory can have its own ``imports`` section — ``recipe.yaml``,
+``quantize.yaml``, or any other config file.
 
 .. _composable-imports:
 
@@ -475,7 +476,7 @@ type depends on the ``recipe_type`` in the metadata:
 .. code-block:: python
 
    # Load a custom recipe from the filesystem (file or directory)
-   recipe = load_recipe("/path/to/my_custom_recipe.yml")
+   recipe = load_recipe("/path/to/my_custom_recipe.yaml")
    # or: recipe = load_recipe("/path/to/my_recipe_dir/")
 
 Command-line usage
@@ -529,7 +530,7 @@ This means built-in recipes can be referenced without any prefix:
 
    # These are all equivalent:
    load_recipe("general/ptq/fp8_default-fp8_kv")
-   load_recipe("general/ptq/fp8_default-fp8_kv.yml")
+   load_recipe("general/ptq/fp8_default-fp8_kv.yaml")
 
 
 Writing a custom recipe
@@ -547,7 +548,7 @@ Example -- creating a custom PTQ recipe using imports:
 
 .. code-block:: yaml
 
-   # my_int8_recipe.yml
+   # my_int8_recipe.yaml
    imports:
      base_disable_all: configs/ptq/units/base_disable_all
      default_disabled: configs/ptq/units/default_disabled_quantizers
@@ -586,19 +587,19 @@ The ``modelopt_recipes/`` package is organized as follows:
    +-- __init__.py
    +-- general/                    # Model-agnostic recipes
    |   +-- ptq/
-   |       +-- fp8_default-fp8_kv.yml
-   |       +-- nvfp4_default-fp8_kv.yml
-   |       +-- nvfp4_mlp_only-fp8_kv.yml
-   |       +-- nvfp4_experts_only-fp8_kv.yml
-   |       +-- nvfp4_omlp_only-fp8_kv.yml
+   |       +-- fp8_default-fp8_kv.yaml
+   |       +-- nvfp4_default-fp8_kv.yaml
+   |       +-- nvfp4_mlp_only-fp8_kv.yaml
+   |       +-- nvfp4_experts_only-fp8_kv.yaml
+   |       +-- nvfp4_omlp_only-fp8_kv.yaml
    +-- models/                     # Model-specific recipes
    |   +-- Step3.5-Flash/
    |       +-- nvfp4-mlp-only.yaml
    +-- configs/                    # Reusable config snippets (imported via $import)
        +-- numerics/               # Numeric format definitions
-       |   +-- fp8.yml
-       |   +-- nvfp4_static.yml
-       |   +-- nvfp4.yml
+       |   +-- fp8.yaml
+       |   +-- nvfp4_static.yaml
+       |   +-- nvfp4.yaml
        +-- ptq/                    # PTQ-specific entry snippets
            +-- base_disable_all.yaml
            +-- default_disabled_quantizers.yaml
diff --git a/modelopt_recipes/configs/ptq/presets/model/fp8.yaml b/modelopt_recipes/configs/ptq/presets/model/fp8.yaml
index 763fe8ee5f..3f7ef9f860 100644
--- a/modelopt_recipes/configs/ptq/presets/model/fp8.yaml
+++ b/modelopt_recipes/configs/ptq/presets/model/fp8.yaml
@@ -17,11 +17,11 @@
 # Equivalent to the hardcoded FP8_DEFAULT_CFG in config.py.
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
-  w8a8: configs/ptq/units/w8a8_fp8_fp8
-  default_disabled: configs/ptq/units/default_disabled_quantizers
+  w8a8_fp8_fp8: configs/ptq/units/w8a8_fp8_fp8
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
 
 algorithm: max
 quant_cfg:
   - $import: base_disable_all
-  - $import: w8a8
-  - $import: default_disabled
+  - $import: w8a8_fp8_fp8
+  - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
index 8fe8c121d2..85267c8672 100644
--- a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml
@@ -15,7 +15,7 @@
 
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
-  default_disabled: configs/ptq/units/default_disabled_quantizers
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
   w8a8_fp8_fp8: configs/ptq/units/w8a8_fp8_fp8
   fp8_kv: configs/ptq/units/fp8_kv
 
@@ -28,4 +28,4 @@ quantize:
     - $import: base_disable_all
     - $import: w8a8_fp8_fp8
     - $import: fp8_kv
-    - $import: default_disabled
+    - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
index 8da3bebff1..e616a3a63a 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml
@@ -15,7 +15,7 @@
 
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
-  default_disabled: configs/ptq/units/default_disabled_quantizers
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
   w4a4_nvfp4_nvfp4: configs/ptq/units/w4a4_nvfp4_nvfp4
   fp8_kv: configs/ptq/units/fp8_kv
 
@@ -28,4 +28,4 @@ quantize:
     - $import: base_disable_all
     - $import: w4a4_nvfp4_nvfp4
     - $import: fp8_kv
-    - $import: default_disabled
+    - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml
index 04cfcfa925..1f9fa822cd 100644
--- a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml
@@ -15,7 +15,7 @@
 
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
-  default_disabled: configs/ptq/units/default_disabled_quantizers
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
   nvfp4_static: configs/numerics/nvfp4_static
   nvfp4: configs/numerics/nvfp4
 
@@ -36,4 +36,4 @@ quantize:
         $import: nvfp4
     - quantizer_name: '*[kv]_bmm_quantizer'
       enable: false
-    - $import: default_disabled
+    - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
index 689e981b34..cd7ce28b46 100644
--- a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml
@@ -15,7 +15,7 @@
 
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
-  default_disabled: configs/ptq/units/default_disabled_quantizers
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
   nvfp4: configs/numerics/nvfp4
   fp8_kv: configs/ptq/units/fp8_kv
 
@@ -39,4 +39,4 @@ quantize:
       cfg:
         $import: nvfp4
     - $import: fp8_kv
-    - $import: default_disabled
+    - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
index ee26898cd9..9e300b2501 100644
--- a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml
@@ -15,7 +15,7 @@
 
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
-  default_disabled: configs/ptq/units/default_disabled_quantizers
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
   nvfp4: configs/numerics/nvfp4
   fp8_kv: configs/ptq/units/fp8_kv
 
@@ -39,4 +39,4 @@ quantize:
       cfg:
         $import: nvfp4
     - $import: fp8_kv
-    - $import: default_disabled
+    - $import: default_disabled_quantizers
diff --git a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
index 1075303f72..2c83641137 100644
--- a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
+++ b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml
@@ -15,7 +15,7 @@
 
 imports:
   base_disable_all: configs/ptq/units/base_disable_all
-  default_disabled: configs/ptq/units/default_disabled_quantizers
+  default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers
   nvfp4: configs/numerics/nvfp4
   fp8_kv: configs/ptq/units/fp8_kv
 
@@ -45,4 +45,4 @@ quantize:
       cfg:
         $import: nvfp4
     - $import: fp8_kv
-    - $import: default_disabled
+    - $import: default_disabled_quantizers

From 070f2154a8d4ca38727f359cfd45018f3cb9d9bb Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Wed, 15 Apr 2026 16:07:46 -0700
Subject: [PATCH 24/30] cleaner code

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 modelopt/recipe/loader.py           | 23 +++++++++++---
 modelopt/torch/opt/config_loader.py | 48 ++++++++++++++++-------------
 2 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/modelopt/recipe/loader.py b/modelopt/recipe/loader.py
index f91f6d9920..a23353f944 100644
--- a/modelopt/recipe/loader.py
+++ b/modelopt/recipe/loader.py
@@ -88,7 +88,10 @@ def _load_recipe_from_file(recipe_file: Path | Traversable) -> ModelOptRecipeBas
     plus a ``quant_cfg`` mapping and an optional ``algorithm`` for PTQ recipes.
     """
     raw = _load_raw_config(recipe_file)
-    assert isinstance(raw, dict), f"Recipe file {recipe_file} must be a YAML mapping."
+    if not isinstance(raw, dict):
+        raise ValueError(
+            f"Recipe file {recipe_file} must be a YAML mapping, got {type(raw).__name__}."
+        )
     data = _resolve_imports(raw)
 
     metadata = data.get("metadata", {})
@@ -121,7 +124,10 @@ def _load_recipe_from_dir(recipe_dir: Path | Traversable) -> ModelOptRecipeBase:
         )
 
     recipe_data = _load_raw_config(recipe_file)
-    assert isinstance(recipe_data, dict), f"Recipe file {recipe_file} must be a YAML mapping."
+    if not isinstance(recipe_data, dict):
+        raise ValueError(
+            f"Recipe file {recipe_file} must be a YAML mapping, got {type(recipe_data).__name__}."
+        )
     metadata = recipe_data.get("metadata", {})
     recipe_type = metadata.get("recipe_type")
     if recipe_type is None:
@@ -138,14 +144,21 @@ def _load_recipe_from_dir(recipe_dir: Path | Traversable) -> ModelOptRecipeBase:
             raise ValueError(
                 f"Cannot find quantize in {recipe_dir}. Looked for: quantize.yml, quantize.yaml"
             )
-        # Resolve imports: imports are in recipe.yml, quantize data is separate
+        # Resolve imports from both recipe.yaml and quantize.yaml
         quantize_data = _load_raw_config(quantize_file)
-        assert isinstance(quantize_data, dict), f"{quantize_file} must be a YAML mapping."
+        if not isinstance(quantize_data, dict):
+            raise ValueError(
+                f"{quantize_file} must be a YAML mapping, got {type(quantize_data).__name__}."
+            )
+        # Resolve quantize.yaml's own imports first (if any)
+        if "imports" in quantize_data:
+            quantize_data = _resolve_imports(quantize_data)
+        # Then resolve recipe.yaml's imports applied to the quantize data
         combined: dict[str, Any] = {"quantize": quantize_data}
         imports = recipe_data.get("imports")
         if imports:
             combined["imports"] = imports
-        combined = _resolve_imports(combined)
+            combined = _resolve_imports(combined)
         return ModelOptPTQRecipe(
             recipe_type=RecipeType.PTQ,
             description=metadata.get("description", "PTQ recipe."),
diff --git a/modelopt/torch/opt/config_loader.py b/modelopt/torch/opt/config_loader.py
index 25d4daa7b8..6e63f3cdc6 100644
--- a/modelopt/torch/opt/config_loader.py
+++ b/modelopt/torch/opt/config_loader.py
@@ -198,6 +198,28 @@ def _lookup(ref_name: str, context: str) -> Any:
             )
         return import_map[ref_name]
 
+    def _resolve_dict_value(d: dict[str, Any], key: str) -> None:
+        """Resolve ``$import`` in a dict value: ``key: {$import: name, ...inline}``."""
+        val = d[key]
+        if not isinstance(val, dict) or _IMPORT_KEY not in val:
+            return
+        ref = val.pop(_IMPORT_KEY)
+        inline_keys = dict(val)
+        ref_names = ref if isinstance(ref, list) else [ref]
+
+        merged: dict[str, Any] = {}
+        for rname in ref_names:
+            snippet = _lookup(rname, f"{key} of {d}")
+            if not isinstance(snippet, dict):
+                raise ValueError(
+                    f"$import {rname!r} in {key} must resolve to a dict, "
+                    f"got {type(snippet).__name__}."
+                )
+            merged.update(snippet)
+
+        merged.update(inline_keys)
+        d[key] = merged
+
     def _resolve_list(entries: list[Any]) -> list[Any]:
         """Resolve $import markers in a list of entries."""
         resolved: list[Any] = []
@@ -215,27 +237,11 @@ def _resolve_list(entries: list[Any]) -> list[Any]:
                         f"list, got {type(imported).__name__}."
                     )
                 resolved.extend(imported)
-            elif (
-                isinstance(entry, dict)
-                and isinstance(entry.get("cfg"), dict)
-                and _IMPORT_KEY in entry["cfg"]
-            ):
-                ref = entry["cfg"].pop(_IMPORT_KEY)
-                inline_keys = dict(entry["cfg"])
-                ref_names = ref if isinstance(ref, list) else [ref]
-
-                merged: dict[str, Any] = {}
-                for rname in ref_names:
-                    snippet = _lookup(rname, f"cfg of {entry}")
-                    if not isinstance(snippet, dict):
-                        raise ValueError(
-                            f"$import {rname!r} in cfg must resolve to a dict, "
-                            f"got {type(snippet).__name__}."
-                        )
-                    merged.update(snippet)
-
-                merged.update(inline_keys)
-                entry["cfg"] = merged
+            elif isinstance(entry, dict):
+                # Resolve $import in any dict value within the entry
+                for key in list(entry):
+                    if isinstance(entry.get(key), dict) and _IMPORT_KEY in entry[key]:
+                        _resolve_dict_value(entry, key)
                 resolved.append(entry)
             else:
                 resolved.append(entry)

From 1127f323fc37b87bf43f3536d64fb21008bb5a8a Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Wed, 15 Apr 2026 16:14:09 -0700
Subject: [PATCH 25/30] A new test

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 tests/unit/recipe/test_loader.py | 47 ++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py
index 4dd235a081..beb1319284 100644
--- a/tests/unit/recipe/test_loader.py
+++ b/tests/unit/recipe/test_loader.py
@@ -557,6 +557,53 @@ def test_import_cfg_inline_overrides_import(tmp_path):
     assert cfg["axis"] is None
 
 
+def test_import_in_non_cfg_dict_value(tmp_path):
+    """$import resolves in any dict value, not just cfg."""
+    (tmp_path / "bias_cfg.yml").write_text("enable: true\ntype: static\naxis: -1\n")
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  bias_cfg: {tmp_path / 'bias_cfg.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - quantizer_name: '*weight_quantizer'\n"
+        f"      bias:\n"
+        f"        $import: bias_cfg\n"
+    )
+    recipe = load_recipe(recipe_file)
+    entry = recipe.quantize["quant_cfg"][0]
+    assert entry["bias"] == {"enable": True, "type": "static", "axis": -1}
+
+
+def test_import_in_multiple_dict_values(tmp_path):
+    """$import resolves independently in multiple dict values of the same entry."""
+    (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n")
+    (tmp_path / "bias_cfg.yml").write_text("enable: true\ntype: dynamic\n")
+    recipe_file = tmp_path / "recipe.yml"
+    recipe_file.write_text(
+        f"imports:\n"
+        f"  fp8: {tmp_path / 'fp8.yml'}\n"
+        f"  bias_cfg: {tmp_path / 'bias_cfg.yml'}\n"
+        f"metadata:\n"
+        f"  recipe_type: ptq\n"
+        f"quantize:\n"
+        f"  algorithm: max\n"
+        f"  quant_cfg:\n"
+        f"    - quantizer_name: '*weight_quantizer'\n"
+        f"      cfg:\n"
+        f"        $import: fp8\n"
+        f"      bias:\n"
+        f"        $import: bias_cfg\n"
+    )
+    recipe = load_recipe(recipe_file)
+    entry = recipe.quantize["quant_cfg"][0]
+    assert entry["cfg"] == {"num_bits": (4, 3)}
+    assert entry["bias"] == {"enable": True, "type": "dynamic"}
+
+
 def test_import_cfg_multi_import(tmp_path):
     """$import with a list of names merges non-overlapping snippets."""
     (tmp_path / "bits.yml").write_text("num_bits: e4m3\n")

From 185ee3bec3c7a2a71c968129e52d5856a7111fa8 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Wed, 15 Apr 2026 16:18:59 -0700
Subject: [PATCH 26/30] more loads

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 modelopt/torch/quantization/config.py | 32 ++++-----------------------
 1 file changed, 4 insertions(+), 28 deletions(-)

diff --git a/modelopt/torch/quantization/config.py b/modelopt/torch/quantization/config.py
index c6f49b2de9..fb2a8c3fd2 100644
--- a/modelopt/torch/quantization/config.py
+++ b/modelopt/torch/quantization/config.py
@@ -204,35 +204,11 @@ def find_quant_cfg_entry_by_path(
     return result
 
 
-_base_disable_all: list[QuantizerCfgEntry] = [
-    {"quantizer_name": "*", "enable": False},
-]
+_base_disable_all: list[QuantizerCfgEntry] = load_config("configs/ptq/units/base_disable_all")
 
-_default_disabled_quantizer_cfg: list[QuantizerCfgEntry] = [
-    {"parent_class": "nn.BatchNorm1d", "quantizer_name": "*", "enable": False},
-    {"parent_class": "nn.BatchNorm2d", "quantizer_name": "*", "enable": False},
-    {"parent_class": "nn.BatchNorm3d", "quantizer_name": "*", "enable": False},
-    {"parent_class": "nn.LeakyReLU", "quantizer_name": "*", "enable": False},
-    {"quantizer_name": "*lm_head*", "enable": False},
-    {
-        "quantizer_name": "*proj_out.*",
-        "enable": False,
-    },  # In Whisper model, lm_head has key name proj_out
-    {
-        "quantizer_name": "*block_sparse_moe.gate*",
-        "enable": False,
-    },  # Skip the MOE router
-    {"quantizer_name": "*router*", "enable": False},  # Skip the MOE router
-    {"quantizer_name": "*mlp.gate.*", "enable": False},  # Skip the MOE router
-    {
-        "quantizer_name": "*mlp.shared_expert_gate.*",
-        "enable": False,
-    },  # Skip the MOE router
-    {"quantizer_name": "*linear_attn.conv1d*", "enable": False},
-    {"quantizer_name": "*mixer.conv1d*", "enable": False},  # Skip mamba conv1d
-    {"quantizer_name": "*output_layer*", "enable": False},
-    {"quantizer_name": "output.*", "enable": False},
-]
+_default_disabled_quantizer_cfg: list[QuantizerCfgEntry] = load_config(
+    "configs/ptq/units/default_disabled_quantizers"
+)
 
 _mamba_moe_disabled_quantizer_cfg: list[QuantizerCfgEntry] = [
     {"quantizer_name": "*fc1_latent_proj*", "enable": False},  # Skip Latent MOE

From 5e0cc8af2aef02b4a8cbc0c0f29617c251544cb3 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Wed, 15 Apr 2026 17:00:10 -0700
Subject: [PATCH 27/30] fix the doc

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 docs/source/guides/10_recipes.rst         | 15 ++++++++++++---
 modelopt/torch/opt/config_loader.py       |  2 ++
 tools/precommit/check_modelopt_recipes.py |  8 ++++++++
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst
index ed783dc93c..26f28afd75 100644
--- a/docs/source/guides/10_recipes.rst
+++ b/docs/source/guides/10_recipes.rst
@@ -600,9 +600,18 @@ The ``modelopt_recipes/`` package is organized as follows:
        |   +-- fp8.yaml
        |   +-- nvfp4_static.yaml
        |   +-- nvfp4.yaml
-       +-- ptq/                    # PTQ-specific entry snippets
-           +-- base_disable_all.yaml
-           +-- default_disabled_quantizers.yaml
+       +-- ptq/
+           +-- units/                # Reusable quant_cfg building blocks
+           |   +-- base_disable_all.yaml
+           |   +-- default_disabled_quantizers.yaml
+           |   +-- fp8_kv.yaml
+           |   +-- w8a8_fp8_fp8.yaml
+           |   +-- w4a4_nvfp4_nvfp4.yaml
+           +-- presets/              # Complete configs (backward compat with *_CFG dicts)
+               +-- model/
+               |   +-- fp8.yaml
+               +-- kv/
+                   +-- fp8.yaml
 
 
 Recipe data model
diff --git a/modelopt/torch/opt/config_loader.py b/modelopt/torch/opt/config_loader.py
index 6e63f3cdc6..89bed69262 100644
--- a/modelopt/torch/opt/config_loader.py
+++ b/modelopt/torch/opt/config_loader.py
@@ -68,6 +68,8 @@ def _load_raw_config(config_file: str | Path | Traversable) -> dict[str, Any] |
 
     config_file: Path to a config yaml file. The path suffix can be omitted.
     """
+    # Probe order: filesystem first, then built-in library.
+    # This lets users override built-in configs by placing a file locally.
     paths_to_check: list[Path | Traversable] = []
     if isinstance(config_file, str):
         if not config_file.endswith(".yml") and not config_file.endswith(".yaml"):
diff --git a/tools/precommit/check_modelopt_recipes.py b/tools/precommit/check_modelopt_recipes.py
index 600de317b5..2c5706ee73 100644
--- a/tools/precommit/check_modelopt_recipes.py
+++ b/tools/precommit/check_modelopt_recipes.py
@@ -57,6 +57,14 @@ def _check_quant_cfg(quant_cfg, label: str) -> list[str]:
                 continue
             # {$import: name} entries are resolved at load time
             if "$import" in entry:
+                ref = entry["$import"]
+                if not isinstance(ref, (str, list)) or (
+                    isinstance(ref, list) and not all(isinstance(r, str) for r in ref)
+                ):
+                    errors.append(
+                        f"{label}: quant_cfg[{i}] '$import' must be a string or list of strings, "
+                        f"got {type(ref).__name__}: {ref!r}"
+                    )
                 continue
             if "quantizer_name" not in entry:
                 errors.append(

From c7ce455e6667167db1498f1158f57efd6caadc8e Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Wed, 15 Apr 2026 18:16:58 -0700
Subject: [PATCH 28/30] fix failed tests and more tests

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 modelopt/torch/opt/config_loader.py | 108 +++++++--------
 tests/unit/recipe/test_loader.py    | 203 +++++++++++++++++++++-------
 2 files changed, 202 insertions(+), 109 deletions(-)

diff --git a/modelopt/torch/opt/config_loader.py b/modelopt/torch/opt/config_loader.py
index 89bed69262..51ab693d88 100644
--- a/modelopt/torch/opt/config_loader.py
+++ b/modelopt/torch/opt/config_loader.py
@@ -200,65 +200,57 @@ def _lookup(ref_name: str, context: str) -> Any:
             )
         return import_map[ref_name]
 
-    def _resolve_dict_value(d: dict[str, Any], key: str) -> None:
-        """Resolve ``$import`` in a dict value: ``key: {$import: name, ...inline}``."""
-        val = d[key]
-        if not isinstance(val, dict) or _IMPORT_KEY not in val:
-            return
-        ref = val.pop(_IMPORT_KEY)
-        inline_keys = dict(val)
-        ref_names = ref if isinstance(ref, list) else [ref]
-
-        merged: dict[str, Any] = {}
-        for rname in ref_names:
-            snippet = _lookup(rname, f"{key} of {d}")
-            if not isinstance(snippet, dict):
-                raise ValueError(
-                    f"$import {rname!r} in {key} must resolve to a dict, "
-                    f"got {type(snippet).__name__}."
-                )
-            merged.update(snippet)
-
-        merged.update(inline_keys)
-        d[key] = merged
-
-    def _resolve_list(entries: list[Any]) -> list[Any]:
-        """Resolve $import markers in a list of entries."""
-        resolved: list[Any] = []
-        for entry in entries:
-            if isinstance(entry, dict) and _IMPORT_KEY in entry:
-                if len(entry) > 1:
-                    raise ValueError(
-                        f"$import must be the only key in the dict, got extra keys: "
-                        f"{sorted(k for k in entry if k != _IMPORT_KEY)}"
-                    )
-                imported = _lookup(entry[_IMPORT_KEY], "list entry")
-                if not isinstance(imported, list):
-                    raise ValueError(
-                        f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a "
-                        f"list, got {type(imported).__name__}."
-                    )
-                resolved.extend(imported)
-            elif isinstance(entry, dict):
-                # Resolve $import in any dict value within the entry
-                for key in list(entry):
-                    if isinstance(entry.get(key), dict) and _IMPORT_KEY in entry[key]:
-                        _resolve_dict_value(entry, key)
-                resolved.append(entry)
+    def _resolve_value(obj: Any) -> Any:
+        """Recursively resolve ``$import`` markers anywhere in the config tree.
+
+        - Dict with ``$import`` as its only key, as a list element → splice the imported list
+        - Dict with ``$import`` key → replace/merge (import + override with inline keys)
+        - List → resolve each element (with list-splice for ``$import`` entries)
+        - Other → return as-is
+        """
+        if isinstance(obj, dict):
+            if _IMPORT_KEY in obj:
+                # {$import: name, ...inline} → import, merge, override
+                ref = obj.pop(_IMPORT_KEY)
+                inline_keys = dict(obj)
+                ref_names = ref if isinstance(ref, list) else [ref]
+
+                merged: dict[str, Any] = {}
+                for rname in ref_names:
+                    snippet = _lookup(rname, "dict value")
+                    if not isinstance(snippet, dict):
+                        raise ValueError(
+                            f"$import {rname!r} in dict must resolve to a dict, "
+                            f"got {type(snippet).__name__}."
+                        )
+                    merged.update(snippet)
+
+                merged.update(inline_keys)
+                return _resolve_value(merged)  # resolve any nested $import in result
             else:
-                resolved.append(entry)
-        return resolved
-
-    # Resolve in quant_cfg (top-level or nested under quantize)
-    for container in [data, data.get("quantize", {})]:
-        if isinstance(container, dict):
-            quant_cfg = container.get("quant_cfg")
-            if isinstance(quant_cfg, list):
-                container["quant_cfg"] = _resolve_list(quant_cfg)
-
-    # Resolve in _list_content (multi-document snippets)
-    if "_list_content" in data:
-        data["_list_content"] = _resolve_list(data["_list_content"])
+                return {k: _resolve_value(v) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            resolved: list[Any] = []
+            for entry in obj:
+                if isinstance(entry, dict) and _IMPORT_KEY in entry and len(entry) == 1:
+                    # {$import: name} as sole key in list → splice
+                    imported = _lookup(entry[_IMPORT_KEY], "list entry")
+                    if not isinstance(imported, list):
+                        raise ValueError(
+                            f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a "
+                            f"list, got {type(imported).__name__}."
+                        )
+                    resolved.extend(_resolve_value(imported))
+                else:
+                    resolved.append(_resolve_value(entry))
+            return resolved
+        return obj
+
+    data = _resolve_value(data)
+
+    # Resolve $import markers in _list_content (multi-document snippets)
+    if isinstance(data, dict) and "_list_content" in data:
+        data["_list_content"] = _resolve_value(data["_list_content"])
 
     return data
 
diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py
index beb1319284..222b0a190d 100644
--- a/tests/unit/recipe/test_loader.py
+++ b/tests/unit/recipe/test_loader.py
@@ -445,7 +445,9 @@ def test_import_entry_single_element_list(tmp_path):
     )
     recipe = load_recipe(recipe_file)
     assert len(recipe.quantize["quant_cfg"]) == 1
-    assert recipe.quantize["quant_cfg"][0] == {"quantizer_name": "*", "enable": False}
+    entry = recipe.quantize["quant_cfg"][0]
+    assert entry["quantizer_name"] == "*"
+    assert entry["enable"] is False
 
 
 def test_import_entry_non_list_raises(tmp_path):
@@ -491,8 +493,8 @@ def test_import_entry_list_splice(tmp_path):
     assert recipe.quantize["quant_cfg"][2]["quantizer_name"] == "*router*"
 
 
-def test_import_entry_sibling_keys_raises(tmp_path):
-    """$import as a list entry with sibling keys raises ValueError."""
+def test_import_entry_sibling_keys_with_list_snippet_raises(tmp_path):
+    """$import with sibling keys raises when the import resolves to a list (not a dict)."""
     (tmp_path / "disable.yml").write_text("- quantizer_name: '*'\n  enable: false\n")
     recipe_file = tmp_path / "recipe.yml"
     recipe_file.write_text(
@@ -506,7 +508,7 @@ def test_import_entry_sibling_keys_raises(tmp_path):
         f"    - $import: disable_all\n"
         f"      quantizer_name: '*extra*'\n"
     )
-    with pytest.raises(ValueError, match="must be the only key"):
+    with pytest.raises(ValueError, match="must resolve to a dict"):
         load_recipe(recipe_file)
 
 
@@ -558,50 +560,42 @@ def test_import_cfg_inline_overrides_import(tmp_path):
 
 
 def test_import_in_non_cfg_dict_value(tmp_path):
-    """$import resolves in any dict value, not just cfg."""
-    (tmp_path / "bias_cfg.yml").write_text("enable: true\ntype: static\naxis: -1\n")
-    recipe_file = tmp_path / "recipe.yml"
-    recipe_file.write_text(
+    """$import resolves in any dict value, not just cfg (tested via load_config to skip validation)."""
+    (tmp_path / "extra.yml").write_text("foo: bar\nbaz: 42\n")
+    config_file = tmp_path / "config.yml"
+    config_file.write_text(
         f"imports:\n"
-        f"  bias_cfg: {tmp_path / 'bias_cfg.yml'}\n"
-        f"metadata:\n"
-        f"  recipe_type: ptq\n"
-        f"quantize:\n"
-        f"  algorithm: max\n"
-        f"  quant_cfg:\n"
-        f"    - quantizer_name: '*weight_quantizer'\n"
-        f"      bias:\n"
-        f"        $import: bias_cfg\n"
+        f"  extra: {tmp_path / 'extra.yml'}\n"
+        f"quant_cfg:\n"
+        f"  - quantizer_name: '*weight_quantizer'\n"
+        f"    my_field:\n"
+        f"      $import: extra\n"
     )
-    recipe = load_recipe(recipe_file)
-    entry = recipe.quantize["quant_cfg"][0]
-    assert entry["bias"] == {"enable": True, "type": "static", "axis": -1}
+    data = load_config(config_file)
+    entry = data["quant_cfg"][0]
+    assert entry["my_field"] == {"foo": "bar", "baz": 42}
 
 
 def test_import_in_multiple_dict_values(tmp_path):
     """$import resolves independently in multiple dict values of the same entry."""
     (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n")
-    (tmp_path / "bias_cfg.yml").write_text("enable: true\ntype: dynamic\n")
-    recipe_file = tmp_path / "recipe.yml"
-    recipe_file.write_text(
+    (tmp_path / "extra.yml").write_text("foo: bar\n")
+    config_file = tmp_path / "config.yml"
+    config_file.write_text(
         f"imports:\n"
         f"  fp8: {tmp_path / 'fp8.yml'}\n"
-        f"  bias_cfg: {tmp_path / 'bias_cfg.yml'}\n"
-        f"metadata:\n"
-        f"  recipe_type: ptq\n"
-        f"quantize:\n"
-        f"  algorithm: max\n"
-        f"  quant_cfg:\n"
-        f"    - quantizer_name: '*weight_quantizer'\n"
-        f"      cfg:\n"
-        f"        $import: fp8\n"
-        f"      bias:\n"
-        f"        $import: bias_cfg\n"
+        f"  extra: {tmp_path / 'extra.yml'}\n"
+        f"quant_cfg:\n"
+        f"  - quantizer_name: '*weight_quantizer'\n"
+        f"    cfg:\n"
+        f"      $import: fp8\n"
+        f"    my_field:\n"
+        f"      $import: extra\n"
     )
-    recipe = load_recipe(recipe_file)
-    entry = recipe.quantize["quant_cfg"][0]
+    data = load_config(config_file)
+    entry = data["quant_cfg"][0]
     assert entry["cfg"] == {"num_bits": (4, 3)}
-    assert entry["bias"] == {"enable": True, "type": "dynamic"}
+    assert entry["my_field"] == {"foo": "bar"}
 
 
 def test_import_cfg_multi_import(tmp_path):
@@ -655,12 +649,12 @@ def test_import_cfg_multi_import_later_overrides_earlier(tmp_path):
 def test_import_cfg_multi_import_with_extend(tmp_path):
     """$import list + inline keys all merge without conflicts."""
     (tmp_path / "bits.yml").write_text("num_bits: e4m3\n")
-    (tmp_path / "scale.yml").write_text("scale_bits: e8m0\n")
+    (tmp_path / "extra.yml").write_text("fake_quant: false\n")
     recipe_file = tmp_path / "recipe.yml"
     recipe_file.write_text(
         f"imports:\n"
         f"  bits: {tmp_path / 'bits.yml'}\n"
-        f"  scale: {tmp_path / 'scale.yml'}\n"
+        f"  extra: {tmp_path / 'extra.yml'}\n"
         f"metadata:\n"
         f"  recipe_type: ptq\n"
         f"quantize:\n"
@@ -668,12 +662,12 @@ def test_import_cfg_multi_import_with_extend(tmp_path):
         f"  quant_cfg:\n"
         f"    - quantizer_name: '*weight_quantizer'\n"
         f"      cfg:\n"
-        f"        $import: [bits, scale]\n"
+        f"        $import: [bits, extra]\n"
         f"        axis: 0\n"
     )
     recipe = load_recipe(recipe_file)
     cfg = recipe.quantize["quant_cfg"][0]["cfg"]
-    assert cfg == {"num_bits": (4, 3), "scale_bits": (8, 0), "axis": 0}
+    assert cfg == {"num_bits": (4, 3), "fake_quant": False, "axis": 0}
 
 
 def test_import_dir_format(tmp_path):
@@ -740,34 +734,141 @@ def test_import_builtin_fp8_kv_snippet():
     assert kv_entries[0]["cfg"]["num_bits"] == (4, 3)
 
 
+# ---------------------------------------------------------------------------
+# imports — general tree-wide resolution (not just quant_cfg)
+# ---------------------------------------------------------------------------
+
+
+def test_import_in_top_level_dict_value(tmp_path):
+    """$import resolves in a top-level dict value (not inside any list)."""
+    (tmp_path / "algo.yml").write_text("method: gptq\nuse_layerwise: true\n")
+    config_file = tmp_path / "config.yml"
+    config_file.write_text(
+        f"imports:\n  algo: {tmp_path / 'algo.yml'}\nalgorithm:\n  $import: algo\nquant_cfg: []\n"
+    )
+    data = load_config(config_file)
+    assert data["algorithm"] == {"method": "gptq", "use_layerwise": True}
+
+
+def test_import_in_nested_dict(tmp_path):
+    """$import resolves in deeply nested dicts."""
+    (tmp_path / "settings.yml").write_text("lr: 0.001\nepochs: 10\n")
+    config_file = tmp_path / "config.yml"
+    config_file.write_text(
+        f"imports:\n"
+        f"  settings: {tmp_path / 'settings.yml'}\n"
+        f"training:\n"
+        f"  optimizer:\n"
+        f"    params:\n"
+        f"      $import: settings\n"
+    )
+    data = load_config(config_file)
+    assert data["training"]["optimizer"]["params"] == {"lr": 0.001, "epochs": 10}
+
+
+def test_import_list_splice_outside_quant_cfg(tmp_path):
+    """$import list splice works in any list, not just quant_cfg."""
+    (tmp_path / "extra_tasks.yml").write_text("- name: task_b\n- name: task_c\n")
+    config_file = tmp_path / "config.yml"
+    config_file.write_text(
+        f"imports:\n"
+        f"  extra: {tmp_path / 'extra_tasks.yml'}\n"
+        f"tasks:\n"
+        f"  - name: task_a\n"
+        f"  - $import: extra\n"
+        f"  - name: task_d\n"
+    )
+    data = load_config(config_file)
+    assert data["tasks"] == [
+        {"name": "task_a"},
+        {"name": "task_b"},
+        {"name": "task_c"},
+        {"name": "task_d"},
+    ]
+
+
+def test_import_in_nested_list_of_dicts(tmp_path):
+    """$import in dict values within a nested list resolves correctly."""
+    (tmp_path / "defaults.yml").write_text("timeout: 30\nretries: 3\n")
+    config_file = tmp_path / "config.yml"
+    config_file.write_text(
+        f"imports:\n"
+        f"  defaults: {tmp_path / 'defaults.yml'}\n"
+        f"stages:\n"
+        f"  - name: build\n"
+        f"    config:\n"
+        f"      $import: defaults\n"
+        f"      verbose: true\n"
+        f"  - name: test\n"
+        f"    config:\n"
+        f"      $import: defaults\n"
+    )
+    data = load_config(config_file)
+    assert data["stages"][0]["config"] == {"timeout": 30, "retries": 3, "verbose": True}
+    assert data["stages"][1]["config"] == {"timeout": 30, "retries": 3}
+
+
+def test_import_mixed_tree(tmp_path):
+    """$import resolves at multiple levels in the same config."""
+    (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n")
+    (tmp_path / "disables.yml").write_text("- quantizer_name: '*lm_head*'\n  enable: false\n")
+    (tmp_path / "meta.yml").write_text("version: 2\nauthor: test\n")
+    config_file = tmp_path / "config.yml"
+    config_file.write_text(
+        f"imports:\n"
+        f"  fp8: {tmp_path / 'fp8.yml'}\n"
+        f"  disables: {tmp_path / 'disables.yml'}\n"
+        f"  meta: {tmp_path / 'meta.yml'}\n"
+        f"info:\n"
+        f"  $import: meta\n"
+        f"items:\n"
+        f"  - name: a\n"
+        f"    cfg:\n"
+        f"      $import: fp8\n"
+        f"  - $import: disables\n"
+    )
+    data = load_config(config_file)
+    # Top-level dict import
+    assert data["info"] == {"version": 2, "author": "test"}
+    # Dict import inside list entry
+    assert data["items"][0]["cfg"] == {"num_bits": (4, 3)}
+    # List splice
+    assert data["items"][1] == {"quantizer_name": "*lm_head*", "enable": False}
+
+
 # ---------------------------------------------------------------------------
 # imports — recursive resolution and cycle detection
 # ---------------------------------------------------------------------------
 
 
 def test_import_recursive(tmp_path):
-    """A snippet can itself import other snippets."""
-    (tmp_path / "base.yml").write_text("num_bits: e4m3\n")
-    (tmp_path / "mid.yml").write_text(
-        f"imports:\n  base: {tmp_path / 'base.yml'}\nnum_bits:\n  $import: base\n"
+    """A list snippet can import a dict snippet (recursive resolution via multi-doc)."""
+    # fp8: dict snippet with FP8 attributes
+    (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n")
+    # mid: list snippet that imports fp8 and uses $import in cfg
+    (tmp_path / "mid.yaml").write_text(
+        f"imports:\n"
+        f"  fp8: {tmp_path / 'fp8.yml'}\n"
+        f"---\n"
+        f"- quantizer_name: '*weight_quantizer'\n"
+        f"  cfg:\n"
+        f"    $import: fp8\n"
     )
+    # recipe imports mid
     recipe_file = tmp_path / "recipe.yml"
     recipe_file.write_text(
         f"imports:\n"
-        f"  mid: {tmp_path / 'mid.yml'}\n"
+        f"  mid: {tmp_path / 'mid.yaml'}\n"
         f"metadata:\n"
         f"  recipe_type: ptq\n"
         f"quantize:\n"
         f"  algorithm: max\n"
         f"  quant_cfg:\n"
-        f"    - quantizer_name: '*weight_quantizer'\n"
-        f"      cfg:\n"
-        f"        $import: mid\n"
+        f"    - $import: mid\n"
     )
     recipe = load_recipe(recipe_file)
     cfg = recipe.quantize["quant_cfg"][0]["cfg"]
-    # mid.yml resolved "num_bits: {$import: base}" → base.yml content
-    assert cfg["num_bits"] == {"num_bits": (4, 3)}
+    assert cfg == {"num_bits": (4, 3)}
 
 
 def test_import_circular_raises(tmp_path):

From 33af932ac6287418f3e31e605eb956d2a2cbb244 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Wed, 15 Apr 2026 18:53:28 -0700
Subject: [PATCH 29/30] better wording

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 modelopt_recipes/configs/ptq/presets/README.md | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/modelopt_recipes/configs/ptq/presets/README.md b/modelopt_recipes/configs/ptq/presets/README.md
index f8974fc78c..ee45ec83b9 100644
--- a/modelopt_recipes/configs/ptq/presets/README.md
+++ b/modelopt_recipes/configs/ptq/presets/README.md
@@ -4,10 +4,9 @@ This directory holds preset quantization configurations that serve as the
 YAML source of truth for the hardcoded `*_CFG` dicts in
 `modelopt.torch.quantization.config` (e.g., `FP8_DEFAULT_CFG`).
 
-Each preset is a complete, self-contained config with `algorithm` and
-`quant_cfg` — ready to pass directly to `mtq.quantize()`. Presets compose
-from the reusable snippets in `configs/numerics/` and `configs/ptq/units/`
-via the `$import` system.
+Each preset is a self-contained config with `quant_cfg` that can be
+passed to `mtq.quantize()`. Presets compose from the reusable snippets
+in `configs/numerics/` and `configs/ptq/units/` via the `$import` system.
 
 **Note:** The main purpose of these presets is to support the existing
 `hf_ptq.py` script's `--qformat` / `--kv_cache_qformat` flags and other

From a8f5c0fd0d34e2a40c0f4d4d0938a3eee450a3a0 Mon Sep 17 00:00:00 2001
From: Shengliang Xu <shengliangx@nvidia.com>
Date: Thu, 16 Apr 2026 18:05:52 -0700
Subject: [PATCH 30/30] more tests for better coverage

Signed-off-by: Shengliang Xu <shengliangx@nvidia.com>
---
 tests/unit/recipe/test_loader.py | 122 +++++++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)

diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py
index 222b0a190d..7c6e12de0d 100644
--- a/tests/unit/recipe/test_loader.py
+++ b/tests/unit/recipe/test_loader.py
@@ -914,3 +914,125 @@ def test_import_cross_file_same_name_no_conflict(tmp_path):
     # Parent's "fmt" resolves to fp8 (e4m3), not child's nvfp4
     cfg = recipe.quantize["quant_cfg"][0]["cfg"]
     assert cfg == {"num_bits": (4, 3)}
+
+
+# ---------------------------------------------------------------------------
+# Coverage: load_config / _load_raw_config edge cases
+# ---------------------------------------------------------------------------
+
+
+def test_load_config_path_object(tmp_path):
+    """load_config accepts a pathlib.Path argument and returns the parsed mapping."""
+    cfg_file = tmp_path / "test.yaml"
+    cfg_file.write_text("key: value\n")
+    data = load_config(cfg_file)
+    assert data == {"key": "value"}
+
+
+def test_load_config_path_without_suffix(tmp_path):
+    """load_config probes YAML suffixes: the suffix-less Path "test" loads "test.yaml"."""
+    cfg_file = tmp_path / "test.yaml"
+    cfg_file.write_text("key: value\n")
+    data = load_config(tmp_path / "test")  # no suffix
+    assert data == {"key": "value"}
+
+
+def test_load_config_empty_yaml(tmp_path):
+    """load_config returns an empty dict (not None) for a zero-byte YAML file."""
+    cfg_file = tmp_path / "empty.yaml"
+    cfg_file.write_text("")
+    data = load_config(cfg_file)
+    assert data == {}
+
+
+def test_load_config_null_yaml(tmp_path):
+    """load_config returns an empty dict for a file holding only "---" (a null document)."""
+    cfg_file = tmp_path / "null.yaml"
+    cfg_file.write_text("---\n")
+    data = load_config(cfg_file)
+    assert data == {}
+
+
+def test_load_config_multi_doc_dict_dict(tmp_path):
+    """_load_raw_config merges two mapping documents into a single dict."""
+    cfg_file = tmp_path / "multi.yaml"
+    cfg_file.write_text("imports:\n  fp8: some/path\n---\nalgorithm: max\n")
+    from modelopt.torch.opt.config_loader import _load_raw_config
+
+    data = _load_raw_config(cfg_file)
+    assert data["imports"] == {"fp8": "some/path"}
+    assert data["algorithm"] == "max"
+
+
+def test_load_config_multi_doc_null_content(tmp_path):
+    """_load_raw_config treats an empty (null) second document as an empty mapping."""
+    cfg_file = tmp_path / "multi_null.yaml"
+    cfg_file.write_text("key: value\n---\n")
+    from modelopt.torch.opt.config_loader import _load_raw_config
+
+    data = _load_raw_config(cfg_file)
+    assert data == {"key": "value"}
+
+
+def test_load_config_multi_doc_first_not_dict_raises(tmp_path):
+    """load_config raises ValueError when the first of two YAML documents is a list."""
+    cfg_file = tmp_path / "bad_multi.yaml"
+    cfg_file.write_text("- item1\n---\nkey: value\n")
+    with pytest.raises(ValueError, match="first YAML document must be a mapping"):
+        load_config(cfg_file)
+
+
+def test_load_config_multi_doc_second_not_dict_or_list_raises(tmp_path):
+    """load_config raises ValueError when the second YAML document is a plain scalar."""
+    cfg_file = tmp_path / "bad_multi2.yaml"
+    cfg_file.write_text("key: value\n---\njust a string\n")
+    with pytest.raises(ValueError, match="second YAML document must be a mapping or list"):
+        load_config(cfg_file)
+
+
+def test_load_config_three_docs_raises(tmp_path):
+    """load_config raises ValueError for a file holding three YAML documents."""
+    cfg_file = tmp_path / "three_docs.yaml"
+    cfg_file.write_text("a: 1\n---\nb: 2\n---\nc: 3\n")
+    with pytest.raises(ValueError, match="expected 1 or 2 YAML documents"):
+        load_config(cfg_file)
+
+
+def test_load_config_invalid_type_raises():
+    """load_config raises ValueError for an int, i.e. not a str/Path/Traversable."""
+    with pytest.raises(ValueError, match="Invalid config file"):
+        load_config(12345)
+
+
+def test_load_config_list_valued_yaml(tmp_path):
+    """load_config returns a list when the YAML top level is a sequence of mappings."""
+    cfg_file = tmp_path / "list.yaml"
+    cfg_file.write_text("- name: a\n  value: 1\n- name: b\n  value: 2\n")
+    data = load_config(cfg_file)
+    assert isinstance(data, list)
+    assert len(data) == 2
+    assert data[0] == {"name": "a", "value": 1}
+
+
+# ---------------------------------------------------------------------------
+# Coverage: _resolve_imports edge cases
+# ---------------------------------------------------------------------------
+
+
+def test_import_dict_value_resolves_to_list_raises(tmp_path):
+    """$import used as a dict value raises ValueError when the imported snippet is a list."""
+    (tmp_path / "entries.yml").write_text("- a: 1\n- b: 2\n")
+    config_file = tmp_path / "config.yml"
+    config_file.write_text(
+        f"imports:\n  entries: {tmp_path / 'entries.yml'}\nmy_field:\n  $import: entries\n"
+    )
+    with pytest.raises(ValueError, match="must resolve to a dict"):
+        load_config(config_file)
+
+
+def test_import_imports_not_a_dict_raises(tmp_path):
+    """An imports section written as a list instead of a mapping raises ValueError."""
+    config_file = tmp_path / "config.yml"
+    config_file.write_text("imports:\n  - some/path\nkey: value\n")
+    with pytest.raises(ValueError, match="must be a dict"):
+        load_config(config_file)