From 1d8a30b06f6e46e8e98f2b3e0b49f3bed5e9699a Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Mon, 13 Apr 2026 15:59:29 -0700 Subject: [PATCH 01/30] Add import system for composable YAML configs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit description: Introduce an import mechanism that lets recipe YAML files reference reusable config snippets by name, reducing duplication across recipes. Syntax: imports: fp8: configs/numerics/fp8 base_disable_all: configs/ptq/base_disable_all quant_cfg: - base_disable_all # string entry → replaced with imported dict or spliced list - quantizer_name: '*weight_quantizer' cfg: fp8 # string cfg → replaced with imported dict Features: - Dict-based imports (keys are names, values are config paths) — no name conflicts - Three resolution modes: string cfg value, string list entry (dict), string list entry (list splice) - Recursive resolution with circular import detection - Path resolution via load_config (built-in library first, then filesystem) - Works with both single-file and directory recipe formats New reusable config snippets (modelopt_recipes/configs/): - numerics/fp8.yml, nvfp4_dynamic.yml, nvfp4_static.yml - ptq/base_disable_all.yaml, default_disabled_quantizers.yaml All 6 built-in PTQ recipes converted to use imports, reducing each by ~30 lines. Pre-commit hook updated to skip configs/ directory and allow string entries in quant_cfg. load_config() now accepts YAML lists for list-valued snippets. 
Signed-off-by: Shengliang Xu --- .pre-commit-config.yaml | 1 + modelopt/recipe/_config_loader.py | 6 +- modelopt/recipe/loader.py | 100 ++++++- modelopt_recipes/configs/numerics/fp8.yml | 2 + .../configs/numerics/nvfp4_dynamic.yml | 6 + .../configs/numerics/nvfp4_static.yml | 6 + .../configs/ptq/base_disable_all.yaml | 3 + .../ptq/default_disabled_quantizers.yaml | 33 +++ .../general/ptq/fp8_default-fp8_kv.yaml | 52 +--- .../general/ptq/nvfp4_default-fp8_kv.yaml | 59 +--- .../ptq/nvfp4_default-none_kv_gptq.yaml | 56 +--- .../ptq/nvfp4_experts_only-fp8_kv.yaml | 73 +---- .../general/ptq/nvfp4_mlp_only-fp8_kv.yaml | 73 +---- .../general/ptq/nvfp4_omlp_only-fp8_kv.yaml | 87 +----- tests/unit/recipe/test_loader.py | 276 ++++++++++++++++++ tools/precommit/check_modelopt_recipes.py | 3 + 16 files changed, 502 insertions(+), 334 deletions(-) create mode 100644 modelopt_recipes/configs/numerics/fp8.yml create mode 100644 modelopt_recipes/configs/numerics/nvfp4_dynamic.yml create mode 100644 modelopt_recipes/configs/numerics/nvfp4_static.yml create mode 100644 modelopt_recipes/configs/ptq/base_disable_all.yaml create mode 100644 modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3c4c11a090..0fc5c8eeaa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -68,6 +68,7 @@ repos: entry: python tools/precommit/check_modelopt_recipes.py language: system files: ^modelopt_recipes/ + exclude: ^modelopt_recipes/configs/ # Instructions to change license file if ever needed: # https://github.com/Lucas-C/pre-commit-hooks#removing-old-license-and-replacing-it-with-a-new-one diff --git a/modelopt/recipe/_config_loader.py b/modelopt/recipe/_config_loader.py index 188dcf236f..da6f4b7640 100644 --- a/modelopt/recipe/_config_loader.py +++ b/modelopt/recipe/_config_loader.py @@ -62,7 +62,7 @@ def _parse_exmy(s: str) -> tuple[int, int] | str: return s -def load_config(config_file: str | Path | 
Traversable) -> dict[str, Any]: +def load_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[Any]: """Load a config yaml. config_file: Path to a config yaml file. The path suffix can be omitted. @@ -106,8 +106,8 @@ def load_config(config_file: str | Path | Traversable) -> dict[str, Any]: _raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) if _raw is None: return {} - if not isinstance(_raw, dict): + if not isinstance(_raw, (dict, list)): raise ValueError( - f"Config file {config_path} must contain a YAML mapping, got {type(_raw).__name__}" + f"Config file {config_path} must contain a YAML mapping or list, got {type(_raw).__name__}" ) return _parse_exmy_num_bits(_raw) diff --git a/modelopt/recipe/loader.py b/modelopt/recipe/loader.py index 3a9c66fb22..342e615d06 100644 --- a/modelopt/recipe/loader.py +++ b/modelopt/recipe/loader.py @@ -20,6 +20,7 @@ except ImportError: # Python < 3.11 from importlib.abc import Traversable from pathlib import Path +from typing import Any from ._config_loader import BUILTIN_RECIPES_LIB, load_config from .config import ModelOptPTQRecipe, ModelOptRecipeBase, RecipeType @@ -27,6 +28,87 @@ __all__ = ["load_config", "load_recipe"] +def _resolve_imports( + data: dict[str, Any], _loading: frozenset[str] | None = None +) -> dict[str, Any]: + """Resolve the ``imports`` section in a recipe and substitute named references. + + An ``imports`` block is a dict mapping short names to config file paths:: + + imports: + fp8: configs/numerics/fp8 + nvfp4: configs/numerics/nvfp4_dynamic + + ``cfg`` values in ``quant_cfg`` entries that are plain strings are looked up + against the imported names and replaced with the loaded config dict. + + Resolution is **recursive**: an imported snippet may itself contain an + ``imports`` section. Circular imports are detected and raise ``ValueError``. 
+ """ + imports_dict = data.pop("imports", None) + if not imports_dict: + return data + + if not isinstance(imports_dict, dict): + raise ValueError( + f"'imports' must be a dict mapping names to config paths, got: {type(imports_dict).__name__}" + ) + + if _loading is None: + _loading = frozenset() + + # Build name → config mapping (recursively resolve nested imports) + import_map: dict[str, Any] = {} + for name, config_path in imports_dict.items(): + if not config_path: + raise ValueError(f"Import {name!r} has an empty config path.") + if config_path in _loading: + raise ValueError( + f"Circular import detected: {config_path!r} is already being loaded. " + f"Import chain: {sorted(_loading)}" + ) + snippet = load_config(config_path) + if isinstance(snippet, dict) and "imports" in snippet: + snippet = _resolve_imports(snippet, _loading | {config_path}) + import_map[name] = snippet + + # Resolve string references in quant_cfg entries + quantize = data.get("quantize") + if isinstance(quantize, dict): + quant_cfg = quantize.get("quant_cfg") + if isinstance(quant_cfg, list): + resolved_cfg: list[Any] = [] + for entry in quant_cfg: + if isinstance(entry, str): + # Entire entry is a string → replace with the imported value + if entry not in import_map: + raise ValueError( + f"Unknown import reference {entry!r} in quant_cfg list. " + f"Available imports: {list(import_map.keys())}" + ) + imported = import_map[entry] + if isinstance(imported, list): + # List import → splice all entries in place + resolved_cfg.extend(imported) + else: + resolved_cfg.append(imported) + elif isinstance(entry, dict) and isinstance(entry.get("cfg"), str): + # cfg field is a string → replace cfg value + ref_name = entry["cfg"] + if ref_name not in import_map: + raise ValueError( + f"Unknown import reference {ref_name!r} in quant_cfg entry " + f"{entry!r}. 
Available imports: {list(import_map.keys())}" + ) + entry["cfg"] = import_map[ref_name] + resolved_cfg.append(entry) + else: + resolved_cfg.append(entry) + quantize["quant_cfg"] = resolved_cfg + + return data + + def _resolve_recipe_path(recipe_path: str | Path | Traversable) -> Path | Traversable: """Resolve a recipe path, checking the built-in library first then the filesystem. @@ -86,7 +168,9 @@ def _load_recipe_from_file(recipe_file: Path | Traversable) -> ModelOptRecipeBas The file must contain a ``metadata`` section with at least ``recipe_type``, plus a ``quant_cfg`` mapping and an optional ``algorithm`` for PTQ recipes. """ - data = load_config(recipe_file) + raw = load_config(recipe_file) + assert isinstance(raw, dict), f"Recipe file {recipe_file} must be a YAML mapping." + data = _resolve_imports(raw) metadata = data.get("metadata", {}) recipe_type = metadata.get("recipe_type") @@ -117,7 +201,9 @@ def _load_recipe_from_dir(recipe_dir: Path | Traversable) -> ModelOptRecipeBase: f"Cannot find a recipe descriptor in {recipe_dir}. Looked for: recipe.yml, recipe.yaml" ) - metadata = load_config(recipe_file).get("metadata", {}) + recipe_data = load_config(recipe_file) + assert isinstance(recipe_data, dict), f"Recipe file {recipe_file} must be a YAML mapping." + metadata = recipe_data.get("metadata", {}) recipe_type = metadata.get("recipe_type") if recipe_type is None: raise ValueError(f"Recipe file {recipe_file} must contain a 'metadata.recipe_type' field.") @@ -133,9 +219,17 @@ def _load_recipe_from_dir(recipe_dir: Path | Traversable) -> ModelOptRecipeBase: raise ValueError( f"Cannot find quantize in {recipe_dir}. Looked for: quantize.yml, quantize.yaml" ) + # Resolve imports: imports are in recipe.yml, quantize data is separate + quantize_data = load_config(quantize_file) + assert isinstance(quantize_data, dict), f"{quantize_file} must be a YAML mapping." 
+ combined: dict[str, Any] = {"quantize": quantize_data} + imports = recipe_data.get("imports") + if imports: + combined["imports"] = imports + combined = _resolve_imports(combined) return ModelOptPTQRecipe( recipe_type=RecipeType.PTQ, description=metadata.get("description", "PTQ recipe."), - quantize=load_config(quantize_file), + quantize=combined["quantize"], ) raise ValueError(f"Unsupported recipe type: {recipe_type!r}") diff --git a/modelopt_recipes/configs/numerics/fp8.yml b/modelopt_recipes/configs/numerics/fp8.yml new file mode 100644 index 0000000000..e84779c8f4 --- /dev/null +++ b/modelopt_recipes/configs/numerics/fp8.yml @@ -0,0 +1,2 @@ +# FP8 E4M3 quantizer attributes (no axis — used for KV cache, etc.). +num_bits: e4m3 diff --git a/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml b/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml new file mode 100644 index 0000000000..335e357a7f --- /dev/null +++ b/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml @@ -0,0 +1,6 @@ +# NVFP4 E2M1 blockwise with dynamic calibration and FP8 E4M3 scales. +num_bits: e2m1 +block_sizes: + -1: 16 + type: dynamic + scale_bits: e4m3 diff --git a/modelopt_recipes/configs/numerics/nvfp4_static.yml b/modelopt_recipes/configs/numerics/nvfp4_static.yml new file mode 100644 index 0000000000..90d15bf489 --- /dev/null +++ b/modelopt_recipes/configs/numerics/nvfp4_static.yml @@ -0,0 +1,6 @@ +# NVFP4 E2M1 blockwise with static calibration and FP8 E4M3 scales. +num_bits: e2m1 +block_sizes: + -1: 16 + type: static + scale_bits: e4m3 diff --git a/modelopt_recipes/configs/ptq/base_disable_all.yaml b/modelopt_recipes/configs/ptq/base_disable_all.yaml new file mode 100644 index 0000000000..7035b55c5f --- /dev/null +++ b/modelopt_recipes/configs/ptq/base_disable_all.yaml @@ -0,0 +1,3 @@ +# Disable all quantizers by default (deny-all-then-configure pattern). 
+quantizer_name: '*' +enable: false diff --git a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml new file mode 100644 index 0000000000..98934ae725 --- /dev/null +++ b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml @@ -0,0 +1,33 @@ +# Standard quantizer exclusions: layers that should not be quantized. + - quantizer_name: '*block_sparse_moe.gate*' + enable: false + - quantizer_name: '*linear_attn.conv1d*' + enable: false + - quantizer_name: '*lm_head*' + enable: false + - quantizer_name: '*mixer.conv1d*' + enable: false + - quantizer_name: '*mlp.gate.*' + enable: false + - quantizer_name: '*mlp.shared_expert_gate.*' + enable: false + - quantizer_name: '*output_layer*' + enable: false + - quantizer_name: '*proj_out.*' + enable: false + - quantizer_name: '*router*' + enable: false + - quantizer_name: 'output.*' + enable: false + - parent_class: 'nn.BatchNorm1d' + quantizer_name: '*' + enable: false + - parent_class: 'nn.BatchNorm2d' + quantizer_name: '*' + enable: false + - parent_class: 'nn.BatchNorm3d' + quantizer_name: '*' + enable: false + - parent_class: 'nn.LeakyReLU' + quantizer_name: '*' + enable: false diff --git a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml index c80904e8eb..cfe6f1269b 100644 --- a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml @@ -13,55 +13,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +imports: + base_disable_all: configs/ptq/base_disable_all + default_disabled: configs/ptq/default_disabled_quantizers + fp8: configs/numerics/fp8 + metadata: recipe_type: ptq description: FP8 per-tensor weight and activation (W8A8), FP8 KV cache, max calibration. 
quantize: algorithm: max quant_cfg: - - quantizer_name: '*' - enable: false + - base_disable_all - quantizer_name: '*input_quantizer' - cfg: - num_bits: e4m3 - axis: + cfg: fp8 - quantizer_name: '*weight_quantizer' - cfg: - num_bits: e4m3 - axis: + cfg: fp8 - quantizer_name: '*[kv]_bmm_quantizer' enable: true - cfg: - num_bits: e4m3 - - quantizer_name: '*block_sparse_moe.gate*' - enable: false - - quantizer_name: '*linear_attn.conv1d*' - enable: false - - quantizer_name: '*lm_head*' - enable: false - - quantizer_name: '*mixer.conv1d*' - enable: false - - quantizer_name: '*mlp.gate.*' - enable: false - - quantizer_name: '*mlp.shared_expert_gate.*' - enable: false - - quantizer_name: '*output_layer*' - enable: false - - quantizer_name: '*proj_out.*' - enable: false - - quantizer_name: '*router*' - enable: false - - quantizer_name: 'output.*' - enable: false - - parent_class: 'nn.BatchNorm1d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.BatchNorm2d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.BatchNorm3d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.LeakyReLU' - quantizer_name: '*' - enable: false + cfg: fp8 + - default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml index 6fe4a8c3d1..37e46bef56 100644 --- a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml @@ -13,63 +13,26 @@ # See the License for the specific language governing permissions and # limitations under the License. +imports: + base_disable_all: configs/ptq/base_disable_all + default_disabled: configs/ptq/default_disabled_quantizers + nvfp4: configs/numerics/nvfp4_dynamic + fp8: configs/numerics/fp8 + metadata: recipe_type: ptq description: NVFP4 MLP/MoE weight only (W4A16), FP8 KV cache, max calibration. 
quantize: algorithm: max quant_cfg: - - quantizer_name: '*' - enable: false + - base_disable_all - quantizer_name: '*weight_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*input_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*[kv]_bmm_quantizer' enable: true - cfg: - num_bits: e4m3 - - quantizer_name: '*block_sparse_moe.gate*' - enable: false - - quantizer_name: '*linear_attn.conv1d*' - enable: false - - quantizer_name: '*lm_head*' - enable: false - - quantizer_name: '*mixer.conv1d*' - enable: false - - quantizer_name: '*mlp.gate.*' - enable: false - - quantizer_name: '*mlp.shared_expert_gate.*' - enable: false - - quantizer_name: '*output_layer*' - enable: false - - quantizer_name: '*proj_out.*' - enable: false - - quantizer_name: '*router*' - enable: false - - quantizer_name: 'output.*' - enable: false - - parent_class: 'nn.BatchNorm1d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.BatchNorm2d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.BatchNorm3d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.LeakyReLU' - quantizer_name: '*' - enable: false + cfg: fp8 + - default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml index a62051b659..73b3fada7e 100644 --- a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml @@ -13,6 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+imports: + base_disable_all: configs/ptq/base_disable_all + default_disabled: configs/ptq/default_disabled_quantizers + nvfp4_static: configs/numerics/nvfp4_static + nvfp4_dynamic: configs/numerics/nvfp4_dynamic + metadata: recipe_type: ptq description: NVFP4 weight and activation (W4A4), gptq sequential calibration. @@ -21,53 +27,11 @@ quantize: method: gptq use_sequential: true quant_cfg: - - quantizer_name: '*' - enable: false + - base_disable_all - quantizer_name: '*weight_quantizer' - cfg: - block_sizes: - -1: 16 - type: static - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4_static - quantizer_name: '*input_quantizer' - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4_dynamic - quantizer_name: '*[kv]_bmm_quantizer' enable: false - - quantizer_name: '*block_sparse_moe.gate*' - enable: false - - quantizer_name: '*linear_attn.conv1d*' - enable: false - - quantizer_name: '*lm_head*' - enable: false - - quantizer_name: '*mixer.conv1d*' - enable: false - - quantizer_name: '*mlp.gate.*' - enable: false - - quantizer_name: '*mlp.shared_expert_gate.*' - enable: false - - quantizer_name: '*output_layer*' - enable: false - - quantizer_name: '*proj_out.*' - enable: false - - quantizer_name: '*router*' - enable: false - - quantizer_name: 'output.*' - enable: false - - parent_class: 'nn.BatchNorm1d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.BatchNorm2d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.BatchNorm3d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.LeakyReLU' - quantizer_name: '*' - enable: false + - default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml index cc332733a0..7177a6d8aa 100644 --- a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml @@ -13,79 +13,32 @@ # See the License for the specific 
language governing permissions and # limitations under the License. +imports: + base_disable_all: configs/ptq/base_disable_all + default_disabled: configs/ptq/default_disabled_quantizers + nvfp4: configs/numerics/nvfp4_dynamic + fp8: configs/numerics/fp8 + metadata: recipe_type: ptq description: NVFP4 static weight and dynamic activation for expert layers only (W4A4), FP8 KV cache, max calibration. quantize: algorithm: max quant_cfg: - - quantizer_name: '*' - enable: false + - base_disable_all - quantizer_name: '*mlp.experts*weight_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*mlp.experts*input_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*block_sparse_moe*weight_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*block_sparse_moe*input_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*[kv]_bmm_quantizer' enable: true - cfg: - num_bits: e4m3 - - quantizer_name: '*block_sparse_moe.gate*' - enable: false - - quantizer_name: '*linear_attn.conv1d*' - enable: false - - quantizer_name: '*lm_head*' - enable: false - - quantizer_name: '*mixer.conv1d*' - enable: false - - quantizer_name: '*mlp.gate.*' - enable: false - - quantizer_name: '*mlp.shared_expert_gate.*' - enable: false - - quantizer_name: '*output_layer*' - enable: false - - quantizer_name: '*proj_out.*' - enable: false - - quantizer_name: '*router*' - enable: false - - quantizer_name: 'output.*' - enable: false - - parent_class: 'nn.BatchNorm1d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.BatchNorm2d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.BatchNorm3d' - quantizer_name: '*' - enable: false - - parent_class: 
'nn.LeakyReLU' - quantizer_name: '*' - enable: false + cfg: fp8 + - default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml index 0222274af0..990d686d5a 100644 --- a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml @@ -13,79 +13,32 @@ # See the License for the specific language governing permissions and # limitations under the License. +imports: + base_disable_all: configs/ptq/base_disable_all + default_disabled: configs/ptq/default_disabled_quantizers + nvfp4: configs/numerics/nvfp4_dynamic + fp8: configs/numerics/fp8 + metadata: recipe_type: ptq description: NVFP4 static weight and dynamic activation for all linear layers (W4A4), FP8 KV cache, max calibration. quantize: algorithm: max quant_cfg: - - quantizer_name: '*' - enable: false + - base_disable_all - quantizer_name: '*mlp*weight_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*mlp*input_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*block_sparse_moe*weight_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*block_sparse_moe*input_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*[kv]_bmm_quantizer' enable: true - cfg: - num_bits: e4m3 - - quantizer_name: '*block_sparse_moe.gate*' - enable: false - - quantizer_name: '*linear_attn.conv1d*' - enable: false - - quantizer_name: '*lm_head*' - enable: false - - quantizer_name: '*mixer.conv1d*' - enable: false - - quantizer_name: '*mlp.gate.*' - enable: false - - quantizer_name: '*mlp.shared_expert_gate.*' - enable: false - - quantizer_name: 
'*output_layer*' - enable: false - - quantizer_name: '*proj_out.*' - enable: false - - quantizer_name: '*router*' - enable: false - - quantizer_name: 'output.*' - enable: false - - parent_class: 'nn.BatchNorm1d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.BatchNorm2d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.BatchNorm3d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.LeakyReLU' - quantizer_name: '*' - enable: false + cfg: fp8 + - default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml index 3fdd79888d..5b92c97714 100644 --- a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml @@ -13,95 +13,38 @@ # See the License for the specific language governing permissions and # limitations under the License. +imports: + base_disable_all: configs/ptq/base_disable_all + default_disabled: configs/ptq/default_disabled_quantizers + nvfp4: configs/numerics/nvfp4_dynamic + fp8: configs/numerics/fp8 + metadata: recipe_type: ptq description: NVFP4 static weight and dynamic activation for all linear layers including output projections, FP8 KV cache, max calibration. 
quantize: algorithm: max quant_cfg: - - quantizer_name: '*' - enable: false + - base_disable_all - quantizer_name: '*mlp*weight_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*mlp*input_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*block_sparse_moe*weight_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*block_sparse_moe*input_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*o_proj*weight_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*o_proj*input_quantizer' enable: true - cfg: - block_sizes: - -1: 16 - type: dynamic - scale_bits: e4m3 - num_bits: e2m1 + cfg: nvfp4 - quantizer_name: '*[kv]_bmm_quantizer' enable: true - cfg: - num_bits: e4m3 - - quantizer_name: '*block_sparse_moe.gate*' - enable: false - - quantizer_name: '*linear_attn.conv1d*' - enable: false - - quantizer_name: '*lm_head*' - enable: false - - quantizer_name: '*mixer.conv1d*' - enable: false - - quantizer_name: '*mlp.gate.*' - enable: false - - quantizer_name: '*mlp.shared_expert_gate.*' - enable: false - - quantizer_name: '*output_layer*' - enable: false - - quantizer_name: '*proj_out.*' - enable: false - - quantizer_name: '*router*' - enable: false - - quantizer_name: 'output.*' - enable: false - - parent_class: 'nn.BatchNorm1d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.BatchNorm2d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.BatchNorm3d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.LeakyReLU' - quantizer_name: '*' - enable: false + cfg: fp8 + - default_disabled diff --git 
a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py index b8da2d140f..706d99d023 100644 --- a/tests/unit/recipe/test_loader.py +++ b/tests/unit/recipe/test_loader.py @@ -107,6 +107,8 @@ def test_load_recipe_builtin_description(): _BUILTIN_PTQ_RECIPES = [ "general/ptq/fp8_default-fp8_kv", "general/ptq/nvfp4_default-fp8_kv", + "general/ptq/nvfp4_default-none_kv_gptq", + "general/ptq/nvfp4_experts_only-fp8_kv", "general/ptq/nvfp4_mlp_only-fp8_kv", "general/ptq/nvfp4_omlp_only-fp8_kv", ] @@ -249,3 +251,277 @@ def _sort_key(entry): assert sorted(python_entries, key=_sort_key) == sorted(yaml_entries, key=_sort_key) assert model_cfg["algorithm"] == yaml_data["quantize"]["algorithm"] + + +# --------------------------------------------------------------------------- +# imports — named config snippet resolution +# --------------------------------------------------------------------------- + + +def test_import_resolves_cfg_reference(tmp_path): + """String cfg values are replaced with the imported config dict.""" + (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n") + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" fp8: {tmp_path / 'fp8.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - quantizer_name: '*weight_quantizer'\n" + f" cfg: fp8\n" + ) + recipe = load_recipe(recipe_file) + entry = recipe.quantize["quant_cfg"][0] + assert entry["cfg"] == {"num_bits": (4, 3), "axis": None} + + +def test_import_same_name_used_twice(tmp_path): + """The same import can be referenced in multiple quant_cfg entries.""" + (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n") + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" fp8: {tmp_path / 'fp8.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - quantizer_name: '*weight_quantizer'\n" + f" cfg: fp8\n" + f" - 
quantizer_name: '*input_quantizer'\n" + f" cfg: fp8\n" + ) + recipe = load_recipe(recipe_file) + assert recipe.quantize["quant_cfg"][0]["cfg"] == recipe.quantize["quant_cfg"][1]["cfg"] + + +def test_import_multiple_snippets(tmp_path): + """Multiple imports with different names resolve independently.""" + (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n") + (tmp_path / "nvfp4.yml").write_text("num_bits: e2m1\nblock_sizes:\n -1: 16\n") + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" fp8: {tmp_path / 'fp8.yml'}\n" + f" nvfp4: {tmp_path / 'nvfp4.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - quantizer_name: '*weight_quantizer'\n" + f" cfg: nvfp4\n" + f" - quantizer_name: '*[kv]_bmm_quantizer'\n" + f" cfg: fp8\n" + ) + recipe = load_recipe(recipe_file) + assert recipe.quantize["quant_cfg"][0]["cfg"]["num_bits"] == (2, 1) + assert recipe.quantize["quant_cfg"][1]["cfg"]["num_bits"] == (4, 3) + + +def test_import_inline_cfg_not_affected(tmp_path): + """Inline dict cfg entries are not touched by import resolution.""" + (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n") + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" fp8: {tmp_path / 'fp8.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - quantizer_name: '*weight_quantizer'\n" + f" cfg: fp8\n" + f" - quantizer_name: '*input_quantizer'\n" + f" cfg:\n" + f" num_bits: 8\n" + f" axis: 0\n" + ) + recipe = load_recipe(recipe_file) + assert recipe.quantize["quant_cfg"][1]["cfg"] == {"num_bits": 8, "axis": 0} + + +def test_import_unknown_reference_raises(tmp_path): + """Referencing an undefined import name raises ValueError.""" + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + "imports:\n" + " fp8: configs/numerics/fp8\n" + "metadata:\n" + " recipe_type: ptq\n" + "quantize:\n" + " 
algorithm: max\n" + " quant_cfg:\n" + " - quantizer_name: '*weight_quantizer'\n" + " cfg: nonexistent\n" + ) + with pytest.raises(ValueError, match="Unknown import reference"): + load_recipe(recipe_file) + + +def test_import_empty_path_raises(tmp_path): + """Import with empty config path raises ValueError.""" + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + "imports:\n" + " fp8:\n" + "metadata:\n" + " recipe_type: ptq\n" + "quantize:\n" + " algorithm: max\n" + " quant_cfg: []\n" + ) + with pytest.raises(ValueError, match="empty config path"): + load_recipe(recipe_file) + + +def test_import_not_a_dict_raises(tmp_path): + """Import section that is not a dict raises ValueError.""" + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + "imports:\n" + " - configs/numerics/fp8\n" + "metadata:\n" + " recipe_type: ptq\n" + "quantize:\n" + " algorithm: max\n" + " quant_cfg: []\n" + ) + with pytest.raises(ValueError, match="must be a dict"): + load_recipe(recipe_file) + + +def test_import_no_imports_section(tmp_path): + """Recipes without imports load normally.""" + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + "metadata:\n" + " recipe_type: ptq\n" + "quantize:\n" + " algorithm: max\n" + " quant_cfg:\n" + " - quantizer_name: '*'\n" + " enable: false\n" + ) + recipe = load_recipe(recipe_file) + assert recipe.quantize["quant_cfg"][0]["enable"] is False + + +def test_import_builtin_config_snippet(): + """Imports can reference built-in config snippets by relative path.""" + recipe = load_recipe("general/ptq/fp8_default-fp8_kv") + # This recipe doesn't use imports, but verify it still loads fine + assert recipe.quantize + + +def test_import_dir_format(tmp_path): + """Imports in recipe.yml work with the directory recipe format.""" + (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n") + (tmp_path / "recipe.yml").write_text( + f"imports:\n" + f" fp8: {tmp_path / 'fp8.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f" 
description: Dir with imports.\n" + ) + (tmp_path / "quantize.yml").write_text( + "algorithm: max\nquant_cfg:\n - quantizer_name: '*weight_quantizer'\n cfg: fp8\n" + ) + recipe = load_recipe(tmp_path) + assert recipe.quantize["quant_cfg"][0]["cfg"] == {"num_bits": (4, 3), "axis": None} + + +# --------------------------------------------------------------------------- +# imports — recursive resolution and cycle detection +# --------------------------------------------------------------------------- + + +def test_import_recursive(tmp_path): + """A snippet can itself import other snippets.""" + # base snippet — no imports + (tmp_path / "base.yml").write_text("num_bits: e4m3\n") + # mid-level snippet imports base + (tmp_path / "mid.yml").write_text( + f"imports:\n base: {tmp_path / 'base.yml'}\nnum_bits: base\n" + ) + # recipe imports mid + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" mid: {tmp_path / 'mid.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - quantizer_name: '*weight_quantizer'\n" + f" cfg: mid\n" + ) + recipe = load_recipe(recipe_file) + # mid.yml's "num_bits: base" should have been resolved to the base snippet's content + cfg = recipe.quantize["quant_cfg"][0]["cfg"] + assert cfg["num_bits"] == {"num_bits": (4, 3)} + + +def test_import_circular_raises(tmp_path): + """Circular imports are detected and raise ValueError.""" + (tmp_path / "a.yml").write_text(f"imports:\n b: {tmp_path / 'b.yml'}\nnum_bits: 8\n") + (tmp_path / "b.yml").write_text(f"imports:\n a: {tmp_path / 'a.yml'}\nnum_bits: 4\n") + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" a: {tmp_path / 'a.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg: []\n" + ) + with pytest.raises(ValueError, match="Circular import"): + load_recipe(recipe_file) + + +def 
test_import_cross_file_same_name_no_conflict(tmp_path): + """Same import name in parent and child files resolve independently (no conflict). + + recipe.yml imports ``fmt`` → fp8.yml (num_bits: e4m3) + recipe.yml also imports ``child`` → child.yml + child.yml imports ``fmt`` → nvfp4.yml (num_bits: e2m1, block_sizes: ...) + + The parent's ``fmt`` and the child's ``fmt`` are different configs. + The parent should get fp8 for its own ``fmt`` reference, and the child's + ``fmt`` should be resolved within the child's scope only. + """ + # Two different snippets, both will be imported under the name "fmt" + (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n") + (tmp_path / "nvfp4.yml").write_text("num_bits: e2m1\nblock_sizes:\n -1: 16\n") + + # Child snippet imports "fmt" → nvfp4 + (tmp_path / "child.yml").write_text( + f"imports:\n fmt: {tmp_path / 'nvfp4.yml'}\nweight_format: fmt\n" + ) + + # Parent recipe imports "fmt" → fp8, and also imports "child" + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" fmt: {tmp_path / 'fp8.yml'}\n" + f" child: {tmp_path / 'child.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - quantizer_name: '*weight_quantizer'\n" + f" cfg: fmt\n" + ) + recipe = load_recipe(recipe_file) + + # Parent's "fmt" should resolve to fp8 (e4m3), not nvfp4 + cfg = recipe.quantize["quant_cfg"][0]["cfg"] + assert cfg == {"num_bits": (4, 3)} diff --git a/tools/precommit/check_modelopt_recipes.py b/tools/precommit/check_modelopt_recipes.py index b964b4b040..f31145bc50 100644 --- a/tools/precommit/check_modelopt_recipes.py +++ b/tools/precommit/check_modelopt_recipes.py @@ -48,6 +48,9 @@ def _check_quant_cfg(quant_cfg, label: str) -> list[str]: ) elif isinstance(quant_cfg, list): for i, entry in enumerate(quant_cfg): + if isinstance(entry, str): + # String entries are import references — resolved at load time + continue if not isinstance(entry, dict): 
errors.append( f"{label}: quant_cfg[{i}] must be a dict with " From 99120f89faed981afdc65f46817211b7182f8129 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Mon, 13 Apr 2026 16:34:34 -0700 Subject: [PATCH 02/30] reimplement using $import Signed-off-by: Shengliang Xu --- .pre-commit-config.yaml | 1 + modelopt/recipe/loader.py | 53 ++++---- .../general/ptq/fp8_default-fp8_kv.yaml | 13 +- .../general/ptq/nvfp4_default-fp8_kv.yaml | 13 +- .../ptq/nvfp4_default-none_kv_gptq.yaml | 10 +- .../ptq/nvfp4_experts_only-fp8_kv.yaml | 19 +-- .../general/ptq/nvfp4_mlp_only-fp8_kv.yaml | 19 +-- .../general/ptq/nvfp4_omlp_only-fp8_kv.yaml | 25 ++-- tests/unit/recipe/test_loader.py | 117 ++++++++++++------ tools/precommit/check_modelopt_recipes.py | 10 +- 10 files changed, 178 insertions(+), 102 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0fc5c8eeaa..dd546394c3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -68,6 +68,7 @@ repos: entry: python tools/precommit/check_modelopt_recipes.py language: system files: ^modelopt_recipes/ + # configs/ contains reusable snippets (not full recipes) — skip recipe validation exclude: ^modelopt_recipes/configs/ # Instructions to change license file if ever needed: diff --git a/modelopt/recipe/loader.py b/modelopt/recipe/loader.py index 342e615d06..68d0f32c63 100644 --- a/modelopt/recipe/loader.py +++ b/modelopt/recipe/loader.py @@ -28,10 +28,13 @@ __all__ = ["load_config", "load_recipe"] +_IMPORT_KEY = "$import" + + def _resolve_imports( data: dict[str, Any], _loading: frozenset[str] | None = None ) -> dict[str, Any]: - """Resolve the ``imports`` section in a recipe and substitute named references. + """Resolve the ``imports`` section and ``$import`` references in a recipe. 
An ``imports`` block is a dict mapping short names to config file paths:: @@ -39,8 +42,14 @@ def _resolve_imports( fp8: configs/numerics/fp8 nvfp4: configs/numerics/nvfp4_dynamic - ``cfg`` values in ``quant_cfg`` entries that are plain strings are looked up - against the imported names and replaced with the loaded config dict. + References use the explicit ``$import`` marker so they are never confused + with literal string values:: + + quant_cfg: + - $import: base_disable_all # entire entry replaced (or list spliced) + - quantizer_name: '*weight_quantizer' + cfg: + $import: fp8 # cfg value replaced Resolution is **recursive**: an imported snippet may itself contain an ``imports`` section. Circular imports are detected and raise ``ValueError``. @@ -72,35 +81,35 @@ def _resolve_imports( snippet = _resolve_imports(snippet, _loading | {config_path}) import_map[name] = snippet - # Resolve string references in quant_cfg entries + def _lookup(ref_name: str, context: str) -> Any: + if ref_name not in import_map: + raise ValueError( + f"Unknown $import reference {ref_name!r} in {context}. " + f"Available imports: {list(import_map.keys())}" + ) + return import_map[ref_name] + + # Resolve $import references in quant_cfg entries quantize = data.get("quantize") if isinstance(quantize, dict): quant_cfg = quantize.get("quant_cfg") if isinstance(quant_cfg, list): resolved_cfg: list[Any] = [] for entry in quant_cfg: - if isinstance(entry, str): - # Entire entry is a string → replace with the imported value - if entry not in import_map: - raise ValueError( - f"Unknown import reference {entry!r} in quant_cfg list. 
" - f"Available imports: {list(import_map.keys())}" - ) - imported = import_map[entry] + if isinstance(entry, dict) and _IMPORT_KEY in entry: + # {$import: name} → replace entire entry (or splice list) + imported = _lookup(entry[_IMPORT_KEY], "quant_cfg entry") if isinstance(imported, list): - # List import → splice all entries in place resolved_cfg.extend(imported) else: resolved_cfg.append(imported) - elif isinstance(entry, dict) and isinstance(entry.get("cfg"), str): - # cfg field is a string → replace cfg value - ref_name = entry["cfg"] - if ref_name not in import_map: - raise ValueError( - f"Unknown import reference {ref_name!r} in quant_cfg entry " - f"{entry!r}. Available imports: {list(import_map.keys())}" - ) - entry["cfg"] = import_map[ref_name] + elif ( + isinstance(entry, dict) + and isinstance(entry.get("cfg"), dict) + and _IMPORT_KEY in entry["cfg"] + ): + # cfg: {$import: name} → replace cfg value + entry["cfg"] = _lookup(entry["cfg"][_IMPORT_KEY], f"cfg of {entry}") resolved_cfg.append(entry) else: resolved_cfg.append(entry) diff --git a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml index cfe6f1269b..5048b4f74c 100644 --- a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml @@ -24,12 +24,15 @@ metadata: quantize: algorithm: max quant_cfg: - - base_disable_all + - $import: base_disable_all - quantizer_name: '*input_quantizer' - cfg: fp8 + cfg: + $import: fp8 - quantizer_name: '*weight_quantizer' - cfg: fp8 + cfg: + $import: fp8 - quantizer_name: '*[kv]_bmm_quantizer' enable: true - cfg: fp8 - - default_disabled + cfg: + $import: fp8 + - $import: default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml index 37e46bef56..d5ba09c44d 100644 --- a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml +++ 
b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml @@ -25,14 +25,17 @@ metadata: quantize: algorithm: max quant_cfg: - - base_disable_all + - $import: base_disable_all - quantizer_name: '*weight_quantizer' enable: true - cfg: nvfp4 + cfg: + $import: nvfp4 - quantizer_name: '*input_quantizer' enable: true - cfg: nvfp4 + cfg: + $import: nvfp4 - quantizer_name: '*[kv]_bmm_quantizer' enable: true - cfg: fp8 - - default_disabled + cfg: + $import: fp8 + - $import: default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml index 73b3fada7e..45db9aa80c 100644 --- a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml @@ -27,11 +27,13 @@ quantize: method: gptq use_sequential: true quant_cfg: - - base_disable_all + - $import: base_disable_all - quantizer_name: '*weight_quantizer' - cfg: nvfp4_static + cfg: + $import: nvfp4_static - quantizer_name: '*input_quantizer' - cfg: nvfp4_dynamic + cfg: + $import: nvfp4_dynamic - quantizer_name: '*[kv]_bmm_quantizer' enable: false - - default_disabled + - $import: default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml index 7177a6d8aa..c41005b8fd 100644 --- a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml @@ -25,20 +25,25 @@ metadata: quantize: algorithm: max quant_cfg: - - base_disable_all + - $import: base_disable_all - quantizer_name: '*mlp.experts*weight_quantizer' enable: true - cfg: nvfp4 + cfg: + $import: nvfp4 - quantizer_name: '*mlp.experts*input_quantizer' enable: true - cfg: nvfp4 + cfg: + $import: nvfp4 - quantizer_name: '*block_sparse_moe*weight_quantizer' enable: true - cfg: nvfp4 + cfg: + $import: nvfp4 - quantizer_name: '*block_sparse_moe*input_quantizer' enable: true - 
cfg: nvfp4 + cfg: + $import: nvfp4 - quantizer_name: '*[kv]_bmm_quantizer' enable: true - cfg: fp8 - - default_disabled + cfg: + $import: fp8 + - $import: default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml index 990d686d5a..86b8ba7fac 100644 --- a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml @@ -25,20 +25,25 @@ metadata: quantize: algorithm: max quant_cfg: - - base_disable_all + - $import: base_disable_all - quantizer_name: '*mlp*weight_quantizer' enable: true - cfg: nvfp4 + cfg: + $import: nvfp4 - quantizer_name: '*mlp*input_quantizer' enable: true - cfg: nvfp4 + cfg: + $import: nvfp4 - quantizer_name: '*block_sparse_moe*weight_quantizer' enable: true - cfg: nvfp4 + cfg: + $import: nvfp4 - quantizer_name: '*block_sparse_moe*input_quantizer' enable: true - cfg: nvfp4 + cfg: + $import: nvfp4 - quantizer_name: '*[kv]_bmm_quantizer' enable: true - cfg: fp8 - - default_disabled + cfg: + $import: fp8 + - $import: default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml index 5b92c97714..c329849ad0 100644 --- a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml @@ -25,26 +25,33 @@ metadata: quantize: algorithm: max quant_cfg: - - base_disable_all + - $import: base_disable_all - quantizer_name: '*mlp*weight_quantizer' enable: true - cfg: nvfp4 + cfg: + $import: nvfp4 - quantizer_name: '*mlp*input_quantizer' enable: true - cfg: nvfp4 + cfg: + $import: nvfp4 - quantizer_name: '*block_sparse_moe*weight_quantizer' enable: true - cfg: nvfp4 + cfg: + $import: nvfp4 - quantizer_name: '*block_sparse_moe*input_quantizer' enable: true - cfg: nvfp4 + cfg: + $import: nvfp4 - quantizer_name: '*o_proj*weight_quantizer' enable: true - cfg: nvfp4 + cfg: + $import: nvfp4 
- quantizer_name: '*o_proj*input_quantizer' enable: true - cfg: nvfp4 + cfg: + $import: nvfp4 - quantizer_name: '*[kv]_bmm_quantizer' enable: true - cfg: fp8 - - default_disabled + cfg: + $import: fp8 + - $import: default_disabled diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py index 706d99d023..d217518732 100644 --- a/tests/unit/recipe/test_loader.py +++ b/tests/unit/recipe/test_loader.py @@ -259,7 +259,7 @@ def _sort_key(entry): def test_import_resolves_cfg_reference(tmp_path): - """String cfg values are replaced with the imported config dict.""" + """$import in cfg is replaced with the imported config dict.""" (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n") recipe_file = tmp_path / "recipe.yml" recipe_file.write_text( @@ -271,7 +271,8 @@ def test_import_resolves_cfg_reference(tmp_path): f" algorithm: max\n" f" quant_cfg:\n" f" - quantizer_name: '*weight_quantizer'\n" - f" cfg: fp8\n" + f" cfg:\n" + f" $import: fp8\n" ) recipe = load_recipe(recipe_file) entry = recipe.quantize["quant_cfg"][0] @@ -291,9 +292,11 @@ def test_import_same_name_used_twice(tmp_path): f" algorithm: max\n" f" quant_cfg:\n" f" - quantizer_name: '*weight_quantizer'\n" - f" cfg: fp8\n" + f" cfg:\n" + f" $import: fp8\n" f" - quantizer_name: '*input_quantizer'\n" - f" cfg: fp8\n" + f" cfg:\n" + f" $import: fp8\n" ) recipe = load_recipe(recipe_file) assert recipe.quantize["quant_cfg"][0]["cfg"] == recipe.quantize["quant_cfg"][1]["cfg"] @@ -314,9 +317,11 @@ def test_import_multiple_snippets(tmp_path): f" algorithm: max\n" f" quant_cfg:\n" f" - quantizer_name: '*weight_quantizer'\n" - f" cfg: nvfp4\n" + f" cfg:\n" + f" $import: nvfp4\n" f" - quantizer_name: '*[kv]_bmm_quantizer'\n" - f" cfg: fp8\n" + f" cfg:\n" + f" $import: fp8\n" ) recipe = load_recipe(recipe_file) assert recipe.quantize["quant_cfg"][0]["cfg"]["num_bits"] == (2, 1) @@ -324,7 +329,7 @@ def test_import_multiple_snippets(tmp_path): def test_import_inline_cfg_not_affected(tmp_path): - 
"""Inline dict cfg entries are not touched by import resolution.""" + """Inline dict cfg entries without $import are not touched.""" (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n") recipe_file = tmp_path / "recipe.yml" recipe_file.write_text( @@ -336,7 +341,8 @@ def test_import_inline_cfg_not_affected(tmp_path): f" algorithm: max\n" f" quant_cfg:\n" f" - quantizer_name: '*weight_quantizer'\n" - f" cfg: fp8\n" + f" cfg:\n" + f" $import: fp8\n" f" - quantizer_name: '*input_quantizer'\n" f" cfg:\n" f" num_bits: 8\n" @@ -358,9 +364,10 @@ def test_import_unknown_reference_raises(tmp_path): " algorithm: max\n" " quant_cfg:\n" " - quantizer_name: '*weight_quantizer'\n" - " cfg: nonexistent\n" + " cfg:\n" + " $import: nonexistent\n" ) - with pytest.raises(ValueError, match="Unknown import reference"): + with pytest.raises(ValueError, match=r"Unknown \$import reference"): load_recipe(recipe_file) @@ -412,11 +419,57 @@ def test_import_no_imports_section(tmp_path): assert recipe.quantize["quant_cfg"][0]["enable"] is False -def test_import_builtin_config_snippet(): - """Imports can reference built-in config snippets by relative path.""" +def test_import_builtin_recipe_with_imports(): + """Built-in recipes using $import load and resolve correctly.""" recipe = load_recipe("general/ptq/fp8_default-fp8_kv") - # This recipe doesn't use imports, but verify it still loads fine assert recipe.quantize + # Verify $import was resolved — cfg should be a dict, not a {$import: ...} marker + for entry in recipe.quantize["quant_cfg"]: + if "cfg" in entry and entry["cfg"] is not None: + assert "$import" not in entry["cfg"], f"Unresolved $import in {entry}" + + +def test_import_entry_dict_replacement(tmp_path): + """$import as a quant_cfg list entry replaces with the imported dict.""" + (tmp_path / "disable.yml").write_text("quantizer_name: '*'\nenable: false\n") + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" disable_all: {tmp_path / 
'disable.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - $import: disable_all\n" + ) + recipe = load_recipe(recipe_file) + assert recipe.quantize["quant_cfg"][0] == {"quantizer_name": "*", "enable": False} + + +def test_import_entry_list_splice(tmp_path): + """$import as a quant_cfg list entry splices a list-valued snippet.""" + (tmp_path / "disables.yml").write_text( + "- quantizer_name: '*lm_head*'\n enable: false\n" + "- quantizer_name: '*router*'\n enable: false\n" + ) + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" disables: {tmp_path / 'disables.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - quantizer_name: '*'\n" + f" enable: false\n" + f" - $import: disables\n" + ) + recipe = load_recipe(recipe_file) + assert len(recipe.quantize["quant_cfg"]) == 3 + assert recipe.quantize["quant_cfg"][1]["quantizer_name"] == "*lm_head*" + assert recipe.quantize["quant_cfg"][2]["quantizer_name"] == "*router*" def test_import_dir_format(tmp_path): @@ -430,7 +483,11 @@ def test_import_dir_format(tmp_path): f" description: Dir with imports.\n" ) (tmp_path / "quantize.yml").write_text( - "algorithm: max\nquant_cfg:\n - quantizer_name: '*weight_quantizer'\n cfg: fp8\n" + "algorithm: max\n" + "quant_cfg:\n" + " - quantizer_name: '*weight_quantizer'\n" + " cfg:\n" + " $import: fp8\n" ) recipe = load_recipe(tmp_path) assert recipe.quantize["quant_cfg"][0]["cfg"] == {"num_bits": (4, 3), "axis": None} @@ -443,13 +500,10 @@ def test_import_dir_format(tmp_path): def test_import_recursive(tmp_path): """A snippet can itself import other snippets.""" - # base snippet — no imports (tmp_path / "base.yml").write_text("num_bits: e4m3\n") - # mid-level snippet imports base (tmp_path / "mid.yml").write_text( - f"imports:\n base: {tmp_path / 'base.yml'}\nnum_bits: base\n" + f"imports:\n base: {tmp_path / 
'base.yml'}\nnum_bits:\n $import: base\n" ) - # recipe imports mid recipe_file = tmp_path / "recipe.yml" recipe_file.write_text( f"imports:\n" @@ -460,11 +514,12 @@ def test_import_recursive(tmp_path): f" algorithm: max\n" f" quant_cfg:\n" f" - quantizer_name: '*weight_quantizer'\n" - f" cfg: mid\n" + f" cfg:\n" + f" $import: mid\n" ) recipe = load_recipe(recipe_file) - # mid.yml's "num_bits: base" should have been resolved to the base snippet's content cfg = recipe.quantize["quant_cfg"][0]["cfg"] + # mid.yml resolved "num_bits: {$import: base}" → base.yml content assert cfg["num_bits"] == {"num_bits": (4, 3)} @@ -487,26 +542,12 @@ def test_import_circular_raises(tmp_path): def test_import_cross_file_same_name_no_conflict(tmp_path): - """Same import name in parent and child files resolve independently (no conflict). - - recipe.yml imports ``fmt`` → fp8.yml (num_bits: e4m3) - recipe.yml also imports ``child`` → child.yml - child.yml imports ``fmt`` → nvfp4.yml (num_bits: e2m1, block_sizes: ...) - - The parent's ``fmt`` and the child's ``fmt`` are different configs. - The parent should get fp8 for its own ``fmt`` reference, and the child's - ``fmt`` should be resolved within the child's scope only. 
- """ - # Two different snippets, both will be imported under the name "fmt" + """Same import name in parent and child resolve independently (scoped).""" (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n") (tmp_path / "nvfp4.yml").write_text("num_bits: e2m1\nblock_sizes:\n -1: 16\n") - - # Child snippet imports "fmt" → nvfp4 (tmp_path / "child.yml").write_text( f"imports:\n fmt: {tmp_path / 'nvfp4.yml'}\nweight_format: fmt\n" ) - - # Parent recipe imports "fmt" → fp8, and also imports "child" recipe_file = tmp_path / "recipe.yml" recipe_file.write_text( f"imports:\n" @@ -518,10 +559,10 @@ def test_import_cross_file_same_name_no_conflict(tmp_path): f" algorithm: max\n" f" quant_cfg:\n" f" - quantizer_name: '*weight_quantizer'\n" - f" cfg: fmt\n" + f" cfg:\n" + f" $import: fmt\n" ) recipe = load_recipe(recipe_file) - - # Parent's "fmt" should resolve to fp8 (e4m3), not nvfp4 + # Parent's "fmt" resolves to fp8 (e4m3), not child's nvfp4 cfg = recipe.quantize["quant_cfg"][0]["cfg"] assert cfg == {"num_bits": (4, 3)} diff --git a/tools/precommit/check_modelopt_recipes.py b/tools/precommit/check_modelopt_recipes.py index f31145bc50..600de317b5 100644 --- a/tools/precommit/check_modelopt_recipes.py +++ b/tools/precommit/check_modelopt_recipes.py @@ -48,20 +48,20 @@ def _check_quant_cfg(quant_cfg, label: str) -> list[str]: ) elif isinstance(quant_cfg, list): for i, entry in enumerate(quant_cfg): - if isinstance(entry, str): - # String entries are import references — resolved at load time - continue if not isinstance(entry, dict): errors.append( f"{label}: quant_cfg[{i}] must be a dict with " - f"'quantizer_name', got {type(entry).__name__}. " + f"'quantizer_name' or '$import', got {type(entry).__name__}. 
" "See https://nvidia.github.io/Model-Optimizer/guides/_quant_cfg.html" ) continue + # {$import: name} entries are resolved at load time + if "$import" in entry: + continue if "quantizer_name" not in entry: errors.append( f"{label}: quant_cfg[{i}] is missing 'quantizer_name'. " - "Each entry must have an explicit 'quantizer_name' key. " + "Each entry must have an explicit 'quantizer_name' or '$import' key. " "See https://nvidia.github.io/Model-Optimizer/guides/_quant_cfg.html" ) return errors From f3caa85f5c6716fd9ba7c4b43b7e845600adae13 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Mon, 13 Apr 2026 16:41:31 -0700 Subject: [PATCH 03/30] remove enable: true Signed-off-by: Shengliang Xu --- modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml | 1 - modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml | 3 --- .../general/ptq/nvfp4_experts_only-fp8_kv.yaml | 5 ----- modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml | 5 ----- modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml | 7 ------- 5 files changed, 21 deletions(-) diff --git a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml index 5048b4f74c..c6eedb824a 100644 --- a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml @@ -32,7 +32,6 @@ quantize: cfg: $import: fp8 - quantizer_name: '*[kv]_bmm_quantizer' - enable: true cfg: $import: fp8 - $import: default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml index d5ba09c44d..65b73f9d02 100644 --- a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml @@ -27,15 +27,12 @@ quantize: quant_cfg: - $import: base_disable_all - quantizer_name: '*weight_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: '*input_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: 
'*[kv]_bmm_quantizer' - enable: true cfg: $import: fp8 - $import: default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml index c41005b8fd..9d17dbab5a 100644 --- a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml @@ -27,23 +27,18 @@ quantize: quant_cfg: - $import: base_disable_all - quantizer_name: '*mlp.experts*weight_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: '*mlp.experts*input_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: '*block_sparse_moe*weight_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: '*block_sparse_moe*input_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: '*[kv]_bmm_quantizer' - enable: true cfg: $import: fp8 - $import: default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml index 86b8ba7fac..47bd5e62e6 100644 --- a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml @@ -27,23 +27,18 @@ quantize: quant_cfg: - $import: base_disable_all - quantizer_name: '*mlp*weight_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: '*mlp*input_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: '*block_sparse_moe*weight_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: '*block_sparse_moe*input_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: '*[kv]_bmm_quantizer' - enable: true cfg: $import: fp8 - $import: default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml index c329849ad0..732255b0e9 100644 --- a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml @@ -27,31 
+27,24 @@ quantize: quant_cfg: - $import: base_disable_all - quantizer_name: '*mlp*weight_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: '*mlp*input_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: '*block_sparse_moe*weight_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: '*block_sparse_moe*input_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: '*o_proj*weight_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: '*o_proj*input_quantizer' - enable: true cfg: $import: nvfp4 - quantizer_name: '*[kv]_bmm_quantizer' - enable: true cfg: $import: fp8 - $import: default_disabled From f29aed84fe4b819aed6e9784b653cfc472022065 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Mon, 13 Apr 2026 16:59:35 -0700 Subject: [PATCH 04/30] remove incorrect indent Signed-off-by: Shengliang Xu --- .pre-commit-config.yaml | 3 +- .../ptq/default_disabled_quantizers.yaml | 65 ++++++++++--------- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dd546394c3..b48c9fdf2f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -53,7 +53,8 @@ repos: hooks: - id: yamlfmt args: [--mapping=2, --sequence=4, --offset=2, --implicit_start, --implicit_end, --preserve-quotes] - exclude: ^.github/workflows/ + # configs/ contains reusable snippets that may be top-level YAML lists — yamlfmt misformats these + exclude: ^(.github/workflows/|modelopt_recipes/configs/) - repo: local hooks: diff --git a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml index 98934ae725..4b181e8f38 100644 --- a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml +++ b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml @@ -1,33 +1,34 @@ # Standard quantizer exclusions: layers that should not be quantized. 
- - quantizer_name: '*block_sparse_moe.gate*' - enable: false - - quantizer_name: '*linear_attn.conv1d*' - enable: false - - quantizer_name: '*lm_head*' - enable: false - - quantizer_name: '*mixer.conv1d*' - enable: false - - quantizer_name: '*mlp.gate.*' - enable: false - - quantizer_name: '*mlp.shared_expert_gate.*' - enable: false - - quantizer_name: '*output_layer*' - enable: false - - quantizer_name: '*proj_out.*' - enable: false - - quantizer_name: '*router*' - enable: false - - quantizer_name: 'output.*' - enable: false - - parent_class: 'nn.BatchNorm1d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.BatchNorm2d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.BatchNorm3d' - quantizer_name: '*' - enable: false - - parent_class: 'nn.LeakyReLU' - quantizer_name: '*' - enable: false + +- quantizer_name: '*block_sparse_moe.gate*' + enable: false +- quantizer_name: '*linear_attn.conv1d*' + enable: false +- quantizer_name: '*lm_head*' + enable: false +- quantizer_name: '*mixer.conv1d*' + enable: false +- quantizer_name: '*mlp.gate.*' + enable: false +- quantizer_name: '*mlp.shared_expert_gate.*' + enable: false +- quantizer_name: '*output_layer*' + enable: false +- quantizer_name: '*proj_out.*' + enable: false +- quantizer_name: '*router*' + enable: false +- quantizer_name: 'output.*' + enable: false +- parent_class: 'nn.BatchNorm1d' + quantizer_name: '*' + enable: false +- parent_class: 'nn.BatchNorm2d' + quantizer_name: '*' + enable: false +- parent_class: 'nn.BatchNorm3d' + quantizer_name: '*' + enable: false +- parent_class: 'nn.LeakyReLU' + quantizer_name: '*' + enable: false From eb0842b439bfb137824f74fbb0e2fb4b1203ccc4 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Mon, 13 Apr 2026 17:01:01 -0700 Subject: [PATCH 05/30] remove filter Signed-off-by: Shengliang Xu --- .pre-commit-config.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 
b48c9fdf2f..dd546394c3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -53,8 +53,7 @@ repos: hooks: - id: yamlfmt args: [--mapping=2, --sequence=4, --offset=2, --implicit_start, --implicit_end, --preserve-quotes] - # configs/ contains reusable snippets that may be top-level YAML lists — yamlfmt misformats these - exclude: ^(.github/workflows/|modelopt_recipes/configs/) + exclude: ^.github/workflows/ - repo: local hooks: From d69260605cdaf727cfead1d2318543a70f47df36 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Mon, 13 Apr 2026 17:12:49 -0700 Subject: [PATCH 06/30] simplify list import Signed-off-by: Shengliang Xu --- modelopt/recipe/loader.py | 13 ++-- .../configs/ptq/base_disable_all.yaml | 5 +- .../ptq/default_disabled_quantizers.yaml | 64 +++++++++---------- tests/unit/recipe/test_loader.py | 25 +++++++- 4 files changed, 65 insertions(+), 42 deletions(-) diff --git a/modelopt/recipe/loader.py b/modelopt/recipe/loader.py index 68d0f32c63..0191266346 100644 --- a/modelopt/recipe/loader.py +++ b/modelopt/recipe/loader.py @@ -97,12 +97,15 @@ def _lookup(ref_name: str, context: str) -> Any: resolved_cfg: list[Any] = [] for entry in quant_cfg: if isinstance(entry, dict) and _IMPORT_KEY in entry: - # {$import: name} → replace entire entry (or splice list) + # {$import: name} → splice imported list into quant_cfg imported = _lookup(entry[_IMPORT_KEY], "quant_cfg entry") - if isinstance(imported, list): - resolved_cfg.extend(imported) - else: - resolved_cfg.append(imported) + if not isinstance(imported, list): + raise ValueError( + f"$import {entry[_IMPORT_KEY]!r} in quant_cfg must resolve to a " + f"list, got {type(imported).__name__}. Config snippets used as " + f"quant_cfg entries must be YAML lists." 
+ ) + resolved_cfg.extend(imported) elif ( isinstance(entry, dict) and isinstance(entry.get("cfg"), dict) diff --git a/modelopt_recipes/configs/ptq/base_disable_all.yaml b/modelopt_recipes/configs/ptq/base_disable_all.yaml index 7035b55c5f..fbe6cf514c 100644 --- a/modelopt_recipes/configs/ptq/base_disable_all.yaml +++ b/modelopt_recipes/configs/ptq/base_disable_all.yaml @@ -1,3 +1,4 @@ # Disable all quantizers by default (deny-all-then-configure pattern). -quantizer_name: '*' -enable: false + + - quantizer_name: '*' + enable: false diff --git a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml index 4b181e8f38..7c1cd532fb 100644 --- a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml +++ b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml @@ -1,34 +1,34 @@ # Standard quantizer exclusions: layers that should not be quantized. -- quantizer_name: '*block_sparse_moe.gate*' - enable: false -- quantizer_name: '*linear_attn.conv1d*' - enable: false -- quantizer_name: '*lm_head*' - enable: false -- quantizer_name: '*mixer.conv1d*' - enable: false -- quantizer_name: '*mlp.gate.*' - enable: false -- quantizer_name: '*mlp.shared_expert_gate.*' - enable: false -- quantizer_name: '*output_layer*' - enable: false -- quantizer_name: '*proj_out.*' - enable: false -- quantizer_name: '*router*' - enable: false -- quantizer_name: 'output.*' - enable: false -- parent_class: 'nn.BatchNorm1d' - quantizer_name: '*' - enable: false -- parent_class: 'nn.BatchNorm2d' - quantizer_name: '*' - enable: false -- parent_class: 'nn.BatchNorm3d' - quantizer_name: '*' - enable: false -- parent_class: 'nn.LeakyReLU' - quantizer_name: '*' - enable: false + - quantizer_name: '*block_sparse_moe.gate*' + enable: false + - quantizer_name: '*linear_attn.conv1d*' + enable: false + - quantizer_name: '*lm_head*' + enable: false + - quantizer_name: '*mixer.conv1d*' + enable: false + - quantizer_name: 
'*mlp.gate.*' + enable: false + - quantizer_name: '*mlp.shared_expert_gate.*' + enable: false + - quantizer_name: '*output_layer*' + enable: false + - quantizer_name: '*proj_out.*' + enable: false + - quantizer_name: '*router*' + enable: false + - quantizer_name: 'output.*' + enable: false + - parent_class: 'nn.BatchNorm1d' + quantizer_name: '*' + enable: false + - parent_class: 'nn.BatchNorm2d' + quantizer_name: '*' + enable: false + - parent_class: 'nn.BatchNorm3d' + quantizer_name: '*' + enable: false + - parent_class: 'nn.LeakyReLU' + quantizer_name: '*' + enable: false diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py index d217518732..e6a5cdf90e 100644 --- a/tests/unit/recipe/test_loader.py +++ b/tests/unit/recipe/test_loader.py @@ -429,9 +429,9 @@ def test_import_builtin_recipe_with_imports(): assert "$import" not in entry["cfg"], f"Unresolved $import in {entry}" -def test_import_entry_dict_replacement(tmp_path): - """$import as a quant_cfg list entry replaces with the imported dict.""" - (tmp_path / "disable.yml").write_text("quantizer_name: '*'\nenable: false\n") +def test_import_entry_single_element_list(tmp_path): + """$import splices a single-element list snippet into quant_cfg.""" + (tmp_path / "disable.yml").write_text("- quantizer_name: '*'\n enable: false\n") recipe_file = tmp_path / "recipe.yml" recipe_file.write_text( f"imports:\n" @@ -444,9 +444,28 @@ def test_import_entry_dict_replacement(tmp_path): f" - $import: disable_all\n" ) recipe = load_recipe(recipe_file) + assert len(recipe.quantize["quant_cfg"]) == 1 assert recipe.quantize["quant_cfg"][0] == {"quantizer_name": "*", "enable": False} +def test_import_entry_non_list_raises(tmp_path): + """$import in quant_cfg list position raises if snippet is not a list.""" + (tmp_path / "disable.yml").write_text("quantizer_name: '*'\nenable: false\n") + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" disable_all: {tmp_path / 
'disable.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - $import: disable_all\n" + ) + with pytest.raises(ValueError, match="must resolve to a list"): + load_recipe(recipe_file) + + def test_import_entry_list_splice(tmp_path): """$import as a quant_cfg list entry splices a list-valued snippet.""" (tmp_path / "disables.yml").write_text( From e267edc9c6bd651b26dcc59905da670468913d40 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Mon, 13 Apr 2026 17:46:34 -0700 Subject: [PATCH 07/30] update docs Signed-off-by: Shengliang Xu --- docs/source/guides/10_recipes.rst | 100 +++++++++++++++++++++++++++--- 1 file changed, 92 insertions(+), 8 deletions(-) diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst index 468a1d2d6c..d684efc197 100644 --- a/docs/source/guides/10_recipes.rst +++ b/docs/source/guides/10_recipes.rst @@ -125,6 +125,79 @@ example: axis: +Composable imports +------------------ + +Recipes can import **reusable config snippets** via the ``imports`` section. +This eliminates duplication — numeric format definitions and standard exclusion +lists are authored once and referenced by name across recipes. + +The ``imports`` section is a dict mapping short names to config file paths. +References use the explicit ``{$import: name}`` marker so they are never +confused with literal values. The marker can appear anywhere in the recipe: + +- As a **dict value** — the marker is replaced with the snippet content. +- As a **list element** — the snippet (which must itself be a list) is spliced + into the surrounding list. + +.. code-block:: yaml + + imports: + base_disable_all: configs/ptq/base_disable_all + default_disabled: configs/ptq/default_disabled_quantizers + fp8: configs/numerics/fp8 + + metadata: + recipe_type: ptq + description: FP8 W8A8, FP8 KV cache. 
+ + quantize: + algorithm: max + quant_cfg: + - $import: base_disable_all # spliced from a single-element list snippet + - quantizer_name: '*weight_quantizer' + cfg: + $import: fp8 # cfg value replaced with imported dict + - $import: default_disabled # spliced from a multi-element list snippet + +In this example: + +- ``$import: base_disable_all`` and ``$import: default_disabled`` are **list elements** + — their snippets (YAML lists) are spliced into ``quant_cfg``. +- ``$import: fp8`` under ``cfg`` is a **dict value** — the snippet (a YAML dict of + quantizer attributes) replaces the ``cfg`` field. + +Import paths are resolved via :func:`~modelopt.recipe.load_config` — the +built-in ``modelopt_recipes/`` library is checked first, then the filesystem. + +**Recursive imports:** An imported snippet may itself contain an ``imports`` +section. Each file's imports are scoped to that file — the same name can be +used in different files without conflict. Circular imports are detected and +raise ``ValueError``. + +Built-in config snippets +^^^^^^^^^^^^^^^^^^^^^^^^ + +Reusable snippets are stored under ``modelopt_recipes/configs/``: + +.. list-table:: + :header-rows: 1 + :widths: 45 55 + + * - Snippet path + - Description + * - ``configs/numerics/fp8`` + - FP8 E4M3 quantizer attributes + * - ``configs/numerics/nvfp4_dynamic`` + - NVFP4 E2M1 blockwise, dynamic calibration, FP8 scales + * - ``configs/numerics/nvfp4_static`` + - NVFP4 E2M1 blockwise, static calibration, FP8 scales + * - ``configs/ptq/base_disable_all`` + - Disable all quantizers (deny-all-then-configure pattern) + * - ``configs/ptq/default_disabled_quantizers`` + - Standard exclusions (LM head, routers, BatchNorm, etc.) + + Metadata section ================ @@ -355,11 +428,15 @@ To create a custom recipe: 3. Update the ``metadata.description`` to describe your changes. 4. Save the file (or directory) and pass its path to ``load_recipe()`` or ``--recipe``. 
-Example -- creating a custom PTQ recipe (INT8 per-channel): +Example -- creating a custom PTQ recipe using imports: .. code-block:: yaml # my_int8_recipe.yml + imports: + base_disable_all: configs/ptq/base_disable_all + default_disabled: configs/ptq/default_disabled_quantizers + metadata: recipe_type: ptq description: INT8 per-channel weight, per-tensor activation. @@ -367,8 +444,7 @@ Example -- creating a custom PTQ recipe (INT8 per-channel): quantize: algorithm: max quant_cfg: - - quantizer_name: '*' - enable: false + - $import: base_disable_all - quantizer_name: '*weight_quantizer' cfg: num_bits: 8 @@ -377,10 +453,11 @@ Example -- creating a custom PTQ recipe (INT8 per-channel): cfg: num_bits: 8 axis: - - quantizer_name: '*lm_head*' - enable: false - - quantizer_name: '*output_layer*' - enable: false + - $import: default_disabled + +The built-in snippets (``base_disable_all`` and ``default_disabled_quantizers``, imported here as ``default_disabled``) handle the +deny-all prefix and standard exclusions. Only the format-specific entries need +to be written inline. 
Recipe repository layout @@ -402,7 +479,14 @@ The ``modelopt_recipes/`` package is organized as follows: +-- models/ # Model-specific recipes | +-- Step3.5-Flash/ | +-- nvfp4-mlp-only.yaml - +-- configs/ # Shared configuration fragments + +-- configs/ # Reusable config snippets (imported via $import) + +-- numerics/ # Numeric format definitions + | +-- fp8.yml + | +-- nvfp4_dynamic.yml + | +-- nvfp4_static.yml + +-- ptq/ # PTQ-specific entry snippets + +-- base_disable_all.yaml + +-- default_disabled_quantizers.yaml Recipe data model From bc47154eee203d6abab2f47f45d2acf268d533d9 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Mon, 13 Apr 2026 18:23:27 -0700 Subject: [PATCH 08/30] add import override semantic Signed-off-by: Shengliang Xu --- docs/source/guides/10_recipes.rst | 31 +++++++ modelopt/recipe/loader.py | 41 +++++++++- tests/unit/recipe/test_loader.py | 131 ++++++++++++++++++++++++++++++ 3 files changed, 201 insertions(+), 2 deletions(-) diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst index d684efc197..5abbf0a5c6 100644 --- a/docs/source/guides/10_recipes.rst +++ b/docs/source/guides/10_recipes.rst @@ -140,6 +140,37 @@ confused with literal values. The marker can appear anywhere in the recipe: - As a **list element** — the snippet (which must itself be a list) is spliced into the surrounding list. +As a **dict value**, ``$import`` supports three composition modes: + +- **Single import:** ``$import: name`` — replaced with the snippet content. +- **Multiple imports:** ``$import: [name1, name2]`` — snippets are merged into + one dict. The snippets must not have overlapping keys. +- **Import + extend:** extra keys alongside ``$import`` are merged in after the + import(s). Extra keys must not conflict with any imported key. + +.. 
code-block:: yaml + + # Single import + cfg: + $import: fp8 + + # Multiple imports — merge two non-overlapping snippets + cfg: + $import: [bits, scale] + + # Import + extend — add axis on top of imported fp8 + cfg: + $import: fp8 + axis: 0 # result: {num_bits: e4m3, axis: 0} + +Key conflicts are never allowed — whether between imported snippets or between +imports and inline keys. If a key appears in more than one source, the loader +raises an error. This avoids ambiguous merge semantics. If you need different +values for an existing key, create a new snippet instead. + +As a **list element**, ``$import`` must be the only key — extra keys alongside +a list splice are not supported. + .. code-block:: yaml imports: diff --git a/modelopt/recipe/loader.py b/modelopt/recipe/loader.py index 0191266346..232a3c71ee 100644 --- a/modelopt/recipe/loader.py +++ b/modelopt/recipe/loader.py @@ -98,6 +98,11 @@ def _lookup(ref_name: str, context: str) -> Any: for entry in quant_cfg: if isinstance(entry, dict) and _IMPORT_KEY in entry: # {$import: name} → splice imported list into quant_cfg + if len(entry) > 1: + raise ValueError( + f"$import must be the only key in the dict, got extra keys: " + f"{sorted(k for k in entry if k != _IMPORT_KEY)}" + ) imported = _lookup(entry[_IMPORT_KEY], "quant_cfg entry") if not isinstance(imported, list): raise ValueError( @@ -111,8 +116,40 @@ def _lookup(ref_name: str, context: str) -> Any: and isinstance(entry.get("cfg"), dict) and _IMPORT_KEY in entry["cfg"] ): - # cfg: {$import: name} → replace cfg value - entry["cfg"] = _lookup(entry["cfg"][_IMPORT_KEY], f"cfg of {entry}") + # cfg: {$import: name_or_list, ...extra} → import, merge, extend + ref = entry["cfg"].pop(_IMPORT_KEY) + extra_keys = dict(entry["cfg"]) # remaining inline keys + ref_names = ref if isinstance(ref, list) else [ref] + + # Merge all imported snippets, detecting conflicts between them + merged: dict[str, Any] = {} + for name in ref_names: + snippet = _lookup(name, f"cfg of 
{entry}") + if not isinstance(snippet, dict): + raise ValueError( + f"$import {name!r} in cfg must resolve to a dict, " + f"got {type(snippet).__name__}." + ) + conflicts = set(snippet) & set(merged) + if conflicts: + raise ValueError( + f"$import {name!r} conflicts with keys from prior imports: " + f"{sorted(conflicts)}. Imported snippets must not overlap." + ) + merged.update(snippet) + + # Extend with inline keys, detecting conflicts with imports + if extra_keys: + conflicts = set(extra_keys) & set(merged) + if conflicts: + raise ValueError( + f"Inline keys {sorted(conflicts)} conflict with imported " + f"values. Cannot override imported values — create a new " + f"snippet instead." + ) + merged.update(extra_keys) + + entry["cfg"] = merged resolved_cfg.append(entry) else: resolved_cfg.append(entry) diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py index e6a5cdf90e..d335f1fd47 100644 --- a/tests/unit/recipe/test_loader.py +++ b/tests/unit/recipe/test_loader.py @@ -491,6 +491,137 @@ def test_import_entry_list_splice(tmp_path): assert recipe.quantize["quant_cfg"][2]["quantizer_name"] == "*router*" +def test_import_entry_sibling_keys_raises(tmp_path): + """$import as a list entry with sibling keys raises ValueError.""" + (tmp_path / "disable.yml").write_text("- quantizer_name: '*'\n enable: false\n") + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" disable_all: {tmp_path / 'disable.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - $import: disable_all\n" + f" quantizer_name: '*extra*'\n" + ) + with pytest.raises(ValueError, match="must be the only key"): + load_recipe(recipe_file) + + +def test_import_cfg_extend(tmp_path): + """$import in cfg with extra non-conflicting keys extends the snippet.""" + (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n") + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" 
+ f" fp8: {tmp_path / 'fp8.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - quantizer_name: '*weight_quantizer'\n" + f" cfg:\n" + f" $import: fp8\n" + f" axis: 0\n" + ) + recipe = load_recipe(recipe_file) + cfg = recipe.quantize["quant_cfg"][0]["cfg"] + assert cfg == {"num_bits": (4, 3), "axis": 0} + + +def test_import_cfg_conflict_raises(tmp_path): + """$import in cfg with conflicting keys raises ValueError.""" + (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n") + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" fp8: {tmp_path / 'fp8.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - quantizer_name: '*weight_quantizer'\n" + f" cfg:\n" + f" $import: fp8\n" + f" num_bits: 8\n" + ) + with pytest.raises(ValueError, match="conflict with imported"): + load_recipe(recipe_file) + + +def test_import_cfg_multi_import(tmp_path): + """$import with a list of names merges non-overlapping snippets.""" + (tmp_path / "bits.yml").write_text("num_bits: e4m3\n") + (tmp_path / "axis.yml").write_text("axis: 0\n") + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" bits: {tmp_path / 'bits.yml'}\n" + f" axis: {tmp_path / 'axis.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - quantizer_name: '*weight_quantizer'\n" + f" cfg:\n" + f" $import: [bits, axis]\n" + ) + recipe = load_recipe(recipe_file) + cfg = recipe.quantize["quant_cfg"][0]["cfg"] + assert cfg == {"num_bits": (4, 3), "axis": 0} + + +def test_import_cfg_multi_import_conflict_raises(tmp_path): + """$import with a list of names raises when snippets have overlapping keys.""" + (tmp_path / "a.yml").write_text("num_bits: e4m3\n") + (tmp_path / "b.yml").write_text("num_bits: 8\n") + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + 
f" a: {tmp_path / 'a.yml'}\n" + f" b: {tmp_path / 'b.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - quantizer_name: '*weight_quantizer'\n" + f" cfg:\n" + f" $import: [a, b]\n" + ) + with pytest.raises(ValueError, match="conflicts with keys from prior imports"): + load_recipe(recipe_file) + + +def test_import_cfg_multi_import_with_extend(tmp_path): + """$import list + inline keys all merge without conflicts.""" + (tmp_path / "bits.yml").write_text("num_bits: e4m3\n") + (tmp_path / "scale.yml").write_text("scale_bits: e8m0\n") + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" bits: {tmp_path / 'bits.yml'}\n" + f" scale: {tmp_path / 'scale.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - quantizer_name: '*weight_quantizer'\n" + f" cfg:\n" + f" $import: [bits, scale]\n" + f" axis: 0\n" + ) + recipe = load_recipe(recipe_file) + cfg = recipe.quantize["quant_cfg"][0]["cfg"] + assert cfg == {"num_bits": (4, 3), "scale_bits": (8, 0), "axis": 0} + + def test_import_dir_format(tmp_path): """Imports in recipe.yml work with the directory recipe format.""" (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n") From dbb524d9026e7ac2aebe18253e15ae357259cf01 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Mon, 13 Apr 2026 18:32:27 -0700 Subject: [PATCH 09/30] more clear docs Signed-off-by: Shengliang Xu --- docs/source/guides/10_recipes.rst | 49 +++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst index 5abbf0a5c6..fe99a22e1a 100644 --- a/docs/source/guides/10_recipes.rst +++ b/docs/source/guides/10_recipes.rst @@ -54,14 +54,18 @@ A recipe contains two top-level sections: ``metadata`` and a type-specific configuration section (for example, ``quantize`` for PTQ recipes). 
These can live in a single YAML file or be split across files in a directory. +Recipes support two authoring styles: **inline** (all values written directly) +and **import-based** (reusable snippets referenced via ``$import``). Both +styles can be used in a single-file or directory layout. + Single-file format ------------------ -The simplest form is a single ``.yml`` or ``.yaml`` file. Here is a PTQ example: +The simplest form is a single ``.yml`` or ``.yaml`` file. -.. code-block:: yaml +**Inline style** — all config values are written directly: - # modelopt_recipes/general/ptq/fp8_default-fp8_kv.yml +.. code-block:: yaml metadata: recipe_type: ptq @@ -81,11 +85,42 @@ The simplest form is a single ``.yml`` or ``.yaml`` file. Here is a PTQ example num_bits: e4m3 axis: - quantizer_name: '*[kv]_bmm_quantizer' - enable: true cfg: num_bits: e4m3 # ... standard exclusions omitted for brevity +**Import style** — the same recipe using reusable config snippets: + +.. code-block:: yaml + + imports: + base_disable_all: configs/ptq/base_disable_all + default_disabled: configs/ptq/default_disabled_quantizers + fp8: configs/numerics/fp8 + + metadata: + recipe_type: ptq + description: FP8 per-tensor weight and activation (W8A8), FP8 KV cache, max calibration. + + quantize: + algorithm: max + quant_cfg: + - $import: base_disable_all + - quantizer_name: '*input_quantizer' + cfg: + $import: fp8 + - quantizer_name: '*weight_quantizer' + cfg: + $import: fp8 + - quantizer_name: '*[kv]_bmm_quantizer' + cfg: + $import: fp8 + - $import: default_disabled + +Both styles produce identical results at load time. The import style reduces +duplication when multiple recipes share the same numeric formats or exclusion +lists. See :ref:`composable-imports` below for the full ``$import`` specification. + Directory format ---------------- @@ -96,7 +131,7 @@ example: .. 
code-block:: text my_recipe/ - recipe.yml # metadata section + recipe.yml # metadata section (+ optional imports) quantize.yml # quantize section (quant_cfg + algorithm) ``recipe.yml``: @@ -124,6 +159,10 @@ example: num_bits: e4m3 axis: +Both inline and import styles work with the directory format. When using +imports in a directory recipe, place the ``imports`` section in ``recipe.yml``. + +.. _composable-imports: Composable imports ------------------ From 99414905b929e2422fda0ecd339f49874df31036 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Mon, 13 Apr 2026 18:38:13 -0700 Subject: [PATCH 10/30] changelog Signed-off-by: Shengliang Xu --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index fdd738590a..3712d505f9 100755 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,6 +15,7 @@ Changelog - Enable PTQ workflow for the Step3.5-Flash MoE model with NVFP4 W4A4 + FP8 KV cache quantization. See `modelopt_recipes/models/Step3.5-Flash/nvfp4-mlp-only.yaml `_ for more details. - Add support for vLLM fakequant reload using ModelOpt state for HF models. See `examples/vllm_serve/README.md `_ for more details. - [Early Testing] Add Claude Code PTQ skill (``.claude/skills/ptq/``) for agent-assisted post-training quantization. The skill guides the agent through environment detection, model support checking, format selection, and execution via the launcher or manual SLURM/Docker/bare GPU paths. Includes handling for unlisted models with custom module patching. This feature is in early testing — use with caution. +- Add composable ``$import`` system for recipe YAML configs. Recipes can now declare an ``imports`` section mapping names to reusable config snippet files. The ``{$import: name}`` marker resolves at load time — as a dict value it replaces the content (with optional extend and multi-import via ``$import: [a, b]``), as a list element it splices the snippet entries. Key conflicts between imports or inline keys raise errors. 
Resolution is recursive with circular import detection. All built-in PTQ recipes converted to use imports with shared snippets under ``modelopt_recipes/configs/``. See :ref:`composable-imports` for the full specification. **Backward Breaking Changes** From 74235a9e246898c982ae27f4e3cdfa8f71039680 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Tue, 14 Apr 2026 10:13:41 -0700 Subject: [PATCH 11/30] new conflict semantic Signed-off-by: Shengliang Xu --- CHANGELOG.rst | 2 +- docs/source/guides/10_recipes.rst | 37 ++++++++++++++++++------------- modelopt/recipe/loader.py | 27 ++++++---------------- tests/unit/recipe/test_loader.py | 27 +++++++++++++--------- 4 files changed, 46 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3712d505f9..e2c4b2a7c0 100755 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,7 +15,7 @@ Changelog - Enable PTQ workflow for the Step3.5-Flash MoE model with NVFP4 W4A4 + FP8 KV cache quantization. See `modelopt_recipes/models/Step3.5-Flash/nvfp4-mlp-only.yaml `_ for more details. - Add support for vLLM fakequant reload using ModelOpt state for HF models. See `examples/vllm_serve/README.md `_ for more details. - [Early Testing] Add Claude Code PTQ skill (``.claude/skills/ptq/``) for agent-assisted post-training quantization. The skill guides the agent through environment detection, model support checking, format selection, and execution via the launcher or manual SLURM/Docker/bare GPU paths. Includes handling for unlisted models with custom module patching. This feature is in early testing — use with caution. -- Add composable ``$import`` system for recipe YAML configs. Recipes can now declare an ``imports`` section mapping names to reusable config snippet files. The ``{$import: name}`` marker resolves at load time — as a dict value it replaces the content (with optional extend and multi-import via ``$import: [a, b]``), as a list element it splices the snippet entries. 
Key conflicts between imports or inline keys raise errors. Resolution is recursive with circular import detection. All built-in PTQ recipes converted to use imports with shared snippets under ``modelopt_recipes/configs/``. See :ref:`composable-imports` for the full specification. +- Add composable ``$import`` system for recipe YAML configs. Recipes can now declare an ``imports`` section mapping names to reusable config snippet files. The ``{$import: name}`` marker resolves at load time — as a dict value it replaces the content with ordered override precedence (later imports override earlier, inline keys override all), as a list element it splices the snippet entries. Supports multi-import (``$import: [a, b]``) and inline extension/override. Resolution is recursive with circular import detection. All built-in PTQ recipes converted to use imports with shared snippets under ``modelopt_recipes/configs/``. See :ref:`composable-imports` for the full specification. **Backward Breaking Changes** diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst index fe99a22e1a..d26c5d3671 100644 --- a/docs/source/guides/10_recipes.rst +++ b/docs/source/guides/10_recipes.rst @@ -179,33 +179,38 @@ confused with literal values. The marker can appear anywhere in the recipe: - As a **list element** — the snippet (which must itself be a list) is spliced into the surrounding list. -As a **dict value**, ``$import`` supports three composition modes: +As a **dict value**, ``$import`` supports composition with clear override +precedence (lowest to highest): -- **Single import:** ``$import: name`` — replaced with the snippet content. -- **Multiple imports:** ``$import: [name1, name2]`` — snippets are merged into - one dict. The snippets must not have overlapping keys. -- **Import + extend:** extra keys alongside ``$import`` are merged in after the - import(s). Extra keys must not conflict with any imported key. +1. 
**Imports in list order** — ``$import: [base, override]``: later snippets + override earlier ones on key conflicts. +2. **Inline keys** — extra keys alongside ``$import`` override all imported + values. + +This is equivalent to calling ``dict.update()`` in order: imports first (in +list order), then inline keys last. .. code-block:: yaml # Single import cfg: - $import: fp8 + $import: nvfp4 - # Multiple imports — merge two non-overlapping snippets + # Import + override — import nvfp4_dynamic, then replace block_sizes inline cfg: - $import: [bits, scale] + $import: nvfp4 # imports {num_bits: e2m1, block_sizes: {-1: 16, type: dynamic, ...}} + block_sizes: + -1: 16 + type: static # shallow update: this block_sizes replaces the imported one entirely - # Import + extend — add axis on top of imported fp8 + # Multiple imports — later snippet overrides earlier on conflict cfg: - $import: fp8 - axis: 0 # result: {num_bits: e4m3, axis: 0} + $import: [base_format, kv_tweaks] # kv_tweaks wins on shared keys -Key conflicts are never allowed — whether between imported snippets or between -imports and inline keys. If a key appears in more than one source, the loader -raises an error. This avoids ambiguous merge semantics. If you need different -values for an existing key, create a new snippet instead. + # Combined: multi-import + inline override + cfg: + $import: [bits, scale] + axis: 0 # highest precedence As a **list element**, ``$import`` must be the only key — extra keys alongside a list splice are not supported. diff --git a/modelopt/recipe/loader.py b/modelopt/recipe/loader.py index 232a3c71ee..32f610e12b 100644 --- a/modelopt/recipe/loader.py +++ b/modelopt/recipe/loader.py @@ -116,12 +116,15 @@ def _lookup(ref_name: str, context: str) -> Any: and isinstance(entry.get("cfg"), dict) and _IMPORT_KEY in entry["cfg"] ): - # cfg: {$import: name_or_list, ...inline} → import then override + # cfg: {$import: name_or_list, ...inline} → import then override + # + # Precedence (lowest → highest): + # 1. 
Imports in list order (later imports override earlier) + # 2. Inline keys (override all imports) ref = entry["cfg"].pop(_IMPORT_KEY) - extra_keys = dict(entry["cfg"]) # remaining inline keys + inline_keys = dict(entry["cfg"]) # remaining inline keys ref_names = ref if isinstance(ref, list) else [ref] - # Merge all imported snippets, detecting conflicts between them merged: dict[str, Any] = {} for name in ref_names: snippet = _lookup(name, f"cfg of {entry}") @@ -130,25 +133,9 @@ def _lookup(ref_name: str, context: str) -> Any: f"$import {name!r} in cfg must resolve to a dict, " f"got {type(snippet).__name__}." ) - conflicts = set(snippet) & set(merged) - if conflicts: - raise ValueError( - f"$import {name!r} conflicts with keys from prior imports: " - f"{sorted(conflicts)}. Imported snippets must not overlap." - ) merged.update(snippet) - # Extend with inline keys, detecting conflicts with imports - if extra_keys: - conflicts = set(extra_keys) & set(merged) - if conflicts: - raise ValueError( - f"Inline keys {sorted(conflicts)} conflict with imported " - f"values. Cannot override imported values — create a new " - f"snippet instead." 
- ) - merged.update(extra_keys) - + merged.update(inline_keys) entry["cfg"] = merged resolved_cfg.append(entry) else: diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py index d335f1fd47..723fbdcd35 100644 --- a/tests/unit/recipe/test_loader.py +++ b/tests/unit/recipe/test_loader.py @@ -532,9 +532,9 @@ def test_import_cfg_extend(tmp_path): assert cfg == {"num_bits": (4, 3), "axis": 0} -def test_import_cfg_conflict_raises(tmp_path): - """$import in cfg with conflicting keys raises ValueError.""" - (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n") +def test_import_cfg_inline_overrides_import(tmp_path): + """Inline keys override imported values (highest precedence).""" + (tmp_path / "fp8.yml").write_text("num_bits: e4m3\naxis:\n") recipe_file = tmp_path / "recipe.yml" recipe_file.write_text( f"imports:\n" @@ -549,8 +549,12 @@ def test_import_cfg_conflict_raises(tmp_path): f" $import: fp8\n" f" num_bits: 8\n" ) - with pytest.raises(ValueError, match="conflict with imported"): - load_recipe(recipe_file) + recipe = load_recipe(recipe_file) + cfg = recipe.quantize["quant_cfg"][0]["cfg"] + # inline num_bits: 8 overrides imported num_bits: e4m3 → (4,3) + assert cfg["num_bits"] == 8 + # imported axis: None is preserved (no inline override) + assert cfg["axis"] is None def test_import_cfg_multi_import(tmp_path): @@ -576,9 +580,9 @@ def test_import_cfg_multi_import(tmp_path): assert cfg == {"num_bits": (4, 3), "axis": 0} -def test_import_cfg_multi_import_conflict_raises(tmp_path): - """$import with a list of names raises when snippets have overlapping keys.""" - (tmp_path / "a.yml").write_text("num_bits: e4m3\n") +def test_import_cfg_multi_import_later_overrides_earlier(tmp_path): + """In $import list, later snippets override earlier ones on key conflicts.""" + (tmp_path / "a.yml").write_text("num_bits: e4m3\naxis: 0\n") (tmp_path / "b.yml").write_text("num_bits: 8\n") recipe_file = tmp_path / "recipe.yml" recipe_file.write_text( @@ -594,8 
+598,11 @@ def test_import_cfg_multi_import_conflict_raises(tmp_path): f" cfg:\n" f" $import: [a, b]\n" ) - with pytest.raises(ValueError, match="conflicts with keys from prior imports"): - load_recipe(recipe_file) + recipe = load_recipe(recipe_file) + cfg = recipe.quantize["quant_cfg"][0]["cfg"] + # b overrides a's num_bits; a's axis is preserved + assert cfg["num_bits"] == 8 + assert cfg["axis"] == 0 def test_import_cfg_multi_import_with_extend(tmp_path): From 8182b74d06e67b9547e91ee36dc3844fa95faaa5 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Tue, 14 Apr 2026 15:00:31 -0700 Subject: [PATCH 12/30] support import for recipe snippets Signed-off-by: Shengliang Xu --- modelopt/recipe/_config_loader.py | 34 +++++- modelopt/recipe/loader.py | 104 ++++++++++-------- modelopt_recipes/configs/ptq/fp8_kv.yaml | 7 ++ .../general/ptq/fp8_default-fp8_kv.yaml | 5 +- .../general/ptq/nvfp4_default-fp8_kv.yaml | 6 +- .../ptq/nvfp4_experts_only-fp8_kv.yaml | 6 +- .../general/ptq/nvfp4_mlp_only-fp8_kv.yaml | 6 +- .../general/ptq/nvfp4_omlp_only-fp8_kv.yaml | 6 +- tests/unit/recipe/test_loader.py | 43 ++++++++ 9 files changed, 149 insertions(+), 68 deletions(-) create mode 100644 modelopt_recipes/configs/ptq/fp8_kv.yaml diff --git a/modelopt/recipe/_config_loader.py b/modelopt/recipe/_config_loader.py index da6f4b7640..a94b67edb2 100644 --- a/modelopt/recipe/_config_loader.py +++ b/modelopt/recipe/_config_loader.py @@ -103,9 +103,39 @@ def load_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[ f"Cannot find config file of {config_file}, paths checked: {paths_to_check}" ) - _raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) - if _raw is None: + text = config_path.read_text(encoding="utf-8") + docs = list(yaml.safe_load_all(text)) + + if len(docs) == 0 or docs[0] is None: return {} + if len(docs) == 1: + _raw = docs[0] + elif len(docs) == 2: + # Multi-document: first doc is imports/metadata, second is content. 
+ # Merge the imports into the content for downstream resolution. + header, content = docs[0], docs[1] + if not isinstance(header, dict): + raise ValueError( + f"Config file {config_path}: first YAML document must be a mapping, " + f"got {type(header).__name__}" + ) + if content is None: + content = {} + if isinstance(content, dict): + _raw = {**header, **content} + elif isinstance(content, list): + # List content with a header dict — attach imports via wrapper + _raw = {**header, "_list_content": content} + else: + raise ValueError( + f"Config file {config_path}: second YAML document must be a mapping or list, " + f"got {type(content).__name__}" + ) + else: + raise ValueError( + f"Config file {config_path}: expected 1 or 2 YAML documents, got {len(docs)}" + ) + if not isinstance(_raw, (dict, list)): raise ValueError( f"Config file {config_path} must contain a YAML mapping or list, got {type(_raw).__name__}" diff --git a/modelopt/recipe/loader.py b/modelopt/recipe/loader.py index 32f610e12b..da62b17b63 100644 --- a/modelopt/recipe/loader.py +++ b/modelopt/recipe/loader.py @@ -79,6 +79,9 @@ def _resolve_imports( snippet = load_config(config_path) if isinstance(snippet, dict) and "imports" in snippet: snippet = _resolve_imports(snippet, _loading | {config_path}) + # Unwrap _list_content (multi-document YAML: imports + list content) + if isinstance(snippet, dict) and "_list_content" in snippet: + snippet = snippet["_list_content"] import_map[name] = snippet def _lookup(ref_name: str, context: str) -> Any: @@ -89,58 +92,65 @@ def _lookup(ref_name: str, context: str) -> Any: ) return import_map[ref_name] + def _resolve_list(entries: list[Any]) -> list[Any]: + """Resolve $import markers in a list of quant_cfg-style entries.""" + resolved: list[Any] = [] + for entry in entries: + if isinstance(entry, dict) and _IMPORT_KEY in entry: + # {$import: name} → splice imported list + if len(entry) > 1: + raise ValueError( + f"$import must be the only key in the dict, got extra 
keys: " + f"{sorted(k for k in entry if k != _IMPORT_KEY)}" + ) + imported = _lookup(entry[_IMPORT_KEY], "list entry") + if not isinstance(imported, list): + raise ValueError( + f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a " + f"list, got {type(imported).__name__}." + ) + resolved.extend(imported) + elif ( + isinstance(entry, dict) + and isinstance(entry.get("cfg"), dict) + and _IMPORT_KEY in entry["cfg"] + ): + # cfg: {$import: name_or_list, ...inline} → import then override + # + # Precedence (lowest → highest): + # 1. Imports in list order (later imports override earlier) + # 2. Inline keys (override all imports) + ref = entry["cfg"].pop(_IMPORT_KEY) + inline_keys = dict(entry["cfg"]) + ref_names = ref if isinstance(ref, list) else [ref] + + merged: dict[str, Any] = {} + for name in ref_names: + snippet = _lookup(name, f"cfg of {entry}") + if not isinstance(snippet, dict): + raise ValueError( + f"$import {name!r} in cfg must resolve to a dict, " + f"got {type(snippet).__name__}." + ) + merged.update(snippet) + + merged.update(inline_keys) + entry["cfg"] = merged + resolved.append(entry) + else: + resolved.append(entry) + return resolved + # Resolve $import references in quant_cfg entries quantize = data.get("quantize") if isinstance(quantize, dict): quant_cfg = quantize.get("quant_cfg") if isinstance(quant_cfg, list): - resolved_cfg: list[Any] = [] - for entry in quant_cfg: - if isinstance(entry, dict) and _IMPORT_KEY in entry: - # {$import: name} → splice imported list into quant_cfg - if len(entry) > 1: - raise ValueError( - f"$import must be the only key in the dict, got extra keys: " - f"{sorted(k for k in entry if k != _IMPORT_KEY)}" - ) - imported = _lookup(entry[_IMPORT_KEY], "quant_cfg entry") - if not isinstance(imported, list): - raise ValueError( - f"$import {entry[_IMPORT_KEY]!r} in quant_cfg must resolve to a " - f"list, got {type(imported).__name__}. Config snippets used as " - f"quant_cfg entries must be YAML lists." 
- ) - resolved_cfg.extend(imported) - elif ( - isinstance(entry, dict) - and isinstance(entry.get("cfg"), dict) - and _IMPORT_KEY in entry["cfg"] - ): - # cfg: {$import: name_or_list, ...inline} → import then override - # - # Precedence (lowest → highest): - # 1. Imports in list order (later imports override earlier) - # 2. Inline keys (override all imports) - ref = entry["cfg"].pop(_IMPORT_KEY) - inline_keys = dict(entry["cfg"]) # remaining inline keys - ref_names = ref if isinstance(ref, list) else [ref] - - merged: dict[str, Any] = {} - for name in ref_names: - snippet = _lookup(name, f"cfg of {entry}") - if not isinstance(snippet, dict): - raise ValueError( - f"$import {name!r} in cfg must resolve to a dict, " - f"got {type(snippet).__name__}." - ) - merged.update(snippet) - - merged.update(inline_keys) - entry["cfg"] = merged - resolved_cfg.append(entry) - else: - resolved_cfg.append(entry) - quantize["quant_cfg"] = resolved_cfg + quantize["quant_cfg"] = _resolve_list(quant_cfg) + + # Resolve $import references in _list_content (multi-document snippets) + if "_list_content" in data: + data["_list_content"] = _resolve_list(data["_list_content"]) return data diff --git a/modelopt_recipes/configs/ptq/fp8_kv.yaml b/modelopt_recipes/configs/ptq/fp8_kv.yaml new file mode 100644 index 0000000000..cb3ff3a009 --- /dev/null +++ b/modelopt_recipes/configs/ptq/fp8_kv.yaml @@ -0,0 +1,7 @@ +# FP8 E4M3 KV cache quantization. 
+imports: + fp8: configs/numerics/fp8 +--- + - quantizer_name: '*[kv]_bmm_quantizer' + cfg: + $import: fp8 diff --git a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml index c6eedb824a..680677d607 100644 --- a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml @@ -17,6 +17,7 @@ imports: base_disable_all: configs/ptq/base_disable_all default_disabled: configs/ptq/default_disabled_quantizers fp8: configs/numerics/fp8 + fp8_kv: configs/ptq/fp8_kv metadata: recipe_type: ptq @@ -31,7 +32,5 @@ quantize: - quantizer_name: '*weight_quantizer' cfg: $import: fp8 - - quantizer_name: '*[kv]_bmm_quantizer' - cfg: - $import: fp8 + - $import: fp8_kv - $import: default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml index 65b73f9d02..9dc6da1ace 100644 --- a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml @@ -17,7 +17,7 @@ imports: base_disable_all: configs/ptq/base_disable_all default_disabled: configs/ptq/default_disabled_quantizers nvfp4: configs/numerics/nvfp4_dynamic - fp8: configs/numerics/fp8 + fp8_kv: configs/ptq/fp8_kv metadata: recipe_type: ptq @@ -32,7 +32,5 @@ quantize: - quantizer_name: '*input_quantizer' cfg: $import: nvfp4 - - quantizer_name: '*[kv]_bmm_quantizer' - cfg: - $import: fp8 + - $import: fp8_kv - $import: default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml index 9d17dbab5a..a3730f839e 100644 --- a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml @@ -17,7 +17,7 @@ imports: base_disable_all: configs/ptq/base_disable_all default_disabled: configs/ptq/default_disabled_quantizers nvfp4: configs/numerics/nvfp4_dynamic 
- fp8: configs/numerics/fp8 + fp8_kv: configs/ptq/fp8_kv metadata: recipe_type: ptq @@ -38,7 +38,5 @@ quantize: - quantizer_name: '*block_sparse_moe*input_quantizer' cfg: $import: nvfp4 - - quantizer_name: '*[kv]_bmm_quantizer' - cfg: - $import: fp8 + - $import: fp8_kv - $import: default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml index 47bd5e62e6..0d9d0861ca 100644 --- a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml @@ -17,7 +17,7 @@ imports: base_disable_all: configs/ptq/base_disable_all default_disabled: configs/ptq/default_disabled_quantizers nvfp4: configs/numerics/nvfp4_dynamic - fp8: configs/numerics/fp8 + fp8_kv: configs/ptq/fp8_kv metadata: recipe_type: ptq @@ -38,7 +38,5 @@ quantize: - quantizer_name: '*block_sparse_moe*input_quantizer' cfg: $import: nvfp4 - - quantizer_name: '*[kv]_bmm_quantizer' - cfg: - $import: fp8 + - $import: fp8_kv - $import: default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml index 732255b0e9..1a1fa63255 100644 --- a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml @@ -17,7 +17,7 @@ imports: base_disable_all: configs/ptq/base_disable_all default_disabled: configs/ptq/default_disabled_quantizers nvfp4: configs/numerics/nvfp4_dynamic - fp8: configs/numerics/fp8 + fp8_kv: configs/ptq/fp8_kv metadata: recipe_type: ptq @@ -44,7 +44,5 @@ quantize: - quantizer_name: '*o_proj*input_quantizer' cfg: $import: nvfp4 - - quantizer_name: '*[kv]_bmm_quantizer' - cfg: - $import: fp8 + - $import: fp8_kv - $import: default_disabled diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py index 723fbdcd35..4dd235a081 100644 --- a/tests/unit/recipe/test_loader.py +++ b/tests/unit/recipe/test_loader.py @@ 
-650,6 +650,49 @@ def test_import_dir_format(tmp_path): assert recipe.quantize["quant_cfg"][0]["cfg"] == {"num_bits": (4, 3), "axis": None} +# --------------------------------------------------------------------------- +# imports — multi-document snippets +# --------------------------------------------------------------------------- + + +def test_import_multi_document_list_snippet(tmp_path): + """List snippet using multi-document YAML (imports --- content) resolves $import.""" + (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n") + (tmp_path / "kv.yaml").write_text( + f"imports:\n" + f" fp8: {tmp_path / 'fp8.yml'}\n" + f"---\n" + f"- quantizer_name: '*[kv]_bmm_quantizer'\n" + f" cfg:\n" + f" $import: fp8\n" + ) + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" kv: {tmp_path / 'kv.yaml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - $import: kv\n" + ) + recipe = load_recipe(recipe_file) + assert len(recipe.quantize["quant_cfg"]) == 1 + assert recipe.quantize["quant_cfg"][0]["quantizer_name"] == "*[kv]_bmm_quantizer" + assert recipe.quantize["quant_cfg"][0]["cfg"] == {"num_bits": (4, 3)} + + +def test_import_builtin_fp8_kv_snippet(): + """Built-in fp8_kv snippet uses multi-document format and resolves correctly.""" + recipe = load_recipe("general/ptq/fp8_default-fp8_kv") + kv_entries = [ + e for e in recipe.quantize["quant_cfg"] if e.get("quantizer_name") == "*[kv]_bmm_quantizer" + ] + assert len(kv_entries) == 1 + assert kv_entries[0]["cfg"]["num_bits"] == (4, 3) + + # --------------------------------------------------------------------------- # imports — recursive resolution and cycle detection # --------------------------------------------------------------------------- From fd13e6ed6d7489367ed5591a53184409a9d0a2d1 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Tue, 14 Apr 2026 15:19:46 -0700 Subject: [PATCH 13/30] license headers + more doc Signed-off-by: 
Shengliang Xu --- docs/source/guides/10_recipes.rst | 42 ++++++++++++++++++- modelopt_recipes/configs/numerics/fp8.yml | 15 +++++++ .../configs/numerics/nvfp4_dynamic.yml | 15 +++++++ .../configs/numerics/nvfp4_static.yml | 15 +++++++ .../configs/ptq/base_disable_all.yaml | 15 +++++++ .../ptq/default_disabled_quantizers.yaml | 15 +++++++ modelopt_recipes/configs/ptq/fp8_kv.yaml | 21 ++++++++++ 7 files changed, 137 insertions(+), 1 deletion(-) diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst index d26c5d3671..aa59583446 100644 --- a/docs/source/guides/10_recipes.rst +++ b/docs/source/guides/10_recipes.rst @@ -173,7 +173,16 @@ lists are authored once and referenced by name across recipes. The ``imports`` section is a dict mapping short names to config file paths. References use the explicit ``{$import: name}`` marker so they are never -confused with literal values. The marker can appear anywhere in the recipe: +confused with literal values. + +.. note:: + + ``imports`` (no ``$``) is a **top-level structural section** — like + ``metadata`` or ``quantize``, it declares the recipe's dependencies. + ``$import`` (with ``$``) is an **inline directive** that appears inside + data values and gets resolved at load time. + +The ``$import`` marker can appear anywhere in the recipe: - As a **dict value** — the marker is replaced with the snippet content. - As a **list element** — the snippet (which must itself be a list) is spliced @@ -250,6 +259,35 @@ section. Each file's imports are scoped to that file — the same name can be used in different files without conflict. Circular imports are detected and raise ``ValueError``. +Multi-document snippets +^^^^^^^^^^^^^^^^^^^^^^^ + +Dict-valued snippets (e.g., numeric format definitions) can use ``imports`` +directly because the ``imports`` key and the snippet content are both part of +the same YAML mapping. 
List-valued snippets have a problem: YAML only allows +one root node per document, so a file cannot be both a mapping (for +``imports``) and a list (for entries) at the same time. + +The solution is **multi-document YAML**: the first document holds the +``imports``, and the second document (after ``---``) holds the list content. +The loader parses both documents, resolves ``$import`` markers in the content, +and returns the resolved list: + +.. code-block:: yaml + + # configs/ptq/fp8_kv.yaml — list snippet that imports a dict snippet + imports: + fp8: configs/numerics/fp8 + --- + - quantizer_name: '*[kv]_bmm_quantizer' + cfg: + $import: fp8 + +This enables full composability — list snippets can reference dict snippets, +dict snippets can reference other dict snippets, and recipes can reference +any of them. All import resolution happens at load time with the same +precedence rules. + Built-in config snippets ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -271,6 +309,8 @@ Reusable snippets are stored under ``modelopt_recipes/configs/``: - Disable all quantizers (deny-all-then-configure pattern) * - ``configs/ptq/default_disabled_quantizers`` - Standard exclusions (LM head, routers, BatchNorm, etc.) + * - ``configs/ptq/fp8_kv`` + - FP8 E4M3 KV cache quantization (multi-document, imports ``fp8``) Metadata section diff --git a/modelopt_recipes/configs/numerics/fp8.yml b/modelopt_recipes/configs/numerics/fp8.yml index e84779c8f4..2fd99627df 100644 --- a/modelopt_recipes/configs/numerics/fp8.yml +++ b/modelopt_recipes/configs/numerics/fp8.yml @@ -1,2 +1,17 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # FP8 E4M3 quantizer attributes (no axis — used for KV cache, etc.). num_bits: e4m3 diff --git a/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml b/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml index 335e357a7f..e07ba9e19b 100644 --- a/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml +++ b/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # NVFP4 E2M1 blockwise with dynamic calibration and FP8 E4M3 scales. num_bits: e2m1 block_sizes: diff --git a/modelopt_recipes/configs/numerics/nvfp4_static.yml b/modelopt_recipes/configs/numerics/nvfp4_static.yml index 90d15bf489..758be89a30 100644 --- a/modelopt_recipes/configs/numerics/nvfp4_static.yml +++ b/modelopt_recipes/configs/numerics/nvfp4_static.yml @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # NVFP4 E2M1 blockwise with static calibration and FP8 E4M3 scales. num_bits: e2m1 block_sizes: diff --git a/modelopt_recipes/configs/ptq/base_disable_all.yaml b/modelopt_recipes/configs/ptq/base_disable_all.yaml index fbe6cf514c..35bdf2c6a4 100644 --- a/modelopt_recipes/configs/ptq/base_disable_all.yaml +++ b/modelopt_recipes/configs/ptq/base_disable_all.yaml @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Disable all quantizers by default (deny-all-then-configure pattern). 
- quantizer_name: '*' diff --git a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml index 7c1cd532fb..a8c04357d7 100644 --- a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml +++ b/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Standard quantizer exclusions: layers that should not be quantized. - quantizer_name: '*block_sparse_moe.gate*' diff --git a/modelopt_recipes/configs/ptq/fp8_kv.yaml b/modelopt_recipes/configs/ptq/fp8_kv.yaml index cb3ff3a009..85ff617ead 100644 --- a/modelopt_recipes/configs/ptq/fp8_kv.yaml +++ b/modelopt_recipes/configs/ptq/fp8_kv.yaml @@ -1,4 +1,25 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + # FP8 E4M3 KV cache quantization. +# +# This snippet uses multi-document YAML (separated by ---) because it is a +# list-valued snippet that also needs to $import another snippet. YAML only +# allows one root node per document, so a file cannot be both a mapping +# (for imports) and a list (for entries). The first document holds the +# imports, the second holds the list content that references them. imports: fp8: configs/numerics/fp8 --- From dcf10a6e7cf3546ebdd88b234a82880bd3fcd7f6 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Tue, 14 Apr 2026 15:54:09 -0700 Subject: [PATCH 14/30] more snippets Signed-off-by: Shengliang Xu --- .../configs/ptq/w8a8_fp8_fp8.yaml | 25 +++++++++++++++++++ .../general/ptq/fp8_default-fp8_kv.yaml | 9 ++----- 2 files changed, 27 insertions(+), 7 deletions(-) create mode 100644 modelopt_recipes/configs/ptq/w8a8_fp8_fp8.yaml diff --git a/modelopt_recipes/configs/ptq/w8a8_fp8_fp8.yaml b/modelopt_recipes/configs/ptq/w8a8_fp8_fp8.yaml new file mode 100644 index 0000000000..c55cbf1d6b --- /dev/null +++ b/modelopt_recipes/configs/ptq/w8a8_fp8_fp8.yaml @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# W8A8 FP8: FP8 E4M3 weight and activation quantizers. 
+imports: + fp8: configs/numerics/fp8 +--- + - quantizer_name: '*weight_quantizer' + cfg: + $import: fp8 + - quantizer_name: '*input_quantizer' + cfg: + $import: fp8 diff --git a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml index 680677d607..6b30a04022 100644 --- a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml @@ -16,7 +16,7 @@ imports: base_disable_all: configs/ptq/base_disable_all default_disabled: configs/ptq/default_disabled_quantizers - fp8: configs/numerics/fp8 + w8a8_fp8_fp8: configs/ptq/w8a8_fp8_fp8 fp8_kv: configs/ptq/fp8_kv metadata: @@ -26,11 +26,6 @@ quantize: algorithm: max quant_cfg: - $import: base_disable_all - - quantizer_name: '*input_quantizer' - cfg: - $import: fp8 - - quantizer_name: '*weight_quantizer' - cfg: - $import: fp8 + - $import: w8a8_fp8_fp8 - $import: fp8_kv - $import: default_disabled From dc670010bbf6086c7205a6d730db9ca6c0cee4b5 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Tue, 14 Apr 2026 17:16:16 -0700 Subject: [PATCH 15/30] nvfp4_dynamic is default Signed-off-by: Shengliang Xu --- docs/source/guides/10_recipes.rst | 8 +++--- .../numerics/{nvfp4_dynamic.yml => nvfp4.yml} | 2 +- .../configs/numerics/nvfp4_static.yml | 2 +- .../configs/ptq/w4a4_nvfp4_nvfp4.yaml | 25 +++++++++++++++++++ .../general/ptq/nvfp4_default-fp8_kv.yaml | 9 ++----- .../ptq/nvfp4_default-none_kv_gptq.yaml | 4 +-- .../ptq/nvfp4_experts_only-fp8_kv.yaml | 2 +- .../general/ptq/nvfp4_mlp_only-fp8_kv.yaml | 2 +- .../general/ptq/nvfp4_omlp_only-fp8_kv.yaml | 2 +- 9 files changed, 38 insertions(+), 18 deletions(-) rename modelopt_recipes/configs/numerics/{nvfp4_dynamic.yml => nvfp4.yml} (88%) create mode 100644 modelopt_recipes/configs/ptq/w4a4_nvfp4_nvfp4.yaml diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst index aa59583446..9a4b2e8f4a 100644 --- a/docs/source/guides/10_recipes.rst +++ 
b/docs/source/guides/10_recipes.rst @@ -205,7 +205,7 @@ list order), then inline keys last. cfg: $import: nvfp4 - # Import + override — import nvfp4_dynamic, then override type inline + # Import + override — import nvfp4, then override type inline cfg: $import: nvfp4 # imports {num_bits: e2m1, block_sizes: {-1: 16, type: dynamic, ...}} block_sizes: @@ -301,8 +301,8 @@ Reusable snippets are stored under ``modelopt_recipes/configs/``: - Description * - ``configs/numerics/fp8`` - FP8 E4M3 quantizer attributes - * - ``configs/numerics/nvfp4_dynamic`` - - NVFP4 E2M1 blockwise, dynamic calibration, FP8 scales + * - ``configs/numerics/nvfp4`` + - NVFP4 E2M1 blockwise, dynamic calibration, FP8 scales (default) * - ``configs/numerics/nvfp4_static`` - NVFP4 E2M1 blockwise, static calibration, FP8 scales * - ``configs/ptq/base_disable_all`` @@ -597,8 +597,8 @@ The ``modelopt_recipes/`` package is organized as follows: +-- configs/ # Reusable config snippets (imported via $import) +-- numerics/ # Numeric format definitions | +-- fp8.yml - | +-- nvfp4_dynamic.yml | +-- nvfp4_static.yml + | +-- nvfp4.yml +-- ptq/ # PTQ-specific entry snippets +-- base_disable_all.yaml +-- default_disabled_quantizers.yaml diff --git a/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml b/modelopt_recipes/configs/numerics/nvfp4.yml similarity index 88% rename from modelopt_recipes/configs/numerics/nvfp4_dynamic.yml rename to modelopt_recipes/configs/numerics/nvfp4.yml index e07ba9e19b..0639e51c14 100644 --- a/modelopt_recipes/configs/numerics/nvfp4_dynamic.yml +++ b/modelopt_recipes/configs/numerics/nvfp4.yml @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# NVFP4 E2M1 blockwise with dynamic calibration and FP8 E4M3 scales. +# NVFP4 E2M1 blockwise quantizer attributes with FP8 E4M3 scales (dynamic calibration, the default). 
num_bits: e2m1 block_sizes: -1: 16 diff --git a/modelopt_recipes/configs/numerics/nvfp4_static.yml b/modelopt_recipes/configs/numerics/nvfp4_static.yml index 758be89a30..9dda0cae91 100644 --- a/modelopt_recipes/configs/numerics/nvfp4_static.yml +++ b/modelopt_recipes/configs/numerics/nvfp4_static.yml @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# NVFP4 E2M1 blockwise with static calibration and FP8 E4M3 scales. +# NVFP4 E2M1 blockwise quantizer attributes with FP8 E4M3 scales (static calibration). num_bits: e2m1 block_sizes: -1: 16 diff --git a/modelopt_recipes/configs/ptq/w4a4_nvfp4_nvfp4.yaml b/modelopt_recipes/configs/ptq/w4a4_nvfp4_nvfp4.yaml new file mode 100644 index 0000000000..2fc516e5dc --- /dev/null +++ b/modelopt_recipes/configs/ptq/w4a4_nvfp4_nvfp4.yaml @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# W4A4 NVFP4: NVFP4 E2M1 dynamic weight and activation quantizers. 
+imports: + nvfp4: configs/numerics/nvfp4 +--- + - quantizer_name: '*weight_quantizer' + cfg: + $import: nvfp4 + - quantizer_name: '*input_quantizer' + cfg: + $import: nvfp4 diff --git a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml index 9dc6da1ace..f3c368a620 100644 --- a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml @@ -16,7 +16,7 @@ imports: base_disable_all: configs/ptq/base_disable_all default_disabled: configs/ptq/default_disabled_quantizers - nvfp4: configs/numerics/nvfp4_dynamic + w4a4_nvfp4_nvfp4: configs/ptq/w4a4_nvfp4_nvfp4 fp8_kv: configs/ptq/fp8_kv metadata: @@ -26,11 +26,6 @@ quantize: algorithm: max quant_cfg: - $import: base_disable_all - - quantizer_name: '*weight_quantizer' - cfg: - $import: nvfp4 - - quantizer_name: '*input_quantizer' - cfg: - $import: nvfp4 + - $import: w4a4_nvfp4_nvfp4 - $import: fp8_kv - $import: default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml index 45db9aa80c..1754763f65 100644 --- a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml @@ -17,7 +17,7 @@ imports: base_disable_all: configs/ptq/base_disable_all default_disabled: configs/ptq/default_disabled_quantizers nvfp4_static: configs/numerics/nvfp4_static - nvfp4_dynamic: configs/numerics/nvfp4_dynamic + nvfp4: configs/numerics/nvfp4 metadata: recipe_type: ptq @@ -33,7 +33,7 @@ quantize: $import: nvfp4_static - quantizer_name: '*input_quantizer' cfg: - $import: nvfp4_dynamic + $import: nvfp4 - quantizer_name: '*[kv]_bmm_quantizer' enable: false - $import: default_disabled diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml index a3730f839e..845f45b5f7 100644 --- 
a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml @@ -16,7 +16,7 @@ imports: base_disable_all: configs/ptq/base_disable_all default_disabled: configs/ptq/default_disabled_quantizers - nvfp4: configs/numerics/nvfp4_dynamic + nvfp4: configs/numerics/nvfp4 fp8_kv: configs/ptq/fp8_kv metadata: diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml index 0d9d0861ca..f1ecd23acf 100644 --- a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml @@ -16,7 +16,7 @@ imports: base_disable_all: configs/ptq/base_disable_all default_disabled: configs/ptq/default_disabled_quantizers - nvfp4: configs/numerics/nvfp4_dynamic + nvfp4: configs/numerics/nvfp4 fp8_kv: configs/ptq/fp8_kv metadata: diff --git a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml index 1a1fa63255..77cf8b2b76 100644 --- a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml @@ -16,7 +16,7 @@ imports: base_disable_all: configs/ptq/base_disable_all default_disabled: configs/ptq/default_disabled_quantizers - nvfp4: configs/numerics/nvfp4_dynamic + nvfp4: configs/numerics/nvfp4 fp8_kv: configs/ptq/fp8_kv metadata: From 5baba0b532dfdfab950d2e6b70b6858a6b5dcd46 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Wed, 15 Apr 2026 12:59:11 -0700 Subject: [PATCH 16/30] quant config Signed-off-by: Shengliang Xu --- modelopt/recipe/_config_loader.py | 128 ++++++++++++++++- modelopt/recipe/loader.py | 135 +----------------- modelopt/torch/quantization/config.py | 17 +-- .../configs/ptq/presets/README.md | 14 ++ .../configs/ptq/presets/fp8_default.yaml | 27 ++++ 5 files changed, 173 insertions(+), 148 deletions(-) create mode 100644 modelopt_recipes/configs/ptq/presets/README.md 
create mode 100644 modelopt_recipes/configs/ptq/presets/fp8_default.yaml diff --git a/modelopt/recipe/_config_loader.py b/modelopt/recipe/_config_loader.py index a94b67edb2..922875becb 100644 --- a/modelopt/recipe/_config_loader.py +++ b/modelopt/recipe/_config_loader.py @@ -62,8 +62,8 @@ def _parse_exmy(s: str) -> tuple[int, int] | str: return s -def load_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[Any]: - """Load a config yaml. +def _load_raw_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[Any]: + """Load a config YAML without resolving ``$import`` references. config_file: Path to a config yaml file. The path suffix can be omitted. """ @@ -141,3 +141,127 @@ def load_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[ f"Config file {config_path} must contain a YAML mapping or list, got {type(_raw).__name__}" ) return _parse_exmy_num_bits(_raw) + + +# --------------------------------------------------------------------------- +# $import resolution +# --------------------------------------------------------------------------- + +_IMPORT_KEY = "$import" + + +def _resolve_imports( + data: dict[str, Any], _loading: frozenset[str] | None = None +) -> dict[str, Any]: + """Resolve the ``imports`` section and ``$import`` references. + + See ``modelopt.recipe.loader`` module docstring for the full specification. + This function lives in ``_config_loader`` (not ``loader``) so that it can be + used from ``modelopt.torch.quantization.config`` without circular imports. 
+ """ + imports_dict = data.pop("imports", None) + if not imports_dict: + return data + + if not isinstance(imports_dict, dict): + raise ValueError( + f"'imports' must be a dict mapping names to config paths, got: {type(imports_dict).__name__}" + ) + + if _loading is None: + _loading = frozenset() + + # Build name → config mapping (recursively resolve nested imports) + import_map: dict[str, Any] = {} + for name, config_path in imports_dict.items(): + if not config_path: + raise ValueError(f"Import {name!r} has an empty config path.") + if config_path in _loading: + raise ValueError( + f"Circular import detected: {config_path!r} is already being loaded. " + f"Import chain: {sorted(_loading)}" + ) + snippet = _load_raw_config(config_path) + if isinstance(snippet, dict) and "imports" in snippet: + snippet = _resolve_imports(snippet, _loading | {config_path}) + # Unwrap _list_content (multi-document YAML: imports + list content) + if isinstance(snippet, dict) and "_list_content" in snippet: + snippet = snippet["_list_content"] + import_map[name] = snippet + + def _lookup(ref_name: str, context: str) -> Any: + if ref_name not in import_map: + raise ValueError( + f"Unknown $import reference {ref_name!r} in {context}. " + f"Available imports: {list(import_map.keys())}" + ) + return import_map[ref_name] + + def _resolve_list(entries: list[Any]) -> list[Any]: + """Resolve $import markers in a list of entries.""" + resolved: list[Any] = [] + for entry in entries: + if isinstance(entry, dict) and _IMPORT_KEY in entry: + if len(entry) > 1: + raise ValueError( + f"$import must be the only key in the dict, got extra keys: " + f"{sorted(k for k in entry if k != _IMPORT_KEY)}" + ) + imported = _lookup(entry[_IMPORT_KEY], "list entry") + if not isinstance(imported, list): + raise ValueError( + f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a " + f"list, got {type(imported).__name__}." 
def load_config(config_path: str | Path | Traversable) -> dict[str, Any] | list[Any]:
    """Load a YAML config and resolve all ``$import`` references.

    This is the primary config loading entry point. It reads the YAML file
    with ``_load_raw_config``, resolves any ``imports`` / ``$import``
    directives with ``_resolve_imports``, and returns the final config.

    Args:
        config_path: Location of the YAML config. It is forwarded unchanged
            to ``_load_raw_config``, which performs the actual path lookup.

    Returns:
        The resolved config: a dict for mapping-style configs, or a list for
        list-style snippets. A multi-document snippet whose second document
        is a list is returned as that list, not as the internal
        ``{"_list_content": [...]}`` wrapper used while resolving imports.

    Raises:
        ValueError: Propagated from ``_load_raw_config`` or
            ``_resolve_imports`` (for example an unknown ``$import``
            reference or a circular import).
    """
    data = _load_raw_config(config_path)

    # Plain list snippets carry no ``imports`` header, so nothing to resolve.
    if not isinstance(data, dict):
        return data

    if "imports" in data:
        data = _resolve_imports(data)

    # ``_load_raw_config`` stores the list body of a two-document YAML file
    # under the private ``_list_content`` key so the header (``imports``) can
    # travel with it. ``_resolve_imports`` unwraps that key for *imported*
    # snippets only; do the same here so direct callers get the list the
    # docstring promises instead of the internal wrapper dict.
    if "_list_content" in data:
        return data["_list_content"]

    return data
- """ - imports_dict = data.pop("imports", None) - if not imports_dict: - return data - - if not isinstance(imports_dict, dict): - raise ValueError( - f"'imports' must be a dict mapping names to config paths, got: {type(imports_dict).__name__}" - ) - - if _loading is None: - _loading = frozenset() - - # Build name → config mapping (recursively resolve nested imports) - import_map: dict[str, Any] = {} - for name, config_path in imports_dict.items(): - if not config_path: - raise ValueError(f"Import {name!r} has an empty config path.") - if config_path in _loading: - raise ValueError( - f"Circular import detected: {config_path!r} is already being loaded. " - f"Import chain: {sorted(_loading)}" - ) - snippet = load_config(config_path) - if isinstance(snippet, dict) and "imports" in snippet: - snippet = _resolve_imports(snippet, _loading | {config_path}) - # Unwrap _list_content (multi-document YAML: imports + list content) - if isinstance(snippet, dict) and "_list_content" in snippet: - snippet = snippet["_list_content"] - import_map[name] = snippet - - def _lookup(ref_name: str, context: str) -> Any: - if ref_name not in import_map: - raise ValueError( - f"Unknown $import reference {ref_name!r} in {context}. " - f"Available imports: {list(import_map.keys())}" - ) - return import_map[ref_name] - - def _resolve_list(entries: list[Any]) -> list[Any]: - """Resolve $import markers in a list of quant_cfg-style entries.""" - resolved: list[Any] = [] - for entry in entries: - if isinstance(entry, dict) and _IMPORT_KEY in entry: - # {$import: name} → splice imported list - if len(entry) > 1: - raise ValueError( - f"$import must be the only key in the dict, got extra keys: " - f"{sorted(k for k in entry if k != _IMPORT_KEY)}" - ) - imported = _lookup(entry[_IMPORT_KEY], "list entry") - if not isinstance(imported, list): - raise ValueError( - f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a " - f"list, got {type(imported).__name__}." 
- ) - resolved.extend(imported) - elif ( - isinstance(entry, dict) - and isinstance(entry.get("cfg"), dict) - and _IMPORT_KEY in entry["cfg"] - ): - # cfg: {$import: name_or_list, ...inline} → import then override - # - # Precedence (lowest → highest): - # 1. Imports in list order (later imports override earlier) - # 2. Inline keys (override all imports) - ref = entry["cfg"].pop(_IMPORT_KEY) - inline_keys = dict(entry["cfg"]) - ref_names = ref if isinstance(ref, list) else [ref] - - merged: dict[str, Any] = {} - for name in ref_names: - snippet = _lookup(name, f"cfg of {entry}") - if not isinstance(snippet, dict): - raise ValueError( - f"$import {name!r} in cfg must resolve to a dict, " - f"got {type(snippet).__name__}." - ) - merged.update(snippet) - - merged.update(inline_keys) - entry["cfg"] = merged - resolved.append(entry) - else: - resolved.append(entry) - return resolved - - # Resolve $import references in quant_cfg entries - quantize = data.get("quantize") - if isinstance(quantize, dict): - quant_cfg = quantize.get("quant_cfg") - if isinstance(quant_cfg, list): - quantize["quant_cfg"] = _resolve_list(quant_cfg) - - # Resolve $import references in _list_content (multi-document snippets) - if "_list_content" in data: - data["_list_content"] = _resolve_list(data["_list_content"]) - - return data - - def _resolve_recipe_path(recipe_path: str | Path | Traversable) -> Path | Traversable: """Resolve a recipe path, checking the built-in library first then the filesystem. @@ -214,7 +87,7 @@ def _load_recipe_from_file(recipe_file: Path | Traversable) -> ModelOptRecipeBas The file must contain a ``metadata`` section with at least ``recipe_type``, plus a ``quant_cfg`` mapping and an optional ``algorithm`` for PTQ recipes. """ - raw = load_config(recipe_file) + raw = _load_raw_config(recipe_file) assert isinstance(raw, dict), f"Recipe file {recipe_file} must be a YAML mapping." 
data = _resolve_imports(raw) @@ -247,7 +120,7 @@ def _load_recipe_from_dir(recipe_dir: Path | Traversable) -> ModelOptRecipeBase: f"Cannot find a recipe descriptor in {recipe_dir}. Looked for: recipe.yml, recipe.yaml" ) - recipe_data = load_config(recipe_file) + recipe_data = _load_raw_config(recipe_file) assert isinstance(recipe_data, dict), f"Recipe file {recipe_file} must be a YAML mapping." metadata = recipe_data.get("metadata", {}) recipe_type = metadata.get("recipe_type") @@ -266,7 +139,7 @@ def _load_recipe_from_dir(recipe_dir: Path | Traversable) -> ModelOptRecipeBase: f"Cannot find quantize in {recipe_dir}. Looked for: quantize.yml, quantize.yaml" ) # Resolve imports: imports are in recipe.yml, quantize data is separate - quantize_data = load_config(quantize_file) + quantize_data = _load_raw_config(quantize_file) assert isinstance(quantize_data, dict), f"{quantize_file} must be a YAML mapping." combined: dict[str, Any] = {"quantize": quantize_data} imports = recipe_data.get("imports") diff --git a/modelopt/torch/quantization/config.py b/modelopt/torch/quantization/config.py index 99c729efbc..5535156438 100644 --- a/modelopt/torch/quantization/config.py +++ b/modelopt/torch/quantization/config.py @@ -157,6 +157,7 @@ from pydantic import ValidationInfo, field_validator, model_validator from typing_extensions import Required, TypedDict +from modelopt.recipe._config_loader import load_config from modelopt.torch.opt.config import ModeloptBaseConfig, ModeloptField from modelopt.torch.utils.network import ConstructorLike @@ -272,21 +273,7 @@ def find_quant_cfg_entry_by_path( "algorithm": "max", } -FP8_DEFAULT_CFG = { - "quant_cfg": [ - *_base_disable_all, - { - "quantizer_name": "*weight_quantizer", - "cfg": {"num_bits": (4, 3), "axis": None}, - }, - { - "quantizer_name": "*input_quantizer", - "cfg": {"num_bits": (4, 3), "axis": None}, - }, - *_default_disabled_quantizer_cfg, - ], - "algorithm": "max", -} +FP8_DEFAULT_CFG: dict[str, Any] = 
load_config("configs/ptq/presets/fp8_default") MAMBA_MOE_FP8_AGGRESSIVE_CFG = { "quant_cfg": [ diff --git a/modelopt_recipes/configs/ptq/presets/README.md b/modelopt_recipes/configs/ptq/presets/README.md new file mode 100644 index 0000000000..80d186d6fc --- /dev/null +++ b/modelopt_recipes/configs/ptq/presets/README.md @@ -0,0 +1,14 @@ +# PTQ Preset Configs + +This directory holds preset quantization configurations that serve as the +single source of truth for the hardcoded `*_CFG` dicts in +`modelopt.torch.quantization.config` (e.g., `FP8_DEFAULT_CFG`). + +Each preset is a complete, self-contained config with `algorithm` and +`quant_cfg` — ready to pass directly to `mtq.quantize()`. Presets compose +from the reusable snippets in `configs/numerics/` and `configs/ptq/` via +the `$import` system. + +When adding a new preset, use existing snippets where possible and keep +the YAML as the authoritative definition — the Python config should load +from here rather than hardcoding the dict. diff --git a/modelopt_recipes/configs/ptq/presets/fp8_default.yaml b/modelopt_recipes/configs/ptq/presets/fp8_default.yaml new file mode 100644 index 0000000000..21ce58f4e7 --- /dev/null +++ b/modelopt_recipes/configs/ptq/presets/fp8_default.yaml @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# FP8 per-tensor weight and activation (W8A8), max calibration. +# Equivalent to the hardcoded FP8_DEFAULT_CFG in config.py. +imports: + base_disable_all: configs/ptq/base_disable_all + w8a8: configs/ptq/w8a8_fp8_fp8 + default_disabled: configs/ptq/default_disabled_quantizers + +algorithm: max +quant_cfg: + - $import: base_disable_all + - $import: w8a8 + - $import: default_disabled From 82d5a12620e9ca057768daa1e6dad092df3ac508 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Wed, 15 Apr 2026 14:06:03 -0700 Subject: [PATCH 17/30] presets Signed-off-by: Shengliang Xu --- docs/source/guides/10_recipes.rst | 20 ++++++++-------- modelopt/torch/quantization/config.py | 11 ++------- .../configs/ptq/presets/README.md | 18 +++++++++----- .../configs/ptq/presets/kv/fp8.yaml | 24 +++++++++++++++++++ .../{fp8_default.yaml => model/fp8.yaml} | 6 ++--- .../ptq/{ => units}/base_disable_all.yaml | 0 .../default_disabled_quantizers.yaml | 0 .../configs/ptq/{ => units}/fp8_kv.yaml | 0 .../ptq/{ => units}/w4a4_nvfp4_nvfp4.yaml | 0 .../configs/ptq/{ => units}/w8a8_fp8_fp8.yaml | 0 .../general/ptq/fp8_default-fp8_kv.yaml | 8 +++---- .../general/ptq/nvfp4_default-fp8_kv.yaml | 8 +++---- .../ptq/nvfp4_default-none_kv_gptq.yaml | 4 ++-- .../ptq/nvfp4_experts_only-fp8_kv.yaml | 6 ++--- .../general/ptq/nvfp4_mlp_only-fp8_kv.yaml | 6 ++--- .../general/ptq/nvfp4_omlp_only-fp8_kv.yaml | 6 ++--- 16 files changed, 70 insertions(+), 47 deletions(-) create mode 100644 modelopt_recipes/configs/ptq/presets/kv/fp8.yaml rename modelopt_recipes/configs/ptq/presets/{fp8_default.yaml => model/fp8.yaml} (85%) rename modelopt_recipes/configs/ptq/{ => units}/base_disable_all.yaml (100%) rename modelopt_recipes/configs/ptq/{ => units}/default_disabled_quantizers.yaml (100%) rename modelopt_recipes/configs/ptq/{ => units}/fp8_kv.yaml (100%) rename modelopt_recipes/configs/ptq/{ => units}/w4a4_nvfp4_nvfp4.yaml (100%) rename modelopt_recipes/configs/ptq/{ => units}/w8a8_fp8_fp8.yaml (100%) diff --git 
a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst index 9a4b2e8f4a..a8986312f0 100644 --- a/docs/source/guides/10_recipes.rst +++ b/docs/source/guides/10_recipes.rst @@ -94,8 +94,8 @@ The simplest form is a single ``.yml`` or ``.yaml`` file. .. code-block:: yaml imports: - base_disable_all: configs/ptq/base_disable_all - default_disabled: configs/ptq/default_disabled_quantizers + base_disable_all: configs/ptq/units/base_disable_all + default_disabled: configs/ptq/units/default_disabled_quantizers fp8: configs/numerics/fp8 metadata: @@ -227,8 +227,8 @@ a list splice are not supported. .. code-block:: yaml imports: - base_disable_all: configs/ptq/base_disable_all - default_disabled: configs/ptq/default_disabled_quantizers + base_disable_all: configs/ptq/units/base_disable_all + default_disabled: configs/ptq/units/default_disabled_quantizers fp8: configs/numerics/fp8 metadata: @@ -275,7 +275,7 @@ and returns the resolved list: .. code-block:: yaml - # configs/ptq/fp8_kv.yaml — list snippet that imports a dict snippet + # configs/ptq/units/fp8_kv.yaml — list snippet that imports a dict snippet imports: fp8: configs/numerics/fp8 --- @@ -305,11 +305,11 @@ Reusable snippets are stored under ``modelopt_recipes/configs/``: - NVFP4 E2M1 blockwise, dynamic calibration, FP8 scales (default) * - ``configs/numerics/nvfp4_static`` - NVFP4 E2M1 blockwise, static calibration, FP8 scales - * - ``configs/ptq/base_disable_all`` + * - ``configs/ptq/units/base_disable_all`` - Disable all quantizers (deny-all-then-configure pattern) - * - ``configs/ptq/default_disabled_quantizers`` + * - ``configs/ptq/units/default_disabled_quantizers`` - Standard exclusions (LM head, routers, BatchNorm, etc.) 
- * - ``configs/ptq/fp8_kv`` + * - ``configs/ptq/units/fp8_kv`` - FP8 E4M3 KV cache quantization (multi-document, imports ``fp8``) @@ -549,8 +549,8 @@ Example -- creating a custom PTQ recipe using imports: # my_int8_recipe.yml imports: - base_disable_all: configs/ptq/base_disable_all - default_disabled: configs/ptq/default_disabled_quantizers + base_disable_all: configs/ptq/units/base_disable_all + default_disabled: configs/ptq/units/default_disabled_quantizers metadata: recipe_type: ptq diff --git a/modelopt/torch/quantization/config.py b/modelopt/torch/quantization/config.py index 5535156438..5430391adb 100644 --- a/modelopt/torch/quantization/config.py +++ b/modelopt/torch/quantization/config.py @@ -273,7 +273,7 @@ def find_quant_cfg_entry_by_path( "algorithm": "max", } -FP8_DEFAULT_CFG: dict[str, Any] = load_config("configs/ptq/presets/fp8_default") +FP8_DEFAULT_CFG: dict[str, Any] = load_config("configs/ptq/presets/model/fp8") MAMBA_MOE_FP8_AGGRESSIVE_CFG = { "quant_cfg": [ @@ -518,14 +518,7 @@ def find_quant_cfg_entry_by_path( # KV-cache configs are designed to be merged with a primary quantization config (e.g. # FP8_DEFAULT_CFG) that already contains _base_disable_all. They intentionally omit both # _base_disable_all and "algorithm" because these are provided by the primary config. 
-FP8_KV_CFG = { - "quant_cfg": [ - { - "quantizer_name": "*[kv]_bmm_quantizer", - "cfg": {"num_bits": (4, 3)}, - }, - ] -} +FP8_KV_CFG: dict[str, Any] = load_config("configs/ptq/presets/kv/fp8") FP8_AFFINE_KV_CFG = { "quant_cfg": [ diff --git a/modelopt_recipes/configs/ptq/presets/README.md b/modelopt_recipes/configs/ptq/presets/README.md index 80d186d6fc..f8974fc78c 100644 --- a/modelopt_recipes/configs/ptq/presets/README.md +++ b/modelopt_recipes/configs/ptq/presets/README.md @@ -1,14 +1,20 @@ # PTQ Preset Configs This directory holds preset quantization configurations that serve as the -single source of truth for the hardcoded `*_CFG` dicts in +YAML source of truth for the hardcoded `*_CFG` dicts in `modelopt.torch.quantization.config` (e.g., `FP8_DEFAULT_CFG`). Each preset is a complete, self-contained config with `algorithm` and `quant_cfg` — ready to pass directly to `mtq.quantize()`. Presets compose -from the reusable snippets in `configs/numerics/` and `configs/ptq/` via -the `$import` system. +from the reusable snippets in `configs/numerics/` and `configs/ptq/units/` +via the `$import` system. -When adding a new preset, use existing snippets where possible and keep -the YAML as the authoritative definition — the Python config should load -from here rather than hardcoding the dict. +**Note:** The main purpose of these presets is to support the existing +`hf_ptq.py` script's `--qformat` / `--kv_cache_qformat` flags and other +code paths that reference +the hardcoded `*_CFG` dicts, maintaining backward compatibility during +the transition to recipe-based workflows. Users are encouraged to use +`load_recipe` with full recipe files under `general/` or `models/` +instead. Some or all of these presets may be deprecated or removed in +future releases as the recipe-based workflow becomes the standard entry +point. 
diff --git a/modelopt_recipes/configs/ptq/presets/kv/fp8.yaml b/modelopt_recipes/configs/ptq/presets/kv/fp8.yaml new file mode 100644 index 0000000000..f23ba54145 --- /dev/null +++ b/modelopt_recipes/configs/ptq/presets/kv/fp8.yaml @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# FP8 E4M3 KV cache quantization preset. +# Equivalent to the hardcoded FP8_KV_CFG in config.py. +# This is a partial config (no algorithm, no base_disable_all) — designed +# to be merged with a primary model quantization config. +imports: + fp8_kv: configs/ptq/units/fp8_kv + +quant_cfg: + - $import: fp8_kv diff --git a/modelopt_recipes/configs/ptq/presets/fp8_default.yaml b/modelopt_recipes/configs/ptq/presets/model/fp8.yaml similarity index 85% rename from modelopt_recipes/configs/ptq/presets/fp8_default.yaml rename to modelopt_recipes/configs/ptq/presets/model/fp8.yaml index 21ce58f4e7..763fe8ee5f 100644 --- a/modelopt_recipes/configs/ptq/presets/fp8_default.yaml +++ b/modelopt_recipes/configs/ptq/presets/model/fp8.yaml @@ -16,9 +16,9 @@ # FP8 per-tensor weight and activation (W8A8), max calibration. # Equivalent to the hardcoded FP8_DEFAULT_CFG in config.py. 
imports: - base_disable_all: configs/ptq/base_disable_all - w8a8: configs/ptq/w8a8_fp8_fp8 - default_disabled: configs/ptq/default_disabled_quantizers + base_disable_all: configs/ptq/units/base_disable_all + w8a8: configs/ptq/units/w8a8_fp8_fp8 + default_disabled: configs/ptq/units/default_disabled_quantizers algorithm: max quant_cfg: diff --git a/modelopt_recipes/configs/ptq/base_disable_all.yaml b/modelopt_recipes/configs/ptq/units/base_disable_all.yaml similarity index 100% rename from modelopt_recipes/configs/ptq/base_disable_all.yaml rename to modelopt_recipes/configs/ptq/units/base_disable_all.yaml diff --git a/modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml b/modelopt_recipes/configs/ptq/units/default_disabled_quantizers.yaml similarity index 100% rename from modelopt_recipes/configs/ptq/default_disabled_quantizers.yaml rename to modelopt_recipes/configs/ptq/units/default_disabled_quantizers.yaml diff --git a/modelopt_recipes/configs/ptq/fp8_kv.yaml b/modelopt_recipes/configs/ptq/units/fp8_kv.yaml similarity index 100% rename from modelopt_recipes/configs/ptq/fp8_kv.yaml rename to modelopt_recipes/configs/ptq/units/fp8_kv.yaml diff --git a/modelopt_recipes/configs/ptq/w4a4_nvfp4_nvfp4.yaml b/modelopt_recipes/configs/ptq/units/w4a4_nvfp4_nvfp4.yaml similarity index 100% rename from modelopt_recipes/configs/ptq/w4a4_nvfp4_nvfp4.yaml rename to modelopt_recipes/configs/ptq/units/w4a4_nvfp4_nvfp4.yaml diff --git a/modelopt_recipes/configs/ptq/w8a8_fp8_fp8.yaml b/modelopt_recipes/configs/ptq/units/w8a8_fp8_fp8.yaml similarity index 100% rename from modelopt_recipes/configs/ptq/w8a8_fp8_fp8.yaml rename to modelopt_recipes/configs/ptq/units/w8a8_fp8_fp8.yaml diff --git a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml index 6b30a04022..8fe8c121d2 100644 --- a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml @@ -14,10 +14,10 @@ # 
limitations under the License. imports: - base_disable_all: configs/ptq/base_disable_all - default_disabled: configs/ptq/default_disabled_quantizers - w8a8_fp8_fp8: configs/ptq/w8a8_fp8_fp8 - fp8_kv: configs/ptq/fp8_kv + base_disable_all: configs/ptq/units/base_disable_all + default_disabled: configs/ptq/units/default_disabled_quantizers + w8a8_fp8_fp8: configs/ptq/units/w8a8_fp8_fp8 + fp8_kv: configs/ptq/units/fp8_kv metadata: recipe_type: ptq diff --git a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml index f3c368a620..8da3bebff1 100644 --- a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml @@ -14,10 +14,10 @@ # limitations under the License. imports: - base_disable_all: configs/ptq/base_disable_all - default_disabled: configs/ptq/default_disabled_quantizers - w4a4_nvfp4_nvfp4: configs/ptq/w4a4_nvfp4_nvfp4 - fp8_kv: configs/ptq/fp8_kv + base_disable_all: configs/ptq/units/base_disable_all + default_disabled: configs/ptq/units/default_disabled_quantizers + w4a4_nvfp4_nvfp4: configs/ptq/units/w4a4_nvfp4_nvfp4 + fp8_kv: configs/ptq/units/fp8_kv metadata: recipe_type: ptq diff --git a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml index 1754763f65..04cfcfa925 100644 --- a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml @@ -14,8 +14,8 @@ # limitations under the License. 
imports: - base_disable_all: configs/ptq/base_disable_all - default_disabled: configs/ptq/default_disabled_quantizers + base_disable_all: configs/ptq/units/base_disable_all + default_disabled: configs/ptq/units/default_disabled_quantizers nvfp4_static: configs/numerics/nvfp4_static nvfp4: configs/numerics/nvfp4 diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml index 845f45b5f7..689e981b34 100644 --- a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml @@ -14,10 +14,10 @@ # limitations under the License. imports: - base_disable_all: configs/ptq/base_disable_all - default_disabled: configs/ptq/default_disabled_quantizers + base_disable_all: configs/ptq/units/base_disable_all + default_disabled: configs/ptq/units/default_disabled_quantizers nvfp4: configs/numerics/nvfp4 - fp8_kv: configs/ptq/fp8_kv + fp8_kv: configs/ptq/units/fp8_kv metadata: recipe_type: ptq diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml index f1ecd23acf..ee26898cd9 100644 --- a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml @@ -14,10 +14,10 @@ # limitations under the License. 
imports: - base_disable_all: configs/ptq/base_disable_all - default_disabled: configs/ptq/default_disabled_quantizers + base_disable_all: configs/ptq/units/base_disable_all + default_disabled: configs/ptq/units/default_disabled_quantizers nvfp4: configs/numerics/nvfp4 - fp8_kv: configs/ptq/fp8_kv + fp8_kv: configs/ptq/units/fp8_kv metadata: recipe_type: ptq diff --git a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml index 77cf8b2b76..1075303f72 100644 --- a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml @@ -14,10 +14,10 @@ # limitations under the License. imports: - base_disable_all: configs/ptq/base_disable_all - default_disabled: configs/ptq/default_disabled_quantizers + base_disable_all: configs/ptq/units/base_disable_all + default_disabled: configs/ptq/units/default_disabled_quantizers nvfp4: configs/numerics/nvfp4 - fp8_kv: configs/ptq/fp8_kv + fp8_kv: configs/ptq/units/fp8_kv metadata: recipe_type: ptq From cbf3f29975b1ffeb17f4b77d577105e302454e14 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Wed, 15 Apr 2026 14:11:42 -0700 Subject: [PATCH 18/30] yml -> yaml Signed-off-by: Shengliang Xu --- modelopt_recipes/configs/numerics/{fp8.yml => fp8.yaml} | 0 modelopt_recipes/configs/numerics/{nvfp4.yml => nvfp4.yaml} | 0 .../configs/numerics/{nvfp4_static.yml => nvfp4_static.yaml} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename modelopt_recipes/configs/numerics/{fp8.yml => fp8.yaml} (100%) rename modelopt_recipes/configs/numerics/{nvfp4.yml => nvfp4.yaml} (100%) rename modelopt_recipes/configs/numerics/{nvfp4_static.yml => nvfp4_static.yaml} (100%) diff --git a/modelopt_recipes/configs/numerics/fp8.yml b/modelopt_recipes/configs/numerics/fp8.yaml similarity index 100% rename from modelopt_recipes/configs/numerics/fp8.yml rename to modelopt_recipes/configs/numerics/fp8.yaml diff --git 
a/modelopt_recipes/configs/numerics/nvfp4.yml b/modelopt_recipes/configs/numerics/nvfp4.yaml similarity index 100% rename from modelopt_recipes/configs/numerics/nvfp4.yml rename to modelopt_recipes/configs/numerics/nvfp4.yaml diff --git a/modelopt_recipes/configs/numerics/nvfp4_static.yml b/modelopt_recipes/configs/numerics/nvfp4_static.yaml similarity index 100% rename from modelopt_recipes/configs/numerics/nvfp4_static.yml rename to modelopt_recipes/configs/numerics/nvfp4_static.yaml From ae9e24527fdb0170c84d40ae256e78d273efb6c9 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Wed, 15 Apr 2026 14:32:54 -0700 Subject: [PATCH 19/30] remove circular dependency Signed-off-by: Shengliang Xu --- modelopt/recipe/_config_loader.py | 258 +------------------------ modelopt/torch/opt/config_loader.py | 268 ++++++++++++++++++++++++++ modelopt/torch/quantization/config.py | 2 +- 3 files changed, 277 insertions(+), 251 deletions(-) create mode 100644 modelopt/torch/opt/config_loader.py diff --git a/modelopt/recipe/_config_loader.py b/modelopt/recipe/_config_loader.py index 922875becb..5ed2c80361 100644 --- a/modelopt/recipe/_config_loader.py +++ b/modelopt/recipe/_config_loader.py @@ -13,255 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""YAML config loading utilities. +"""Re-export config loading utilities from ``modelopt.torch.opt.config_loader``.""" -This module is intentionally free of ``modelopt.torch`` imports so that -``modelopt.torch.quantization.config`` can import :func:`load_config` without -triggering a circular import through ``modelopt.recipe.loader``. 
-""" +from modelopt.torch.opt.config_loader import ( + BUILTIN_RECIPES_LIB, + _load_raw_config, + _resolve_imports, + load_config, +) -from importlib.resources import files - -try: - from importlib.resources.abc import Traversable -except ImportError: # Python < 3.11 - from importlib.abc import Traversable -import re -from pathlib import Path -from typing import Any - -import yaml - -# Root to all built-in recipes. Users can create own recipes. -BUILTIN_RECIPES_LIB = files("modelopt_recipes") - -_EXMY_RE = re.compile(r"^[Ee](\d+)[Mm](\d+)$") -_EXMY_KEYS = frozenset({"num_bits", "scale_bits"}) - - -def _parse_exmy_num_bits(obj: Any) -> Any: - """Recursively convert ``ExMy`` strings in ``num_bits`` / ``scale_bits`` to ``(x, y)`` tuples.""" - if isinstance(obj, dict): - return { - k: ( - _parse_exmy(v) - if k in _EXMY_KEYS and isinstance(v, str) - else _parse_exmy_num_bits(v) - ) - for k, v in obj.items() - } - if isinstance(obj, list): - return [_parse_exmy_num_bits(item) for item in obj] - return obj - - -def _parse_exmy(s: str) -> tuple[int, int] | str: - m = _EXMY_RE.match(s) - if m: - return (int(m.group(1)), int(m.group(2))) - return s - - -def _load_raw_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[Any]: - """Load a config YAML without resolving ``$import`` references. - - config_file: Path to a config yaml file. The path suffix can be omitted. 
- """ - paths_to_check: list[Path | Traversable] = [] - if isinstance(config_file, str): - if not config_file.endswith(".yml") and not config_file.endswith(".yaml"): - paths_to_check.append(Path(f"{config_file}.yml")) - paths_to_check.append(Path(f"{config_file}.yaml")) - paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yml")) - paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yaml")) - else: - paths_to_check.append(Path(config_file)) - paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(config_file)) - elif isinstance(config_file, Path): - if config_file.suffix in (".yml", ".yaml"): - paths_to_check.append(config_file) - if not config_file.is_absolute(): - paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(str(config_file))) - else: - paths_to_check.append(Path(f"{config_file}.yml")) - paths_to_check.append(Path(f"{config_file}.yaml")) - if not config_file.is_absolute(): - paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yml")) - paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yaml")) - elif isinstance(config_file, Traversable): - paths_to_check.append(config_file) - else: - raise ValueError(f"Invalid config file of {config_file}") - - config_path = None - for path in paths_to_check: - if path.is_file(): - config_path = path - break - if not config_path: - raise ValueError( - f"Cannot find config file of {config_file}, paths checked: {paths_to_check}" - ) - - text = config_path.read_text(encoding="utf-8") - docs = list(yaml.safe_load_all(text)) - - if len(docs) == 0 or docs[0] is None: - return {} - if len(docs) == 1: - _raw = docs[0] - elif len(docs) == 2: - # Multi-document: first doc is imports/metadata, second is content. - # Merge the imports into the content for downstream resolution. 
- header, content = docs[0], docs[1] - if not isinstance(header, dict): - raise ValueError( - f"Config file {config_path}: first YAML document must be a mapping, " - f"got {type(header).__name__}" - ) - if content is None: - content = {} - if isinstance(content, dict): - _raw = {**header, **content} - elif isinstance(content, list): - # List content with a header dict — attach imports via wrapper - _raw = {**header, "_list_content": content} - else: - raise ValueError( - f"Config file {config_path}: second YAML document must be a mapping or list, " - f"got {type(content).__name__}" - ) - else: - raise ValueError( - f"Config file {config_path}: expected 1 or 2 YAML documents, got {len(docs)}" - ) - - if not isinstance(_raw, (dict, list)): - raise ValueError( - f"Config file {config_path} must contain a YAML mapping or list, got {type(_raw).__name__}" - ) - return _parse_exmy_num_bits(_raw) - - -# --------------------------------------------------------------------------- -# $import resolution -# --------------------------------------------------------------------------- - -_IMPORT_KEY = "$import" - - -def _resolve_imports( - data: dict[str, Any], _loading: frozenset[str] | None = None -) -> dict[str, Any]: - """Resolve the ``imports`` section and ``$import`` references. - - See ``modelopt.recipe.loader`` module docstring for the full specification. - This function lives in ``_config_loader`` (not ``loader``) so that it can be - used from ``modelopt.torch.quantization.config`` without circular imports. 
- """ - imports_dict = data.pop("imports", None) - if not imports_dict: - return data - - if not isinstance(imports_dict, dict): - raise ValueError( - f"'imports' must be a dict mapping names to config paths, got: {type(imports_dict).__name__}" - ) - - if _loading is None: - _loading = frozenset() - - # Build name → config mapping (recursively resolve nested imports) - import_map: dict[str, Any] = {} - for name, config_path in imports_dict.items(): - if not config_path: - raise ValueError(f"Import {name!r} has an empty config path.") - if config_path in _loading: - raise ValueError( - f"Circular import detected: {config_path!r} is already being loaded. " - f"Import chain: {sorted(_loading)}" - ) - snippet = _load_raw_config(config_path) - if isinstance(snippet, dict) and "imports" in snippet: - snippet = _resolve_imports(snippet, _loading | {config_path}) - # Unwrap _list_content (multi-document YAML: imports + list content) - if isinstance(snippet, dict) and "_list_content" in snippet: - snippet = snippet["_list_content"] - import_map[name] = snippet - - def _lookup(ref_name: str, context: str) -> Any: - if ref_name not in import_map: - raise ValueError( - f"Unknown $import reference {ref_name!r} in {context}. " - f"Available imports: {list(import_map.keys())}" - ) - return import_map[ref_name] - - def _resolve_list(entries: list[Any]) -> list[Any]: - """Resolve $import markers in a list of entries.""" - resolved: list[Any] = [] - for entry in entries: - if isinstance(entry, dict) and _IMPORT_KEY in entry: - if len(entry) > 1: - raise ValueError( - f"$import must be the only key in the dict, got extra keys: " - f"{sorted(k for k in entry if k != _IMPORT_KEY)}" - ) - imported = _lookup(entry[_IMPORT_KEY], "list entry") - if not isinstance(imported, list): - raise ValueError( - f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a " - f"list, got {type(imported).__name__}." 
- ) - resolved.extend(imported) - elif ( - isinstance(entry, dict) - and isinstance(entry.get("cfg"), dict) - and _IMPORT_KEY in entry["cfg"] - ): - ref = entry["cfg"].pop(_IMPORT_KEY) - inline_keys = dict(entry["cfg"]) - ref_names = ref if isinstance(ref, list) else [ref] - - merged: dict[str, Any] = {} - for rname in ref_names: - snippet = _lookup(rname, f"cfg of {entry}") - if not isinstance(snippet, dict): - raise ValueError( - f"$import {rname!r} in cfg must resolve to a dict, " - f"got {type(snippet).__name__}." - ) - merged.update(snippet) - - merged.update(inline_keys) - entry["cfg"] = merged - resolved.append(entry) - else: - resolved.append(entry) - return resolved - - # Resolve in quant_cfg (top-level or nested under quantize) - for container in [data, data.get("quantize", {})]: - if isinstance(container, dict): - quant_cfg = container.get("quant_cfg") - if isinstance(quant_cfg, list): - container["quant_cfg"] = _resolve_list(quant_cfg) - - # Resolve in _list_content (multi-document snippets) - if "_list_content" in data: - data["_list_content"] = _resolve_list(data["_list_content"]) - - return data - - -def load_config(config_path: str | Path | Traversable) -> dict[str, Any] | list[Any]: - """Load a YAML config and resolve all ``$import`` references. - - This is the primary config loading entry point. It loads the YAML file, - resolves any ``imports`` / ``$import`` directives, and returns the final - config dict or list. - """ - data = _load_raw_config(config_path) - if isinstance(data, dict) and "imports" in data: - data = _resolve_imports(data) - return data +__all__ = ["BUILTIN_RECIPES_LIB", "_load_raw_config", "_resolve_imports", "load_config"] diff --git a/modelopt/torch/opt/config_loader.py b/modelopt/torch/opt/config_loader.py new file mode 100644 index 0000000000..3c03f4b445 --- /dev/null +++ b/modelopt/torch/opt/config_loader.py @@ -0,0 +1,268 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""General-purpose YAML config loading with ``$import`` resolution. + +This module provides the config loading infrastructure used by both +``modelopt.recipe`` and ``modelopt.torch.quantization.config``. It lives +in ``modelopt.torch.opt`` (the lowest dependency layer) to avoid circular +imports. +""" + +from importlib.resources import files + +try: + from importlib.resources.abc import Traversable +except ImportError: # Python < 3.11 + from importlib.abc import Traversable +import re +from pathlib import Path +from typing import Any + +import yaml + +# Root to all built-in recipes. Users can create own recipes. 
+BUILTIN_RECIPES_LIB = files("modelopt_recipes") + +_EXMY_RE = re.compile(r"^[Ee](\d+)[Mm](\d+)$") +_EXMY_KEYS = frozenset({"num_bits", "scale_bits"}) + + +def _parse_exmy_num_bits(obj: Any) -> Any: + """Recursively convert ``ExMy`` strings in ``num_bits`` / ``scale_bits`` to ``(x, y)`` tuples.""" + if isinstance(obj, dict): + return { + k: ( + _parse_exmy(v) + if k in _EXMY_KEYS and isinstance(v, str) + else _parse_exmy_num_bits(v) + ) + for k, v in obj.items() + } + if isinstance(obj, list): + return [_parse_exmy_num_bits(item) for item in obj] + return obj + + +def _parse_exmy(s: str) -> tuple[int, int] | str: + m = _EXMY_RE.match(s) + if m: + return (int(m.group(1)), int(m.group(2))) + return s + + +def _load_raw_config(config_file: str | Path | Traversable) -> dict[str, Any] | list[Any]: + """Load a config YAML without resolving ``$import`` references. + + config_file: Path to a config yaml file. The path suffix can be omitted. + """ + paths_to_check: list[Path | Traversable] = [] + if isinstance(config_file, str): + if not config_file.endswith(".yml") and not config_file.endswith(".yaml"): + paths_to_check.append(Path(f"{config_file}.yml")) + paths_to_check.append(Path(f"{config_file}.yaml")) + paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yml")) + paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yaml")) + else: + paths_to_check.append(Path(config_file)) + paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(config_file)) + elif isinstance(config_file, Path): + if config_file.suffix in (".yml", ".yaml"): + paths_to_check.append(config_file) + if not config_file.is_absolute(): + paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(str(config_file))) + else: + paths_to_check.append(Path(f"{config_file}.yml")) + paths_to_check.append(Path(f"{config_file}.yaml")) + if not config_file.is_absolute(): + paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yml")) + 
paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yaml")) + elif isinstance(config_file, Traversable): + paths_to_check.append(config_file) + else: + raise ValueError(f"Invalid config file of {config_file}") + + config_path = None + for path in paths_to_check: + if path.is_file(): + config_path = path + break + if not config_path: + raise ValueError( + f"Cannot find config file of {config_file}, paths checked: {paths_to_check}" + ) + + text = config_path.read_text(encoding="utf-8") + docs = list(yaml.safe_load_all(text)) + + if len(docs) == 0 or docs[0] is None: + return {} + if len(docs) == 1: + _raw = docs[0] + elif len(docs) == 2: + # Multi-document: first doc is imports/metadata, second is content. + # Merge the imports into the content for downstream resolution. + header, content = docs[0], docs[1] + if not isinstance(header, dict): + raise ValueError( + f"Config file {config_path}: first YAML document must be a mapping, " + f"got {type(header).__name__}" + ) + if content is None: + content = {} + if isinstance(content, dict): + _raw = {**header, **content} + elif isinstance(content, list): + # List content with a header dict — attach imports via wrapper + _raw = {**header, "_list_content": content} + else: + raise ValueError( + f"Config file {config_path}: second YAML document must be a mapping or list, " + f"got {type(content).__name__}" + ) + else: + raise ValueError( + f"Config file {config_path}: expected 1 or 2 YAML documents, got {len(docs)}" + ) + + if not isinstance(_raw, (dict, list)): + raise ValueError( + f"Config file {config_path} must contain a YAML mapping or list, got {type(_raw).__name__}" + ) + return _parse_exmy_num_bits(_raw) + + +# --------------------------------------------------------------------------- +# $import resolution +# --------------------------------------------------------------------------- + +_IMPORT_KEY = "$import" + + +def _resolve_imports( + data: dict[str, Any], _loading: frozenset[str] | None = None 
+) -> dict[str, Any]: + """Resolve the ``imports`` section and ``$import`` references. + + See ``modelopt.recipe.loader`` module docstring for the full specification. + This function lives in ``_config_loader`` (not ``loader``) so that it can be + used from ``modelopt.torch.quantization.config`` without circular imports. + """ + imports_dict = data.pop("imports", None) + if not imports_dict: + return data + + if not isinstance(imports_dict, dict): + raise ValueError( + f"'imports' must be a dict mapping names to config paths, got: {type(imports_dict).__name__}" + ) + + if _loading is None: + _loading = frozenset() + + # Build name → config mapping (recursively resolve nested imports) + import_map: dict[str, Any] = {} + for name, config_path in imports_dict.items(): + if not config_path: + raise ValueError(f"Import {name!r} has an empty config path.") + if config_path in _loading: + raise ValueError( + f"Circular import detected: {config_path!r} is already being loaded. " + f"Import chain: {sorted(_loading)}" + ) + snippet = _load_raw_config(config_path) + if isinstance(snippet, dict) and "imports" in snippet: + snippet = _resolve_imports(snippet, _loading | {config_path}) + # Unwrap _list_content (multi-document YAML: imports + list content) + if isinstance(snippet, dict) and "_list_content" in snippet: + snippet = snippet["_list_content"] + import_map[name] = snippet + + def _lookup(ref_name: str, context: str) -> Any: + if ref_name not in import_map: + raise ValueError( + f"Unknown $import reference {ref_name!r} in {context}. 
" + f"Available imports: {list(import_map.keys())}" + ) + return import_map[ref_name] + + def _resolve_list(entries: list[Any]) -> list[Any]: + """Resolve $import markers in a list of entries.""" + resolved: list[Any] = [] + for entry in entries: + if isinstance(entry, dict) and _IMPORT_KEY in entry: + if len(entry) > 1: + raise ValueError( + f"$import must be the only key in the dict, got extra keys: " + f"{sorted(k for k in entry if k != _IMPORT_KEY)}" + ) + imported = _lookup(entry[_IMPORT_KEY], "list entry") + if not isinstance(imported, list): + raise ValueError( + f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a " + f"list, got {type(imported).__name__}." + ) + resolved.extend(imported) + elif ( + isinstance(entry, dict) + and isinstance(entry.get("cfg"), dict) + and _IMPORT_KEY in entry["cfg"] + ): + ref = entry["cfg"].pop(_IMPORT_KEY) + inline_keys = dict(entry["cfg"]) + ref_names = ref if isinstance(ref, list) else [ref] + + merged: dict[str, Any] = {} + for rname in ref_names: + snippet = _lookup(rname, f"cfg of {entry}") + if not isinstance(snippet, dict): + raise ValueError( + f"$import {rname!r} in cfg must resolve to a dict, " + f"got {type(snippet).__name__}." + ) + merged.update(snippet) + + merged.update(inline_keys) + entry["cfg"] = merged + resolved.append(entry) + else: + resolved.append(entry) + return resolved + + # Resolve in quant_cfg (top-level or nested under quantize) + for container in [data, data.get("quantize", {})]: + if isinstance(container, dict): + quant_cfg = container.get("quant_cfg") + if isinstance(quant_cfg, list): + container["quant_cfg"] = _resolve_list(quant_cfg) + + # Resolve in _list_content (multi-document snippets) + if "_list_content" in data: + data["_list_content"] = _resolve_list(data["_list_content"]) + + return data + + +def load_config(config_path: str | Path | Traversable) -> dict[str, Any] | list[Any]: + """Load a YAML config and resolve all ``$import`` references. 
+ + This is the primary config loading entry point. It loads the YAML file, + resolves any ``imports`` / ``$import`` directives, and returns the final + config dict or list. + """ + data = _load_raw_config(config_path) + if isinstance(data, dict) and "imports" in data: + data = _resolve_imports(data) + return data diff --git a/modelopt/torch/quantization/config.py b/modelopt/torch/quantization/config.py index 5430391adb..c6f49b2de9 100644 --- a/modelopt/torch/quantization/config.py +++ b/modelopt/torch/quantization/config.py @@ -157,8 +157,8 @@ from pydantic import ValidationInfo, field_validator, model_validator from typing_extensions import Required, TypedDict -from modelopt.recipe._config_loader import load_config from modelopt.torch.opt.config import ModeloptBaseConfig, ModeloptField +from modelopt.torch.opt.config_loader import load_config from modelopt.torch.utils.network import ConstructorLike From 65b291d2dd5e1c20c11e02d3834197b1c8bd514e Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Wed, 15 Apr 2026 14:40:32 -0700 Subject: [PATCH 20/30] make config_root so it is logically independent of recipe Signed-off-by: Shengliang Xu --- modelopt/recipe/_config_loader.py | 12 ++++++++++-- modelopt/torch/opt/config_loader.py | 16 ++++++++-------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/modelopt/recipe/_config_loader.py b/modelopt/recipe/_config_loader.py index 5ed2c80361..1abbd36c98 100644 --- a/modelopt/recipe/_config_loader.py +++ b/modelopt/recipe/_config_loader.py @@ -16,10 +16,18 @@ """Re-export config loading utilities from ``modelopt.torch.opt.config_loader``.""" from modelopt.torch.opt.config_loader import ( - BUILTIN_RECIPES_LIB, + BUILTIN_CONFIG_ROOT, _load_raw_config, _resolve_imports, load_config, ) -__all__ = ["BUILTIN_RECIPES_LIB", "_load_raw_config", "_resolve_imports", "load_config"] +BUILTIN_RECIPES_LIB = BUILTIN_CONFIG_ROOT + +__all__ = [ + "BUILTIN_CONFIG_ROOT", + "BUILTIN_RECIPES_LIB", + "_load_raw_config", +
"_resolve_imports", + "load_config", +] diff --git a/modelopt/torch/opt/config_loader.py b/modelopt/torch/opt/config_loader.py index 3c03f4b445..25d4daa7b8 100644 --- a/modelopt/torch/opt/config_loader.py +++ b/modelopt/torch/opt/config_loader.py @@ -33,8 +33,8 @@ import yaml -# Root to all built-in recipes. Users can create own recipes. -BUILTIN_RECIPES_LIB = files("modelopt_recipes") +# Root to all built-in configs and recipes. +BUILTIN_CONFIG_ROOT = files("modelopt_recipes") _EXMY_RE = re.compile(r"^[Ee](\d+)[Mm](\d+)$") _EXMY_KEYS = frozenset({"num_bits", "scale_bits"}) @@ -73,22 +73,22 @@ def _load_raw_config(config_file: str | Path | Traversable) -> dict[str, Any] | if not config_file.endswith(".yml") and not config_file.endswith(".yaml"): paths_to_check.append(Path(f"{config_file}.yml")) paths_to_check.append(Path(f"{config_file}.yaml")) - paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yml")) - paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yaml")) + paths_to_check.append(BUILTIN_CONFIG_ROOT.joinpath(f"{config_file}.yml")) + paths_to_check.append(BUILTIN_CONFIG_ROOT.joinpath(f"{config_file}.yaml")) else: paths_to_check.append(Path(config_file)) - paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(config_file)) + paths_to_check.append(BUILTIN_CONFIG_ROOT.joinpath(config_file)) elif isinstance(config_file, Path): if config_file.suffix in (".yml", ".yaml"): paths_to_check.append(config_file) if not config_file.is_absolute(): - paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(str(config_file))) + paths_to_check.append(BUILTIN_CONFIG_ROOT.joinpath(str(config_file))) else: paths_to_check.append(Path(f"{config_file}.yml")) paths_to_check.append(Path(f"{config_file}.yaml")) if not config_file.is_absolute(): - paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yml")) - paths_to_check.append(BUILTIN_RECIPES_LIB.joinpath(f"{config_file}.yaml")) + 
paths_to_check.append(BUILTIN_CONFIG_ROOT.joinpath(f"{config_file}.yml")) + paths_to_check.append(BUILTIN_CONFIG_ROOT.joinpath(f"{config_file}.yaml")) elif isinstance(config_file, Traversable): paths_to_check.append(config_file) else: From 9f69cd05ecc7530011d02845647075adadfc0922 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Wed, 15 Apr 2026 15:12:02 -0700 Subject: [PATCH 21/30] README Signed-off-by: Shengliang Xu --- modelopt_recipes/configs/ptq/units/README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 modelopt_recipes/configs/ptq/units/README.md diff --git a/modelopt_recipes/configs/ptq/units/README.md b/modelopt_recipes/configs/ptq/units/README.md new file mode 100644 index 0000000000..50cf028c15 --- /dev/null +++ b/modelopt_recipes/configs/ptq/units/README.md @@ -0,0 +1,17 @@ +# PTQ Config Units + +Reusable building blocks for composing PTQ quantization configurations. +Each file defines one or more `quant_cfg` entries that can be imported +into recipes or presets via `$import`. + +Units are **not** standalone configs — they don't have `algorithm` or +`metadata`. They are meant to be composed into complete configs by +recipes (under `general/` or `models/`) or presets (under `presets/`). + +| File | Description | +|------|-------------| +| `base_disable_all.yaml` | Deny-all entry: disables all quantizers as the first step | +| `default_disabled_quantizers.yaml` | Standard exclusions (LM head, routers, BatchNorm, etc.) 
| +| `fp8_kv.yaml` | FP8 E4M3 KV cache quantizer entry | +| `w8a8_fp8_fp8.yaml` | FP8 weight + activation quantizer entries (W8A8) | +| `w4a4_nvfp4_nvfp4.yaml` | NVFP4 weight + activation quantizer entries (W4A4) | From 0b79b9fc2bf80d22f301faf1e85e27d068acee97 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Wed, 15 Apr 2026 15:24:57 -0700 Subject: [PATCH 22/30] Change Log Signed-off-by: Shengliang Xu --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e2c4b2a7c0..f22ec42256 100755 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,7 +15,7 @@ Changelog - Enable PTQ workflow for the Step3.5-Flash MoE model with NVFP4 W4A4 + FP8 KV cache quantization. See `modelopt_recipes/models/Step3.5-Flash/nvfp4-mlp-only.yaml `_ for more details. - Add support for vLLM fakequant reload using ModelOpt state for HF models. See `examples/vllm_serve/README.md `_ for more details. - [Early Testing] Add Claude Code PTQ skill (``.claude/skills/ptq/``) for agent-assisted post-training quantization. The skill guides the agent through environment detection, model support checking, format selection, and execution via the launcher or manual SLURM/Docker/bare GPU paths. Includes handling for unlisted models with custom module patching. This feature is in early testing — use with caution. -- Add composable ``$import`` system for recipe YAML configs. Recipes can now declare an ``imports`` section mapping names to reusable config snippet files. The ``{$import: name}`` marker resolves at load time — as a dict value it replaces the content with ordered override precedence (later imports override earlier, inline keys override all), as a list element it splices the snippet entries. Supports multi-import (``$import: [a, b]``) and inline extension/override. Resolution is recursive with circular import detection. All built-in PTQ recipes converted to use imports with shared snippets under ``modelopt_recipes/configs/``. 
See :ref:`composable-imports` for the full specification. +- Add composable ``$import`` system for recipe YAML configs, enabling reusable config snippets referenced via ``{$import: name}`` markers. All built-in PTQ recipes converted to use imports with shared snippets under ``modelopt_recipes/configs/`` (numeric formats, quant_cfg building blocks, presets). See :ref:`composable-imports`. **Backward Breaking Changes** From e3c9e5003901a088e5ff59ed933e3cbd3a4081a8 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Wed, 15 Apr 2026 15:58:17 -0700 Subject: [PATCH 23/30] use full name, do not shorten Signed-off-by: Shengliang Xu --- docs/source/guides/10_recipes.rst | 37 ++++++++++--------- .../configs/ptq/presets/model/fp8.yaml | 8 ++-- .../general/ptq/fp8_default-fp8_kv.yaml | 4 +- .../general/ptq/nvfp4_default-fp8_kv.yaml | 4 +- .../ptq/nvfp4_default-none_kv_gptq.yaml | 4 +- .../ptq/nvfp4_experts_only-fp8_kv.yaml | 4 +- .../general/ptq/nvfp4_mlp_only-fp8_kv.yaml | 4 +- .../general/ptq/nvfp4_omlp_only-fp8_kv.yaml | 4 +- 8 files changed, 35 insertions(+), 34 deletions(-) diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst index a8986312f0..ed783dc93c 100644 --- a/docs/source/guides/10_recipes.rst +++ b/docs/source/guides/10_recipes.rst @@ -61,7 +61,7 @@ styles can be used in a single-file or directory layout. Single-file format ------------------ -The simplest form is a single ``.yml`` or ``.yaml`` file. +The simplest form is a single ``.yaml`` file. **Inline style** — all config values are written directly: @@ -131,10 +131,10 @@ example: .. code-block:: text my_recipe/ - recipe.yml # metadata section (+ optional imports) - quantize.yml # quantize section (quant_cfg + algorithm) + recipe.yaml # metadata section (+ optional imports) + quantize.yaml # quantize section (+ optional imports) -``recipe.yml``: +``recipe.yaml``: .. code-block:: yaml @@ -142,7 +142,7 @@ example: recipe_type: ptq description: My custom NVFP4 recipe.
-``quantize.yml``: +``quantize.yaml``: .. code-block:: yaml @@ -159,8 +159,9 @@ example: num_bits: e4m3 axis: -Both inline and import styles work with the directory format. When using -imports in a directory recipe, place the ``imports`` section in ``recipe.yml``. +Both inline and import styles work with the directory format. Any YAML file +in the directory can have its own ``imports`` section — ``recipe.yaml``, +``quantize.yaml``, or any other config file. .. _composable-imports: @@ -475,7 +476,7 @@ type depends on the ``recipe_type`` in the metadata: .. code-block:: python # Load a custom recipe from the filesystem (file or directory) - recipe = load_recipe("/path/to/my_custom_recipe.yml") + recipe = load_recipe("/path/to/my_custom_recipe.yaml") # or: recipe = load_recipe("/path/to/my_recipe_dir/") Command-line usage @@ -529,7 +530,7 @@ This means built-in recipes can be referenced without any prefix: # These are all equivalent: load_recipe("general/ptq/fp8_default-fp8_kv") - load_recipe("general/ptq/fp8_default-fp8_kv.yml") + load_recipe("general/ptq/fp8_default-fp8_kv.yaml") Writing a custom recipe @@ -547,7 +548,7 @@ Example -- creating a custom PTQ recipe using imports: .. 
code-block:: yaml - # my_int8_recipe.yml + # my_int8_recipe.yaml imports: base_disable_all: configs/ptq/units/base_disable_all default_disabled: configs/ptq/units/default_disabled_quantizers @@ -586,19 +587,19 @@ The ``modelopt_recipes/`` package is organized as follows: +-- __init__.py +-- general/ # Model-agnostic recipes | +-- ptq/ - | +-- fp8_default-fp8_kv.yml - | +-- nvfp4_default-fp8_kv.yml - | +-- nvfp4_mlp_only-fp8_kv.yml - | +-- nvfp4_experts_only-fp8_kv.yml - | +-- nvfp4_omlp_only-fp8_kv.yml + | +-- fp8_default-fp8_kv.yaml + | +-- nvfp4_default-fp8_kv.yaml + | +-- nvfp4_mlp_only-fp8_kv.yaml + | +-- nvfp4_experts_only-fp8_kv.yaml + | +-- nvfp4_omlp_only-fp8_kv.yaml +-- models/ # Model-specific recipes | +-- Step3.5-Flash/ | +-- nvfp4-mlp-only.yaml +-- configs/ # Reusable config snippets (imported via $import) +-- numerics/ # Numeric format definitions - | +-- fp8.yml - | +-- nvfp4_static.yml - | +-- nvfp4.yml + | +-- fp8.yaml + | +-- nvfp4_static.yaml + | +-- nvfp4.yaml +-- ptq/ # PTQ-specific entry snippets +-- base_disable_all.yaml +-- default_disabled_quantizers.yaml diff --git a/modelopt_recipes/configs/ptq/presets/model/fp8.yaml b/modelopt_recipes/configs/ptq/presets/model/fp8.yaml index 763fe8ee5f..3f7ef9f860 100644 --- a/modelopt_recipes/configs/ptq/presets/model/fp8.yaml +++ b/modelopt_recipes/configs/ptq/presets/model/fp8.yaml @@ -17,11 +17,11 @@ # Equivalent to the hardcoded FP8_DEFAULT_CFG in config.py. 
imports: base_disable_all: configs/ptq/units/base_disable_all - w8a8: configs/ptq/units/w8a8_fp8_fp8 - default_disabled: configs/ptq/units/default_disabled_quantizers + w8a8_fp8_fp8: configs/ptq/units/w8a8_fp8_fp8 + default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers algorithm: max quant_cfg: - $import: base_disable_all - - $import: w8a8 - - $import: default_disabled + - $import: w8a8_fp8_fp8 + - $import: default_disabled_quantizers diff --git a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml index 8fe8c121d2..85267c8672 100644 --- a/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/fp8_default-fp8_kv.yaml @@ -15,7 +15,7 @@ imports: base_disable_all: configs/ptq/units/base_disable_all - default_disabled: configs/ptq/units/default_disabled_quantizers + default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers w8a8_fp8_fp8: configs/ptq/units/w8a8_fp8_fp8 fp8_kv: configs/ptq/units/fp8_kv @@ -28,4 +28,4 @@ quantize: - $import: base_disable_all - $import: w8a8_fp8_fp8 - $import: fp8_kv - - $import: default_disabled + - $import: default_disabled_quantizers diff --git a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml index 8da3bebff1..e616a3a63a 100644 --- a/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yaml @@ -15,7 +15,7 @@ imports: base_disable_all: configs/ptq/units/base_disable_all - default_disabled: configs/ptq/units/default_disabled_quantizers + default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers w4a4_nvfp4_nvfp4: configs/ptq/units/w4a4_nvfp4_nvfp4 fp8_kv: configs/ptq/units/fp8_kv @@ -28,4 +28,4 @@ quantize: - $import: base_disable_all - $import: w4a4_nvfp4_nvfp4 - $import: fp8_kv - - $import: default_disabled + - $import: default_disabled_quantizers diff --git 
a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml index 04cfcfa925..1f9fa822cd 100644 --- a/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yaml @@ -15,7 +15,7 @@ imports: base_disable_all: configs/ptq/units/base_disable_all - default_disabled: configs/ptq/units/default_disabled_quantizers + default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers nvfp4_static: configs/numerics/nvfp4_static nvfp4: configs/numerics/nvfp4 @@ -36,4 +36,4 @@ quantize: $import: nvfp4 - quantizer_name: '*[kv]_bmm_quantizer' enable: false - - $import: default_disabled + - $import: default_disabled_quantizers diff --git a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml index 689e981b34..cd7ce28b46 100644 --- a/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yaml @@ -15,7 +15,7 @@ imports: base_disable_all: configs/ptq/units/base_disable_all - default_disabled: configs/ptq/units/default_disabled_quantizers + default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers nvfp4: configs/numerics/nvfp4 fp8_kv: configs/ptq/units/fp8_kv @@ -39,4 +39,4 @@ quantize: cfg: $import: nvfp4 - $import: fp8_kv - - $import: default_disabled + - $import: default_disabled_quantizers diff --git a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml index ee26898cd9..9e300b2501 100644 --- a/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_mlp_only-fp8_kv.yaml @@ -15,7 +15,7 @@ imports: base_disable_all: configs/ptq/units/base_disable_all - default_disabled: configs/ptq/units/default_disabled_quantizers + default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers 
nvfp4: configs/numerics/nvfp4 fp8_kv: configs/ptq/units/fp8_kv @@ -39,4 +39,4 @@ quantize: cfg: $import: nvfp4 - $import: fp8_kv - - $import: default_disabled + - $import: default_disabled_quantizers diff --git a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml index 1075303f72..2c83641137 100644 --- a/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml +++ b/modelopt_recipes/general/ptq/nvfp4_omlp_only-fp8_kv.yaml @@ -15,7 +15,7 @@ imports: base_disable_all: configs/ptq/units/base_disable_all - default_disabled: configs/ptq/units/default_disabled_quantizers + default_disabled_quantizers: configs/ptq/units/default_disabled_quantizers nvfp4: configs/numerics/nvfp4 fp8_kv: configs/ptq/units/fp8_kv @@ -45,4 +45,4 @@ quantize: cfg: $import: nvfp4 - $import: fp8_kv - - $import: default_disabled + - $import: default_disabled_quantizers From 070f2154a8d4ca38727f359cfd45018f3cb9d9bb Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Wed, 15 Apr 2026 16:07:46 -0700 Subject: [PATCH 24/30] cleaner code Signed-off-by: Shengliang Xu --- modelopt/recipe/loader.py | 23 +++++++++++--- modelopt/torch/opt/config_loader.py | 48 ++++++++++++++++------------- 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/modelopt/recipe/loader.py b/modelopt/recipe/loader.py index f91f6d9920..a23353f944 100644 --- a/modelopt/recipe/loader.py +++ b/modelopt/recipe/loader.py @@ -88,7 +88,10 @@ def _load_recipe_from_file(recipe_file: Path | Traversable) -> ModelOptRecipeBas plus a ``quant_cfg`` mapping and an optional ``algorithm`` for PTQ recipes. """ raw = _load_raw_config(recipe_file) - assert isinstance(raw, dict), f"Recipe file {recipe_file} must be a YAML mapping." + if not isinstance(raw, dict): + raise ValueError( + f"Recipe file {recipe_file} must be a YAML mapping, got {type(raw).__name__}." 
+ ) data = _resolve_imports(raw) metadata = data.get("metadata", {}) @@ -121,7 +124,10 @@ def _load_recipe_from_dir(recipe_dir: Path | Traversable) -> ModelOptRecipeBase: ) recipe_data = _load_raw_config(recipe_file) - assert isinstance(recipe_data, dict), f"Recipe file {recipe_file} must be a YAML mapping." + if not isinstance(recipe_data, dict): + raise ValueError( + f"Recipe file {recipe_file} must be a YAML mapping, got {type(recipe_data).__name__}." + ) metadata = recipe_data.get("metadata", {}) recipe_type = metadata.get("recipe_type") if recipe_type is None: @@ -138,14 +144,21 @@ def _load_recipe_from_dir(recipe_dir: Path | Traversable) -> ModelOptRecipeBase: raise ValueError( f"Cannot find quantize in {recipe_dir}. Looked for: quantize.yml, quantize.yaml" ) - # Resolve imports: imports are in recipe.yml, quantize data is separate + # Resolve imports from both recipe.yaml and quantize.yaml quantize_data = _load_raw_config(quantize_file) - assert isinstance(quantize_data, dict), f"{quantize_file} must be a YAML mapping." + if not isinstance(quantize_data, dict): + raise ValueError( + f"{quantize_file} must be a YAML mapping, got {type(quantize_data).__name__}." 
+ ) + # Resolve quantize.yaml's own imports first (if any) + if "imports" in quantize_data: + quantize_data = _resolve_imports(quantize_data) + # Then resolve recipe.yaml's imports applied to the quantize data combined: dict[str, Any] = {"quantize": quantize_data} imports = recipe_data.get("imports") if imports: combined["imports"] = imports - combined = _resolve_imports(combined) + combined = _resolve_imports(combined) return ModelOptPTQRecipe( recipe_type=RecipeType.PTQ, description=metadata.get("description", "PTQ recipe."), diff --git a/modelopt/torch/opt/config_loader.py b/modelopt/torch/opt/config_loader.py index 25d4daa7b8..6e63f3cdc6 100644 --- a/modelopt/torch/opt/config_loader.py +++ b/modelopt/torch/opt/config_loader.py @@ -198,6 +198,28 @@ def _lookup(ref_name: str, context: str) -> Any: ) return import_map[ref_name] + def _resolve_dict_value(d: dict[str, Any], key: str) -> None: + """Resolve ``$import`` in a dict value: ``key: {$import: name, ...inline}``.""" + val = d[key] + if not isinstance(val, dict) or _IMPORT_KEY not in val: + return + ref = val.pop(_IMPORT_KEY) + inline_keys = dict(val) + ref_names = ref if isinstance(ref, list) else [ref] + + merged: dict[str, Any] = {} + for rname in ref_names: + snippet = _lookup(rname, f"{key} of {d}") + if not isinstance(snippet, dict): + raise ValueError( + f"$import {rname!r} in {key} must resolve to a dict, " + f"got {type(snippet).__name__}." + ) + merged.update(snippet) + + merged.update(inline_keys) + d[key] = merged + def _resolve_list(entries: list[Any]) -> list[Any]: """Resolve $import markers in a list of entries.""" resolved: list[Any] = [] @@ -215,27 +237,11 @@ def _resolve_list(entries: list[Any]) -> list[Any]: f"list, got {type(imported).__name__}." 
) resolved.extend(imported) - elif ( - isinstance(entry, dict) - and isinstance(entry.get("cfg"), dict) - and _IMPORT_KEY in entry["cfg"] - ): - ref = entry["cfg"].pop(_IMPORT_KEY) - inline_keys = dict(entry["cfg"]) - ref_names = ref if isinstance(ref, list) else [ref] - - merged: dict[str, Any] = {} - for rname in ref_names: - snippet = _lookup(rname, f"cfg of {entry}") - if not isinstance(snippet, dict): - raise ValueError( - f"$import {rname!r} in cfg must resolve to a dict, " - f"got {type(snippet).__name__}." - ) - merged.update(snippet) - - merged.update(inline_keys) - entry["cfg"] = merged + elif isinstance(entry, dict): + # Resolve $import in any dict value within the entry + for key in list(entry): + if isinstance(entry.get(key), dict) and _IMPORT_KEY in entry[key]: + _resolve_dict_value(entry, key) resolved.append(entry) else: resolved.append(entry) From 1127f323fc37b87bf43f3536d64fb21008bb5a8a Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Wed, 15 Apr 2026 16:14:09 -0700 Subject: [PATCH 25/30] A new test Signed-off-by: Shengliang Xu --- tests/unit/recipe/test_loader.py | 47 ++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py index 4dd235a081..beb1319284 100644 --- a/tests/unit/recipe/test_loader.py +++ b/tests/unit/recipe/test_loader.py @@ -557,6 +557,53 @@ def test_import_cfg_inline_overrides_import(tmp_path): assert cfg["axis"] is None +def test_import_in_non_cfg_dict_value(tmp_path): + """$import resolves in any dict value, not just cfg.""" + (tmp_path / "bias_cfg.yml").write_text("enable: true\ntype: static\naxis: -1\n") + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" bias_cfg: {tmp_path / 'bias_cfg.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - quantizer_name: '*weight_quantizer'\n" + f" bias:\n" + f" $import: bias_cfg\n" + ) + recipe = 
load_recipe(recipe_file) + entry = recipe.quantize["quant_cfg"][0] + assert entry["bias"] == {"enable": True, "type": "static", "axis": -1} + + +def test_import_in_multiple_dict_values(tmp_path): + """$import resolves independently in multiple dict values of the same entry.""" + (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n") + (tmp_path / "bias_cfg.yml").write_text("enable: true\ntype: dynamic\n") + recipe_file = tmp_path / "recipe.yml" + recipe_file.write_text( + f"imports:\n" + f" fp8: {tmp_path / 'fp8.yml'}\n" + f" bias_cfg: {tmp_path / 'bias_cfg.yml'}\n" + f"metadata:\n" + f" recipe_type: ptq\n" + f"quantize:\n" + f" algorithm: max\n" + f" quant_cfg:\n" + f" - quantizer_name: '*weight_quantizer'\n" + f" cfg:\n" + f" $import: fp8\n" + f" bias:\n" + f" $import: bias_cfg\n" + ) + recipe = load_recipe(recipe_file) + entry = recipe.quantize["quant_cfg"][0] + assert entry["cfg"] == {"num_bits": (4, 3)} + assert entry["bias"] == {"enable": True, "type": "dynamic"} + + def test_import_cfg_multi_import(tmp_path): """$import with a list of names merges non-overlapping snippets.""" (tmp_path / "bits.yml").write_text("num_bits: e4m3\n") From 185ee3bec3c7a2a71c968129e52d5856a7111fa8 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Wed, 15 Apr 2026 16:18:59 -0700 Subject: [PATCH 26/30] more loads Signed-off-by: Shengliang Xu --- modelopt/torch/quantization/config.py | 32 ++++----------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/modelopt/torch/quantization/config.py b/modelopt/torch/quantization/config.py index c6f49b2de9..fb2a8c3fd2 100644 --- a/modelopt/torch/quantization/config.py +++ b/modelopt/torch/quantization/config.py @@ -204,35 +204,11 @@ def find_quant_cfg_entry_by_path( return result -_base_disable_all: list[QuantizerCfgEntry] = [ - {"quantizer_name": "*", "enable": False}, -] +_base_disable_all: list[QuantizerCfgEntry] = load_config("configs/ptq/units/base_disable_all") -_default_disabled_quantizer_cfg: 
list[QuantizerCfgEntry] = [ - {"parent_class": "nn.BatchNorm1d", "quantizer_name": "*", "enable": False}, - {"parent_class": "nn.BatchNorm2d", "quantizer_name": "*", "enable": False}, - {"parent_class": "nn.BatchNorm3d", "quantizer_name": "*", "enable": False}, - {"parent_class": "nn.LeakyReLU", "quantizer_name": "*", "enable": False}, - {"quantizer_name": "*lm_head*", "enable": False}, - { - "quantizer_name": "*proj_out.*", - "enable": False, - }, # In Whisper model, lm_head has key name proj_out - { - "quantizer_name": "*block_sparse_moe.gate*", - "enable": False, - }, # Skip the MOE router - {"quantizer_name": "*router*", "enable": False}, # Skip the MOE router - {"quantizer_name": "*mlp.gate.*", "enable": False}, # Skip the MOE router - { - "quantizer_name": "*mlp.shared_expert_gate.*", - "enable": False, - }, # Skip the MOE router - {"quantizer_name": "*linear_attn.conv1d*", "enable": False}, - {"quantizer_name": "*mixer.conv1d*", "enable": False}, # Skip mamba conv1d - {"quantizer_name": "*output_layer*", "enable": False}, - {"quantizer_name": "output.*", "enable": False}, -] +_default_disabled_quantizer_cfg: list[QuantizerCfgEntry] = load_config( + "configs/ptq/units/default_disabled_quantizers" +) _mamba_moe_disabled_quantizer_cfg: list[QuantizerCfgEntry] = [ {"quantizer_name": "*fc1_latent_proj*", "enable": False}, # Skip Latent MOE From 5e0cc8af2aef02b4a8cbc0c0f29617c251544cb3 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Wed, 15 Apr 2026 17:00:10 -0700 Subject: [PATCH 27/30] fix the doc Signed-off-by: Shengliang Xu --- docs/source/guides/10_recipes.rst | 15 ++++++++++++--- modelopt/torch/opt/config_loader.py | 2 ++ tools/precommit/check_modelopt_recipes.py | 8 ++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/docs/source/guides/10_recipes.rst b/docs/source/guides/10_recipes.rst index ed783dc93c..26f28afd75 100644 --- a/docs/source/guides/10_recipes.rst +++ b/docs/source/guides/10_recipes.rst @@ -600,9 +600,18 @@ The 
``modelopt_recipes/`` package is organized as follows: | +-- fp8.yaml | +-- nvfp4_static.yaml | +-- nvfp4.yaml - +-- ptq/ # PTQ-specific entry snippets - +-- base_disable_all.yaml - +-- default_disabled_quantizers.yaml + +-- ptq/ + +-- units/ # Reusable quant_cfg building blocks + | +-- base_disable_all.yaml + | +-- default_disabled_quantizers.yaml + | +-- fp8_kv.yaml + | +-- w8a8_fp8_fp8.yaml + | +-- w4a4_nvfp4_nvfp4.yaml + +-- presets/ # Complete configs (backward compat with *_CFG dicts) + +-- model/ + | +-- fp8.yaml + +-- kv/ + +-- fp8.yaml Recipe data model diff --git a/modelopt/torch/opt/config_loader.py b/modelopt/torch/opt/config_loader.py index 6e63f3cdc6..89bed69262 100644 --- a/modelopt/torch/opt/config_loader.py +++ b/modelopt/torch/opt/config_loader.py @@ -68,6 +68,8 @@ def _load_raw_config(config_file: str | Path | Traversable) -> dict[str, Any] | config_file: Path to a config yaml file. The path suffix can be omitted. """ + # Probe order: filesystem first, then built-in library. + # This lets users override built-in configs by placing a file locally. 
paths_to_check: list[Path | Traversable] = [] if isinstance(config_file, str): if not config_file.endswith(".yml") and not config_file.endswith(".yaml"): diff --git a/tools/precommit/check_modelopt_recipes.py b/tools/precommit/check_modelopt_recipes.py index 600de317b5..2c5706ee73 100644 --- a/tools/precommit/check_modelopt_recipes.py +++ b/tools/precommit/check_modelopt_recipes.py @@ -57,6 +57,14 @@ def _check_quant_cfg(quant_cfg, label: str) -> list[str]: continue # {$import: name} entries are resolved at load time if "$import" in entry: + ref = entry["$import"] + if not isinstance(ref, (str, list)) or ( + isinstance(ref, list) and not all(isinstance(r, str) for r in ref) + ): + errors.append( + f"{label}: quant_cfg[{i}] '$import' must be a string or list of strings, " + f"got {type(ref).__name__}: {ref!r}" + ) continue if "quantizer_name" not in entry: errors.append( From c7ce455e6667167db1498f1158f57efd6caadc8e Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Wed, 15 Apr 2026 18:16:58 -0700 Subject: [PATCH 28/30] fix failed tests and more tests Signed-off-by: Shengliang Xu --- modelopt/torch/opt/config_loader.py | 108 +++++++-------- tests/unit/recipe/test_loader.py | 203 +++++++++++++++++++++------- 2 files changed, 202 insertions(+), 109 deletions(-) diff --git a/modelopt/torch/opt/config_loader.py b/modelopt/torch/opt/config_loader.py index 89bed69262..51ab693d88 100644 --- a/modelopt/torch/opt/config_loader.py +++ b/modelopt/torch/opt/config_loader.py @@ -200,65 +200,57 @@ def _lookup(ref_name: str, context: str) -> Any: ) return import_map[ref_name] - def _resolve_dict_value(d: dict[str, Any], key: str) -> None: - """Resolve ``$import`` in a dict value: ``key: {$import: name, ...inline}``.""" - val = d[key] - if not isinstance(val, dict) or _IMPORT_KEY not in val: - return - ref = val.pop(_IMPORT_KEY) - inline_keys = dict(val) - ref_names = ref if isinstance(ref, list) else [ref] - - merged: dict[str, Any] = {} - for rname in ref_names: - snippet = 
_lookup(rname, f"{key} of {d}") - if not isinstance(snippet, dict): - raise ValueError( - f"$import {rname!r} in {key} must resolve to a dict, " - f"got {type(snippet).__name__}." - ) - merged.update(snippet) - - merged.update(inline_keys) - d[key] = merged - - def _resolve_list(entries: list[Any]) -> list[Any]: - """Resolve $import markers in a list of entries.""" - resolved: list[Any] = [] - for entry in entries: - if isinstance(entry, dict) and _IMPORT_KEY in entry: - if len(entry) > 1: - raise ValueError( - f"$import must be the only key in the dict, got extra keys: " - f"{sorted(k for k in entry if k != _IMPORT_KEY)}" - ) - imported = _lookup(entry[_IMPORT_KEY], "list entry") - if not isinstance(imported, list): - raise ValueError( - f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a " - f"list, got {type(imported).__name__}." - ) - resolved.extend(imported) - elif isinstance(entry, dict): - # Resolve $import in any dict value within the entry - for key in list(entry): - if isinstance(entry.get(key), dict) and _IMPORT_KEY in entry[key]: - _resolve_dict_value(entry, key) - resolved.append(entry) + def _resolve_value(obj: Any) -> Any: + """Recursively resolve ``$import`` markers anywhere in the config tree. + + - Dict with ``$import`` as only key and list value → splice (in list context) + - Dict with ``$import`` key → replace/merge (import + override with inline keys) + - List → resolve each element (with list-splice for ``$import`` entries) + - Other → return as-is + """ + if isinstance(obj, dict): + if _IMPORT_KEY in obj: + # {$import: name, ...inline} → import, merge, override + ref = obj.pop(_IMPORT_KEY) + inline_keys = dict(obj) + ref_names = ref if isinstance(ref, list) else [ref] + + merged: dict[str, Any] = {} + for rname in ref_names: + snippet = _lookup(rname, "dict value") + if not isinstance(snippet, dict): + raise ValueError( + f"$import {rname!r} in dict must resolve to a dict, " + f"got {type(snippet).__name__}." 
+ ) + merged.update(snippet) + + merged.update(inline_keys) + return _resolve_value(merged) # resolve any nested $import in result else: - resolved.append(entry) - return resolved - - # Resolve in quant_cfg (top-level or nested under quantize) - for container in [data, data.get("quantize", {})]: - if isinstance(container, dict): - quant_cfg = container.get("quant_cfg") - if isinstance(quant_cfg, list): - container["quant_cfg"] = _resolve_list(quant_cfg) - - # Resolve in _list_content (multi-document snippets) - if "_list_content" in data: - data["_list_content"] = _resolve_list(data["_list_content"]) + return {k: _resolve_value(v) for k, v in obj.items()} + elif isinstance(obj, list): + resolved: list[Any] = [] + for entry in obj: + if isinstance(entry, dict) and _IMPORT_KEY in entry and len(entry) == 1: + # {$import: name} as sole key in list → splice + imported = _lookup(entry[_IMPORT_KEY], "list entry") + if not isinstance(imported, list): + raise ValueError( + f"$import {entry[_IMPORT_KEY]!r} in list must resolve to a " + f"list, got {type(imported).__name__}." 
+ ) + resolved.extend(_resolve_value(imported)) + else: + resolved.append(_resolve_value(entry)) + return resolved + return obj + + data = _resolve_value(data) + + # Unwrap _list_content (multi-document snippets) + if isinstance(data, dict) and "_list_content" in data: + data["_list_content"] = _resolve_value(data["_list_content"]) return data diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py index beb1319284..222b0a190d 100644 --- a/tests/unit/recipe/test_loader.py +++ b/tests/unit/recipe/test_loader.py @@ -445,7 +445,9 @@ def test_import_entry_single_element_list(tmp_path): ) recipe = load_recipe(recipe_file) assert len(recipe.quantize["quant_cfg"]) == 1 - assert recipe.quantize["quant_cfg"][0] == {"quantizer_name": "*", "enable": False} + entry = recipe.quantize["quant_cfg"][0] + assert entry["quantizer_name"] == "*" + assert entry["enable"] is False def test_import_entry_non_list_raises(tmp_path): @@ -491,8 +493,8 @@ def test_import_entry_list_splice(tmp_path): assert recipe.quantize["quant_cfg"][2]["quantizer_name"] == "*router*" -def test_import_entry_sibling_keys_raises(tmp_path): - """$import as a list entry with sibling keys raises ValueError.""" +def test_import_entry_sibling_keys_with_list_snippet_raises(tmp_path): + """$import with sibling keys raises when the import resolves to a list (not a dict).""" (tmp_path / "disable.yml").write_text("- quantizer_name: '*'\n enable: false\n") recipe_file = tmp_path / "recipe.yml" recipe_file.write_text( @@ -506,7 +508,7 @@ def test_import_entry_sibling_keys_raises(tmp_path): f" - $import: disable_all\n" f" quantizer_name: '*extra*'\n" ) - with pytest.raises(ValueError, match="must be the only key"): + with pytest.raises(ValueError, match="must resolve to a dict"): load_recipe(recipe_file) @@ -558,50 +560,42 @@ def test_import_cfg_inline_overrides_import(tmp_path): def test_import_in_non_cfg_dict_value(tmp_path): - """$import resolves in any dict value, not just cfg.""" - (tmp_path / 
"bias_cfg.yml").write_text("enable: true\ntype: static\naxis: -1\n") - recipe_file = tmp_path / "recipe.yml" - recipe_file.write_text( + """$import resolves in any dict value, not just cfg (tested via load_config to skip validation).""" + (tmp_path / "extra.yml").write_text("foo: bar\nbaz: 42\n") + config_file = tmp_path / "config.yml" + config_file.write_text( f"imports:\n" - f" bias_cfg: {tmp_path / 'bias_cfg.yml'}\n" - f"metadata:\n" - f" recipe_type: ptq\n" - f"quantize:\n" - f" algorithm: max\n" - f" quant_cfg:\n" - f" - quantizer_name: '*weight_quantizer'\n" - f" bias:\n" - f" $import: bias_cfg\n" + f" extra: {tmp_path / 'extra.yml'}\n" + f"quant_cfg:\n" + f" - quantizer_name: '*weight_quantizer'\n" + f" my_field:\n" + f" $import: extra\n" ) - recipe = load_recipe(recipe_file) - entry = recipe.quantize["quant_cfg"][0] - assert entry["bias"] == {"enable": True, "type": "static", "axis": -1} + data = load_config(config_file) + entry = data["quant_cfg"][0] + assert entry["my_field"] == {"foo": "bar", "baz": 42} def test_import_in_multiple_dict_values(tmp_path): """$import resolves independently in multiple dict values of the same entry.""" (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n") - (tmp_path / "bias_cfg.yml").write_text("enable: true\ntype: dynamic\n") - recipe_file = tmp_path / "recipe.yml" - recipe_file.write_text( + (tmp_path / "extra.yml").write_text("foo: bar\n") + config_file = tmp_path / "config.yml" + config_file.write_text( f"imports:\n" f" fp8: {tmp_path / 'fp8.yml'}\n" - f" bias_cfg: {tmp_path / 'bias_cfg.yml'}\n" - f"metadata:\n" - f" recipe_type: ptq\n" - f"quantize:\n" - f" algorithm: max\n" - f" quant_cfg:\n" - f" - quantizer_name: '*weight_quantizer'\n" - f" cfg:\n" - f" $import: fp8\n" - f" bias:\n" - f" $import: bias_cfg\n" + f" extra: {tmp_path / 'extra.yml'}\n" + f"quant_cfg:\n" + f" - quantizer_name: '*weight_quantizer'\n" + f" cfg:\n" + f" $import: fp8\n" + f" my_field:\n" + f" $import: extra\n" ) - recipe = 
load_recipe(recipe_file) - entry = recipe.quantize["quant_cfg"][0] + data = load_config(config_file) + entry = data["quant_cfg"][0] assert entry["cfg"] == {"num_bits": (4, 3)} - assert entry["bias"] == {"enable": True, "type": "dynamic"} + assert entry["my_field"] == {"foo": "bar"} def test_import_cfg_multi_import(tmp_path): @@ -655,12 +649,12 @@ def test_import_cfg_multi_import_later_overrides_earlier(tmp_path): def test_import_cfg_multi_import_with_extend(tmp_path): """$import list + inline keys all merge without conflicts.""" (tmp_path / "bits.yml").write_text("num_bits: e4m3\n") - (tmp_path / "scale.yml").write_text("scale_bits: e8m0\n") + (tmp_path / "extra.yml").write_text("fake_quant: false\n") recipe_file = tmp_path / "recipe.yml" recipe_file.write_text( f"imports:\n" f" bits: {tmp_path / 'bits.yml'}\n" - f" scale: {tmp_path / 'scale.yml'}\n" + f" extra: {tmp_path / 'extra.yml'}\n" f"metadata:\n" f" recipe_type: ptq\n" f"quantize:\n" @@ -668,12 +662,12 @@ def test_import_cfg_multi_import_with_extend(tmp_path): f" quant_cfg:\n" f" - quantizer_name: '*weight_quantizer'\n" f" cfg:\n" - f" $import: [bits, scale]\n" + f" $import: [bits, extra]\n" f" axis: 0\n" ) recipe = load_recipe(recipe_file) cfg = recipe.quantize["quant_cfg"][0]["cfg"] - assert cfg == {"num_bits": (4, 3), "scale_bits": (8, 0), "axis": 0} + assert cfg == {"num_bits": (4, 3), "fake_quant": False, "axis": 0} def test_import_dir_format(tmp_path): @@ -740,34 +734,141 @@ def test_import_builtin_fp8_kv_snippet(): assert kv_entries[0]["cfg"]["num_bits"] == (4, 3) +# --------------------------------------------------------------------------- +# imports — general tree-wide resolution (not just quant_cfg) +# --------------------------------------------------------------------------- + + +def test_import_in_top_level_dict_value(tmp_path): + """$import resolves in a top-level dict value (not inside any list).""" + (tmp_path / "algo.yml").write_text("method: gptq\nuse_layerwise: true\n") + config_file = 
tmp_path / "config.yml" + config_file.write_text( + f"imports:\n algo: {tmp_path / 'algo.yml'}\nalgorithm:\n $import: algo\nquant_cfg: []\n" + ) + data = load_config(config_file) + assert data["algorithm"] == {"method": "gptq", "use_layerwise": True} + + +def test_import_in_nested_dict(tmp_path): + """$import resolves in deeply nested dicts.""" + (tmp_path / "settings.yml").write_text("lr: 0.001\nepochs: 10\n") + config_file = tmp_path / "config.yml" + config_file.write_text( + f"imports:\n" + f" settings: {tmp_path / 'settings.yml'}\n" + f"training:\n" + f" optimizer:\n" + f" params:\n" + f" $import: settings\n" + ) + data = load_config(config_file) + assert data["training"]["optimizer"]["params"] == {"lr": 0.001, "epochs": 10} + + +def test_import_list_splice_outside_quant_cfg(tmp_path): + """$import list splice works in any list, not just quant_cfg.""" + (tmp_path / "extra_tasks.yml").write_text("- name: task_b\n- name: task_c\n") + config_file = tmp_path / "config.yml" + config_file.write_text( + f"imports:\n" + f" extra: {tmp_path / 'extra_tasks.yml'}\n" + f"tasks:\n" + f" - name: task_a\n" + f" - $import: extra\n" + f" - name: task_d\n" + ) + data = load_config(config_file) + assert data["tasks"] == [ + {"name": "task_a"}, + {"name": "task_b"}, + {"name": "task_c"}, + {"name": "task_d"}, + ] + + +def test_import_in_nested_list_of_dicts(tmp_path): + """$import in dict values within a nested list resolves correctly.""" + (tmp_path / "defaults.yml").write_text("timeout: 30\nretries: 3\n") + config_file = tmp_path / "config.yml" + config_file.write_text( + f"imports:\n" + f" defaults: {tmp_path / 'defaults.yml'}\n" + f"stages:\n" + f" - name: build\n" + f" config:\n" + f" $import: defaults\n" + f" verbose: true\n" + f" - name: test\n" + f" config:\n" + f" $import: defaults\n" + ) + data = load_config(config_file) + assert data["stages"][0]["config"] == {"timeout": 30, "retries": 3, "verbose": True} + assert data["stages"][1]["config"] == {"timeout": 30, 
"retries": 3} + + +def test_import_mixed_tree(tmp_path): + """$import resolves at multiple levels in the same config.""" + (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n") + (tmp_path / "disables.yml").write_text("- quantizer_name: '*lm_head*'\n enable: false\n") + (tmp_path / "meta.yml").write_text("version: 2\nauthor: test\n") + config_file = tmp_path / "config.yml" + config_file.write_text( + f"imports:\n" + f" fp8: {tmp_path / 'fp8.yml'}\n" + f" disables: {tmp_path / 'disables.yml'}\n" + f" meta: {tmp_path / 'meta.yml'}\n" + f"info:\n" + f" $import: meta\n" + f"items:\n" + f" - name: a\n" + f" cfg:\n" + f" $import: fp8\n" + f" - $import: disables\n" + ) + data = load_config(config_file) + # Top-level dict import + assert data["info"] == {"version": 2, "author": "test"} + # Dict import inside list entry + assert data["items"][0]["cfg"] == {"num_bits": (4, 3)} + # List splice + assert data["items"][1] == {"quantizer_name": "*lm_head*", "enable": False} + + # --------------------------------------------------------------------------- # imports — recursive resolution and cycle detection # --------------------------------------------------------------------------- def test_import_recursive(tmp_path): - """A snippet can itself import other snippets.""" - (tmp_path / "base.yml").write_text("num_bits: e4m3\n") - (tmp_path / "mid.yml").write_text( - f"imports:\n base: {tmp_path / 'base.yml'}\nnum_bits:\n $import: base\n" + """A list snippet can import a dict snippet (recursive resolution via multi-doc).""" + # base: dict snippet with FP8 attributes + (tmp_path / "fp8.yml").write_text("num_bits: e4m3\n") + # mid: list snippet that imports base and uses $import in cfg + (tmp_path / "mid.yaml").write_text( + f"imports:\n" + f" fp8: {tmp_path / 'fp8.yml'}\n" + f"---\n" + f"- quantizer_name: '*weight_quantizer'\n" + f" cfg:\n" + f" $import: fp8\n" ) + # recipe imports mid recipe_file = tmp_path / "recipe.yml" recipe_file.write_text( f"imports:\n" - f" mid: {tmp_path / 
'mid.yml'}\n" + f" mid: {tmp_path / 'mid.yaml'}\n" f"metadata:\n" f" recipe_type: ptq\n" f"quantize:\n" f" algorithm: max\n" f" quant_cfg:\n" - f" - quantizer_name: '*weight_quantizer'\n" - f" cfg:\n" - f" $import: mid\n" + f" - $import: mid\n" ) recipe = load_recipe(recipe_file) cfg = recipe.quantize["quant_cfg"][0]["cfg"] - # mid.yml resolved "num_bits: {$import: base}" → base.yml content - assert cfg["num_bits"] == {"num_bits": (4, 3)} + assert cfg == {"num_bits": (4, 3)} def test_import_circular_raises(tmp_path): From 33af932ac6287418f3e31e605eb956d2a2cbb244 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Wed, 15 Apr 2026 18:53:28 -0700 Subject: [PATCH 29/30] better wording Signed-off-by: Shengliang Xu --- modelopt_recipes/configs/ptq/presets/README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/modelopt_recipes/configs/ptq/presets/README.md b/modelopt_recipes/configs/ptq/presets/README.md index f8974fc78c..ee45ec83b9 100644 --- a/modelopt_recipes/configs/ptq/presets/README.md +++ b/modelopt_recipes/configs/ptq/presets/README.md @@ -4,10 +4,9 @@ This directory holds preset quantization configurations that serve as the YAML source of truth for the hardcoded `*_CFG` dicts in `modelopt.torch.quantization.config` (e.g., `FP8_DEFAULT_CFG`). -Each preset is a complete, self-contained config with `algorithm` and -`quant_cfg` — ready to pass directly to `mtq.quantize()`. Presets compose -from the reusable snippets in `configs/numerics/` and `configs/ptq/units/` -via the `$import` system. +Each preset is a self-contained config with `quant_cfg` that can be +passed to `mtq.quantize()`. Presets compose from the reusable snippets +in `configs/numerics/` and `configs/ptq/units/` via the `$import` system. 
**Note:** The main purpose of these presets is to support the existing `hf_ptq.py` script's `--qformat` / `--kv_cache_qformat` flags and other From a8f5c0fd0d34e2a40c0f4d4d0938a3eee450a3a0 Mon Sep 17 00:00:00 2001 From: Shengliang Xu Date: Thu, 16 Apr 2026 18:05:52 -0700 Subject: [PATCH 30/30] more tests for better coverage Signed-off-by: Shengliang Xu --- tests/unit/recipe/test_loader.py | 122 +++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py index 222b0a190d..7c6e12de0d 100644 --- a/tests/unit/recipe/test_loader.py +++ b/tests/unit/recipe/test_loader.py @@ -914,3 +914,125 @@ def test_import_cross_file_same_name_no_conflict(tmp_path): # Parent's "fmt" resolves to fp8 (e4m3), not child's nvfp4 cfg = recipe.quantize["quant_cfg"][0]["cfg"] assert cfg == {"num_bits": (4, 3)} + + +# --------------------------------------------------------------------------- +# Coverage: _load_raw_config edge cases +# --------------------------------------------------------------------------- + + +def test_load_config_path_object(tmp_path): + """load_config accepts a Path object.""" + cfg_file = tmp_path / "test.yaml" + cfg_file.write_text("key: value\n") + data = load_config(cfg_file) + assert data == {"key": "value"} + + +def test_load_config_path_without_suffix(tmp_path): + """load_config probes .yml/.yaml suffixes for a Path without suffix.""" + cfg_file = tmp_path / "test.yaml" + cfg_file.write_text("key: value\n") + data = load_config(tmp_path / "test") # no suffix + assert data == {"key": "value"} + + +def test_load_config_empty_yaml(tmp_path): + """load_config returns empty dict for empty YAML file.""" + cfg_file = tmp_path / "empty.yaml" + cfg_file.write_text("") + data = load_config(cfg_file) + assert data == {} + + +def test_load_config_null_yaml(tmp_path): + """load_config returns empty dict for YAML file containing only null.""" + cfg_file = tmp_path / "null.yaml" + 
cfg_file.write_text("---\n") + data = load_config(cfg_file) + assert data == {} + + +def test_load_config_multi_doc_dict_dict(tmp_path): + """Multi-document YAML with two dicts merges them.""" + cfg_file = tmp_path / "multi.yaml" + cfg_file.write_text("imports:\n fp8: some/path\n---\nalgorithm: max\n") + from modelopt.torch.opt.config_loader import _load_raw_config + + data = _load_raw_config(cfg_file) + assert data["imports"] == {"fp8": "some/path"} + assert data["algorithm"] == "max" + + +def test_load_config_multi_doc_null_content(tmp_path): + """Multi-document YAML where second doc is null treats content as empty dict.""" + cfg_file = tmp_path / "multi_null.yaml" + cfg_file.write_text("key: value\n---\n") + from modelopt.torch.opt.config_loader import _load_raw_config + + data = _load_raw_config(cfg_file) + assert data == {"key": "value"} + + +def test_load_config_multi_doc_first_not_dict_raises(tmp_path): + """Multi-document YAML with non-dict first document raises ValueError.""" + cfg_file = tmp_path / "bad_multi.yaml" + cfg_file.write_text("- item1\n---\nkey: value\n") + with pytest.raises(ValueError, match="first YAML document must be a mapping"): + load_config(cfg_file) + + +def test_load_config_multi_doc_second_not_dict_or_list_raises(tmp_path): + """Multi-document YAML with scalar second document raises ValueError.""" + cfg_file = tmp_path / "bad_multi2.yaml" + cfg_file.write_text("key: value\n---\njust a string\n") + with pytest.raises(ValueError, match="second YAML document must be a mapping or list"): + load_config(cfg_file) + + +def test_load_config_three_docs_raises(tmp_path): + """YAML with 3+ documents raises ValueError.""" + cfg_file = tmp_path / "three_docs.yaml" + cfg_file.write_text("a: 1\n---\nb: 2\n---\nc: 3\n") + with pytest.raises(ValueError, match="expected 1 or 2 YAML documents"): + load_config(cfg_file) + + +def test_load_config_invalid_type_raises(): + """load_config with non-string/Path/Traversable raises ValueError.""" + with 
pytest.raises(ValueError, match="Invalid config file"): + load_config(12345) + + +def test_load_config_list_valued_yaml(tmp_path): + """load_config handles top-level YAML list.""" + cfg_file = tmp_path / "list.yaml" + cfg_file.write_text("- name: a\n value: 1\n- name: b\n value: 2\n") + data = load_config(cfg_file) + assert isinstance(data, list) + assert len(data) == 2 + assert data[0] == {"name": "a", "value": 1} + + +# --------------------------------------------------------------------------- +# Coverage: _resolve_imports edge cases +# --------------------------------------------------------------------------- + + +def test_import_dict_value_resolves_to_list_raises(tmp_path): + """$import in dict value position raises when snippet is a list.""" + (tmp_path / "entries.yml").write_text("- a: 1\n- b: 2\n") + config_file = tmp_path / "config.yml" + config_file.write_text( + f"imports:\n entries: {tmp_path / 'entries.yml'}\nmy_field:\n $import: entries\n" + ) + with pytest.raises(ValueError, match="must resolve to a dict"): + load_config(config_file) + + +def test_import_imports_not_a_dict_raises(tmp_path): + """imports section that is a list raises ValueError.""" + config_file = tmp_path / "config.yml" + config_file.write_text("imports:\n - some/path\nkey: value\n") + with pytest.raises(ValueError, match="must be a dict"): + load_config(config_file)