address review comments

juhi10071998 · juhi10071998 · commit 5877887be966 · 2026-05-22T00:17:45.000Z
Signed-off-by: Juhi Mittal <juhim@nvidia.com>
diff --git a/examples/llm_ptq/hf_ptq.py b/examples/llm_ptq/hf_ptq.py
@@ -1084,7 +1084,7 @@ def _is_layerwise(obj):
     # All auto_quantize() knobs are resolved here before calling the helper.
     # Helper is a leaf orchestrator — it does not know whether inputs came from
     # CLI args or a recipe.
-    if isinstance(recipe, ModelOptAutoQuantizeRecipe) or args.auto_quantize_bits:
+    if isinstance(recipe, ModelOptAutoQuantizeRecipe) or args.auto_quantize_bits is not None:
         default_disabled_layers = [
             entry["quantizer_name"]
             for entry in _default_disabled_quantizer_cfg
diff --git a/modelopt/recipe/config.py b/modelopt/recipe/config.py
@@ -19,7 +19,7 @@
 
 import warnings
 from enum import Enum
-from typing import Literal
+from typing import ClassVar, Literal
 
 from pydantic import Field, field_validator, model_validator
 
@@ -109,13 +109,41 @@ class ModelOptPTQRecipe(ModelOptRecipeBase):
 class AutoQuantizeKVCache(ModeloptBaseConfig):
     """KV-cache configuration for an AutoQuantize recipe (optional)."""
 
+    # Mirrors the keys of KV_QUANT_CFG_CHOICES in examples/llm_ptq/hf_ptq.py.
+    # Kept inline (rather than imported) so the recipe schema stays free of
+    # example-script dependencies. Update both sides if new KV variants land.
+    # ClassVar annotation tells Pydantic this is a class-level constant, not a
+    # private model attribute (which is the default for leading-underscore names).
+    _SUPPORTED_QFORMATS: ClassVar[frozenset[str]] = frozenset(
+        {
+            "none",
+            "fp8_cast",
+            "fp8",
+            "fp8_affine",
+            "nvfp4_cast",
+            "nvfp4",
+            "nvfp4_affine",
+            "nvfp4_rotate",
+        }
+    )
+
     qformat: str | None = ModeloptField(
         default=None,
         title="KV cache quantization format",
         description="One of the entries in KV_QUANT_CFG_CHOICES, or 'none' to disable. "
         "If omitted, the runtime --kv_cache_qformat CLI flag is used.",
     )
 
+    @field_validator("qformat")
+    @classmethod
+    def _validate_qformat(cls, v: str | None) -> str | None:
+        if v is not None and v not in cls._SUPPORTED_QFORMATS:
+            raise ValueError(
+                f"Unsupported kv_cache.qformat: {v!r}. "
+                f"Expected one of {sorted(cls._SUPPORTED_QFORMATS)} or None."
+            )
+        return v
+
 
 class AutoQuantizeConstraints(ModeloptBaseConfig):
     """Constraints passed to ``mtq.auto_quantize`` (matches its dict shape).
diff --git a/tests/unit/recipe/test_loader.py b/tests/unit/recipe/test_loader.py
@@ -20,6 +20,7 @@
 
 import pytest
 
+import modelopt.torch.quantization as mtq
 from modelopt.recipe.config import (
     ModelOptAutoQuantizeRecipe,
     ModelOptDFlashRecipe,
@@ -285,8 +286,6 @@ def test_load_recipe_autoquantize_defaults():
 
 def test_load_recipe_autoquantize_candidates_match_presets():
     """Built-in AutoQuantize recipe's $imported candidates equal mtq.X_DEFAULT_CFG dicts."""
-    import modelopt.torch.quantization as mtq
-
     recipe = load_recipe("general/auto_quantize/nvfp4_fp8_at_4p8bits-kv_fp8_cast")
     candidates = recipe.auto_quantize.candidate_formats
     assert candidates[0].model_dump(exclude_unset=True) == mtq.NVFP4_DEFAULT_CFG
@@ -334,6 +333,14 @@ def test_load_recipe_autoquantize_kv_cache_optional(tmp_path):
     assert recipe.auto_quantize.kv_cache is None
 
 
+def test_load_recipe_autoquantize_invalid_kv_qformat_raises(tmp_path):
+    """An unknown kv_cache.qformat is rejected at recipe-load time, not later."""
+    bad = tmp_path / "bad.yml"
+    bad.write_text(_AQ_MINIMAL_BODY + "  kv_cache:\n    qformat: not_a_real_format\n")
+    with pytest.raises(ValueError, match="kv_cache.qformat"):
+        load_recipe(bad)
+
+
 # ---------------------------------------------------------------------------
 # load_recipe — EAGLE speculative decoding
 # ---------------------------------------------------------------------------