Merge remote-tracking branch 'origin/main' into feature/puzzletron

kevalmorabia97 · kevalmorabia97 · commit 977d60a80897 · 2026-04-10T16:38:04.000-07:00
diff --git a/examples/speculative_decoding/collect_hidden_states/compute_hidden_states_hf.py b/examples/speculative_decoding/collect_hidden_states/compute_hidden_states_hf.py
@@ -142,7 +142,8 @@ def keep_conversation(entry):
     tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
-    tokenizer.chat_template = tokenizer.chat_template.replace(REMOVE_THINK_CHAT_TEMPLATE, "")
+    if tokenizer.chat_template is not None:
+        tokenizer.chat_template = tokenizer.chat_template.replace(REMOVE_THINK_CHAT_TEMPLATE, "")
 
     output_dir = args.output_dir
     output_dir.mkdir(parents=True, exist_ok=True)
diff --git a/modelopt/torch/quantization/config.py b/modelopt/torch/quantization/config.py
@@ -1560,6 +1560,10 @@ def normalize_quant_cfg_list(v: dict | list) -> list[QuantizerCfgEntry]:
     - An empty entry ``{}``.
     - An entry with only ``quantizer_name`` and no other keys — the only effect would be an
       implicit ``enable=True``, which must be stated explicitly.
+    - An entry with ``enable=True`` (explicit or implicit) whose ``cfg`` is present but is
+      neither a non-empty ``dict`` nor a non-empty ``list`` of non-empty ``dict``s — e.g.
+      ``{"quantizer_name": "*", "cfg": {}}`` or ``{"quantizer_name": "*", "cfg": 42}``.
+      An enabled quantizer must have a valid configuration.
 
     **Normalization** — after conversion and validation every entry is put into canonical form:
 
@@ -1577,7 +1581,8 @@ def normalize_quant_cfg_list(v: dict | list) -> list[QuantizerCfgEntry]:
 
     Raises:
         ValueError: If any entry has only ``quantizer_name`` with neither ``cfg`` nor ``enable``,
-            or if the entry format is not recognized.
+            if an enabled entry's ``cfg`` is empty, is not a dict/list, or is a list containing
+            an empty or non-dict item, or if the entry format is not recognized.
     """
 
     def _warn_legacy():
@@ -1662,6 +1667,28 @@ def _dict_to_entry(key: str, value) -> list[QuantizerCfgEntry]:
                     "enable=True is not allowed; set it explicitly)."
                 )
 
+            # Validate: when cfg is present and enable=True, cfg must be a non-empty
+            # dict or a non-empty list of non-empty dicts.  An empty cfg would attempt
+            # to create a QuantizerAttributeConfig with no actual configuration.
+            cfg = entry.get("cfg")
+            enable = entry.get("enable", True)
+            if enable and cfg is not None:
+                if isinstance(cfg, dict):
+                    is_invalid = len(cfg) == 0
+                elif isinstance(cfg, list):
+                    is_invalid = len(cfg) == 0 or any(
+                        not isinstance(item, dict) or len(item) == 0 for item in cfg
+                    )
+                else:
+                    is_invalid = True
+                if is_invalid:
+                    raise ValueError(
+                        f"Invalid quant_cfg entry: {raw!r} — 'cfg' must be a non-empty dict "
+                        f"or a non-empty list of non-empty dicts when enabling a quantizer "
+                        f"(got {type(cfg).__name__}: {cfg!r}). Either provide quantizer "
+                        "attributes in 'cfg' or remove 'cfg' and set 'enable' explicitly."
+                    )
+
             # Normalize: make enable and cfg always explicit.
             entry.setdefault("enable", True)
             entry.setdefault("cfg", None)
diff --git a/tests/examples/speculative_decoding/conftest.py b/tests/examples/speculative_decoding/conftest.py
@@ -13,11 +13,38 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
+
 import pytest
 import yaml
 from _test_utils.examples.run_command import run_example_command
 
 
+@pytest.fixture(scope="session")
+def tiny_conversations_path(tmp_path_factory):
+    """Write a tiny JSONL of synthetic conversations for compute_hidden_states_hf tests.
+
+    Uses five minimal single-turn conversations so that tokenized lengths stay well
+    within the tiny test model's max_position_embeddings (32) even after chat
+    template formatting.  Returns the path of the written ``train.jsonl``.
+    """
+    tmp_dir = tmp_path_factory.mktemp("tiny_convs")
+    output_file = tmp_dir / "train.jsonl"
+    conversations = [
+        {
+            "conversation_id": f"test-{i}",
+            "conversations": [
+                {"role": "user", "content": "What is 2 plus 2?"},
+                {"role": "assistant", "content": "4"},
+            ],
+        }
+        for i in range(5)
+    ]
+    with open(output_file, "w", encoding="utf-8") as f:
+        f.writelines(json.dumps(conv) + "\n" for conv in conversations)
+    return output_file
+
+
 @pytest.fixture(scope="session", autouse=True)
 def tiny_daring_anteater_path(tmp_path_factory):
     tmp_dir = tmp_path_factory.mktemp("daring_anteater")
diff --git a/tests/examples/speculative_decoding/test_eagle_offline_ptq.py b/tests/examples/speculative_decoding/test_eagle_offline_ptq.py
@@ -55,7 +55,7 @@ def offline_ptq_dirs(tmp_path_factory):
     }
 
 
-def test_collect_hidden_states(tiny_llama_path, tiny_daring_anteater_path, offline_ptq_dirs):
+def test_collect_hidden_states(tiny_llama_path, tiny_conversations_path, offline_ptq_dirs):
     """Stage 1: generate .pt hidden state files from the base model."""
     run_example_command(
         [
@@ -64,11 +64,13 @@ def test_collect_hidden_states(tiny_llama_path, tiny_daring_anteater_path, offli
             "--model",
             tiny_llama_path,
             "--input-data",
-            str(tiny_daring_anteater_path),
+            str(tiny_conversations_path),
             "--output-dir",
             str(offline_ptq_dirs["hidden_states"]),
             "--debug-max-num-conversations",
             "2",
+            "--max-seq-len",
+            "32",
         ],
         "speculative_decoding",
     )
diff --git a/tests/unit/torch/quantization/test_config_validation.py b/tests/unit/torch/quantization/test_config_validation.py
@@ -163,6 +163,60 @@ def test_error_on_multi_key_legacy_dict(self):
         with pytest.raises(ValueError):
             normalize_quant_cfg_list([{"*weight_quantizer": {}, "*input_quantizer": {}}])
 
+    def test_error_on_empty_cfg_dict_implicit_enable(self):
+        """Entry with cfg={} and implicit enable=True is rejected."""
+        with pytest.raises(ValueError, match="non-empty dict"):
+            normalize_quant_cfg_list([{"quantizer_name": "*weight_quantizer", "cfg": {}}])
+
+    def test_error_on_empty_cfg_dict_explicit_enable_true(self):
+        """Entry with cfg={} and explicit enable=True is rejected."""
+        with pytest.raises(ValueError, match="non-empty dict"):
+            normalize_quant_cfg_list(
+                [{"quantizer_name": "*weight_quantizer", "cfg": {}, "enable": True}]
+            )
+
+    def test_error_on_empty_cfg_list_enable_true(self):
+        """Entry with cfg=[] and enable=True is rejected."""
+        with pytest.raises(ValueError, match="non-empty dict"):
+            normalize_quant_cfg_list(
+                [{"quantizer_name": "*weight_quantizer", "cfg": [], "enable": True}]
+            )
+
+    def test_error_on_non_dict_non_list_cfg_enable_true(self):
+        """Entry with cfg of invalid type (e.g. int) and enable=True is rejected."""
+        with pytest.raises(ValueError, match="non-empty dict"):
+            normalize_quant_cfg_list(
+                [{"quantizer_name": "*weight_quantizer", "cfg": 42, "enable": True}]
+            )
+
+    def test_error_on_cfg_list_with_empty_dict_enable_true(self):
+        """Entry with cfg=[{}] and enable=True is rejected (empty dict element)."""
+        with pytest.raises(ValueError, match="non-empty dict"):
+            normalize_quant_cfg_list(
+                [{"quantizer_name": "*weight_quantizer", "cfg": [{}], "enable": True}]
+            )
+
+    def test_error_on_cfg_list_with_non_dict_element_enable_true(self):
+        """Entry with cfg=[42] and enable=True is rejected (non-dict element)."""
+        with pytest.raises(ValueError, match="non-empty dict"):
+            normalize_quant_cfg_list(
+                [{"quantizer_name": "*weight_quantizer", "cfg": [42], "enable": True}]
+            )
+
+    def test_empty_cfg_dict_enable_false_accepted(self):
+        """Entry with cfg={} and enable=False is allowed (disable-only entry)."""
+        result = normalize_quant_cfg_list(
+            [{"quantizer_name": "*input_quantizer", "cfg": {}, "enable": False}]
+        )
+        assert result[0]["enable"] is False
+
+    def test_empty_cfg_list_enable_false_accepted(self):
+        """Entry with cfg=[] and enable=False is allowed (disable-only entry)."""
+        result = normalize_quant_cfg_list(
+            [{"quantizer_name": "*input_quantizer", "cfg": [], "enable": False}]
+        )
+        assert result[0]["enable"] is False
+
     def test_new_format_with_list_cfg(self):
         """cfg can be a list of dicts for SequentialQuantizer."""
         raw = [