
Commit ff0df2b

yeyu-nvidia and claude authored and committed
Fix test_collect_hidden_states: use synthetic short conversations (#1234)
## Summary

- `test_collect_hidden_states` was using real daring-anteater conversations (typically 1000+ tokens), but the tiny test model has `max_position_embeddings=32`. Both sampled conversations exceeded the default `--max-seq-len 3072` filter, producing zero `.pt` files and failing the assertion.
- Added a `tiny_conversations_path` fixture with synthetic short single-turn conversations that tokenize within `max_position_embeddings=32`.
- Changed `test_collect_hidden_states` to use this fixture with `--max-seq-len 32`.
- Added a `None` guard around `tokenizer.chat_template.replace(...)` to avoid an `AttributeError` when the tokenizer has no chat template.

## Test plan

- [ ] `pytest tests/examples/speculative_decoding/test_eagle_offline_ptq.py::test_collect_hidden_states` passes
- [ ] CI `speculative_decoding` job passes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

## Summary by CodeRabbit

* **Bug Fixes**
  * Resolved compatibility issues when tokenizers do not have a chat template configuration by adding proper error handling.
  * Standardized tokenization input extraction logic across different transformer library versions for consistent behavior.
* **Tests**
  * Enhanced test infrastructure with new conversation data fixtures and improved sequence length validation for speculative decoding examples.

---

Signed-off-by: Ye Yu <yeyu@nvidia.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 77902d5 commit ff0df2b

File tree

3 files changed: +33 −3 lines changed


examples/speculative_decoding/collect_hidden_states/compute_hidden_states_hf.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -142,7 +142,8 @@ def keep_conversation(entry):
     tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
     if tokenizer.pad_token is None:
         tokenizer.pad_token = tokenizer.eos_token
-    tokenizer.chat_template = tokenizer.chat_template.replace(REMOVE_THINK_CHAT_TEMPLATE, "")
+    if tokenizer.chat_template is not None:
+        tokenizer.chat_template = tokenizer.chat_template.replace(REMOVE_THINK_CHAT_TEMPLATE, "")
 
     output_dir = args.output_dir
     output_dir.mkdir(parents=True, exist_ok=True)
```
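The guard above can be illustrated standalone. This is a minimal sketch using a stand-in object in place of a real Hugging Face tokenizer; `FakeTokenizer` and the `REMOVE_THINK_CHAT_TEMPLATE` value here are illustrative assumptions, not the script's actual definitions.

```python
# Illustrative marker string; the real constant lives in the script above.
REMOVE_THINK_CHAT_TEMPLATE = "<think></think>"


class FakeTokenizer:
    """Stand-in for AutoTokenizer: chat_template may be None for some models."""

    def __init__(self, chat_template):
        self.chat_template = chat_template


def strip_think_marker(tokenizer):
    # Guard: tokenizers without a chat template have chat_template == None,
    # and None.replace(...) would raise AttributeError.
    if tokenizer.chat_template is not None:
        tokenizer.chat_template = tokenizer.chat_template.replace(
            REMOVE_THINK_CHAT_TEMPLATE, ""
        )
    return tokenizer


with_template = strip_think_marker(FakeTokenizer("hi<think></think>there"))
without_template = strip_think_marker(FakeTokenizer(None))  # no AttributeError
print(with_template.chat_template)     # "hithere"
print(without_template.chat_template)  # None
```

Without the `is not None` check, the second call would crash before the script ever reached tokenization.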

tests/examples/speculative_decoding/conftest.py

Lines changed: 27 additions & 0 deletions
```diff
@@ -13,11 +13,38 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import json
+
 import pytest
 import yaml
 from _test_utils.examples.run_command import run_example_command
 
 
+@pytest.fixture(scope="session")
+def tiny_conversations_path(tmp_path_factory):
+    """Tiny JSONL with short synthetic conversations for compute_hidden_states_hf tests.
+
+    Uses minimal single-turn conversations so that tokenized lengths stay well
+    within the tiny test model's max_position_embeddings (32) even after chat
+    template formatting.
+    """
+    tmp_dir = tmp_path_factory.mktemp("tiny_convs")
+    output_file = tmp_dir / "train.jsonl"
+    conversations = [
+        {
+            "conversation_id": f"test-{i}",
+            "conversations": [
+                {"role": "user", "content": "What is 2 plus 2?"},
+                {"role": "assistant", "content": "4"},
+            ],
+        }
+        for i in range(5)
+    ]
+    with open(output_file, "w") as f:
+        f.writelines(json.dumps(conv) + "\n" for conv in conversations)
+    return output_file
+
+
 @pytest.fixture(scope="session", autouse=True)
 def tiny_daring_anteater_path(tmp_path_factory):
     tmp_dir = tmp_path_factory.mktemp("daring_anteater")
```
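The fixture's JSONL layout can be sketched without pytest. This standalone version mirrors the fixture body above, swapping `tmp_path_factory` for `tempfile.mkdtemp` so it runs outside a test session.

```python
import json
import tempfile
from pathlib import Path

# Stand-in for pytest's tmp_path_factory in this standalone sketch.
tmp_dir = Path(tempfile.mkdtemp(prefix="tiny_convs"))
output_file = tmp_dir / "train.jsonl"

# Same synthetic single-turn conversations the fixture writes: short enough
# to tokenize within max_position_embeddings=32 even after chat templating.
conversations = [
    {
        "conversation_id": f"test-{i}",
        "conversations": [
            {"role": "user", "content": "What is 2 plus 2?"},
            {"role": "assistant", "content": "4"},
        ],
    }
    for i in range(5)
]
with open(output_file, "w") as f:
    f.writelines(json.dumps(conv) + "\n" for conv in conversations)

# Read it back: one JSON object per line, five short conversations total.
loaded = [json.loads(line) for line in output_file.read_text().splitlines()]
print(len(loaded))                              # 5
print(loaded[0]["conversations"][0]["content"]) # "What is 2 plus 2?"
```

One JSON object per line (JSONL) is the input format `compute_hidden_states_hf.py` expects via `--input-data`.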

tests/examples/speculative_decoding/test_eagle_offline_ptq.py

Lines changed: 4 additions & 2 deletions
```diff
@@ -55,7 +55,7 @@ def offline_ptq_dirs(tmp_path_factory):
     }
 
 
-def test_collect_hidden_states(tiny_llama_path, tiny_daring_anteater_path, offline_ptq_dirs):
+def test_collect_hidden_states(tiny_llama_path, tiny_conversations_path, offline_ptq_dirs):
     """Stage 1: generate .pt hidden state files from the base model."""
     run_example_command(
         [
@@ -64,11 +64,13 @@ def test_collect_hidden_states(tiny_llama_path, tiny_daring_anteater_path, offli
             "--model",
             tiny_llama_path,
             "--input-data",
-            str(tiny_daring_anteater_path),
+            str(tiny_conversations_path),
             "--output-dir",
             str(offline_ptq_dirs["hidden_states"]),
             "--debug-max-num-conversations",
             "2",
+            "--max-seq-len",
+            "32",
         ],
         "speculative_decoding",
     )
```
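To see why the old test produced zero `.pt` files, the length filter described in the summary can be sketched as a hedged stand-in: `filter_by_length` and the token counts below are hypothetical illustrations, not the collection script's actual code.

```python
def filter_by_length(token_counts, max_seq_len):
    """Return indices of conversations whose token count passes the filter.

    Mirrors the behavior described in the commit summary: any conversation
    longer than --max-seq-len is skipped and produces no .pt output.
    """
    return [i for i, n in enumerate(token_counts) if n <= max_seq_len]


# Before the fix: both sampled real conversations exceeded the limit, so the
# filter kept nothing and the test's assertion on .pt files failed.
print(filter_by_length([4100, 3900], max_seq_len=3072))  # []

# After the fix: synthetic short conversations all pass --max-seq-len 32.
print(filter_by_length([14, 12, 15, 13, 14], max_seq_len=32))  # [0, 1, 2, 3, 4]
```

An empty result upstream means no hidden-state files downstream, which is exactly the failure mode the new fixture avoids.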
