Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,8 @@ def keep_conversation(entry):
tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
tokenizer.chat_template = tokenizer.chat_template.replace(REMOVE_THINK_CHAT_TEMPLATE, "")
if tokenizer.chat_template is not None:
tokenizer.chat_template = tokenizer.chat_template.replace(REMOVE_THINK_CHAT_TEMPLATE, "")

output_dir = args.output_dir
output_dir.mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -206,10 +207,12 @@ async def submit_generates():
continue

# Tokenize and check length
tokenized = tokenizer.apply_chat_template(
conversations, return_tensors="pt", add_generation_template=False
)
input_ids = tokenized["input_ids"] if isinstance(tokenized, dict) else tokenized
# return_dict=True ensures BatchEncoding is returned on all transformers
# versions: in <5.0 the default is False (returns raw tensor), in 5.0+
# the default changed to True (returns BatchEncoding).
input_ids = tokenizer.apply_chat_template(
conversations, return_tensors="pt", return_dict=True, add_generation_template=False
)["input_ids"]
Comment on lines +213 to +215
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🌐 Web query:

In Hugging Face Transformers, what is the expected behavior of tokenizer.apply_chat_template(...) when tokenizer.chat_template is None, and what fallback is recommended?

💡 Result:

When tokenizer.chat_template is None and no chat_template argument is passed to tokenizer.apply_chat_template(...), it raises a ValueError with the message: "Cannot use apply_chat_template because tokenizer.chat_template is not set and no template argument was passed! For information about writing templates and setting the tokenizer.chat_template attribute, please see the documentation at https://huggingface.co/docs/transformers/main/en/chat_templating". The recommended fallback is to either set tokenizer.chat_template to a suitable Jinja template string (e.g., copy from a similar model or use ChatML format), or pass a chat_template string directly as a keyword argument to apply_chat_template. Official docs encourage setting tokenizer.chat_template explicitly and pushing to the Hub for chat models lacking one. Recent versions removed automatic class-level default templates to avoid issues.

Citations:


🏁 Script executed:

cat -n examples/speculative_decoding/collect_hidden_states/compute_hidden_states_hf.py | sed -n '135,220p'

Repository: NVIDIA/Model-Optimizer

Length of output: 4355


Handle tokenizers without chat templates in the tokenization path.

Line 213 unconditionally calls apply_chat_template; if tokenizer.chat_template is None, this raises a ValueError at runtime even though a check exists at line 145. The line 145 guard only modifies an existing template and does not prevent this code path. Add a fallback for tokenizers without chat templates:

Proposed fix
-            input_ids = tokenizer.apply_chat_template(
-                conversations, return_tensors="pt", return_dict=True, add_generation_template=False
-            )["input_ids"]
+            if tokenizer.chat_template is not None:
+                input_ids = tokenizer.apply_chat_template(
+                    conversations,
+                    return_tensors="pt",
+                    return_dict=True,
+                    add_generation_template=False,
+                )["input_ids"]
+            else:
+                plain_text = "\n".join(
+                    f"{msg.get('role', 'user')}: {msg.get('content', '')}" for msg in conversations
+                )
+                input_ids = tokenizer(plain_text, return_tensors="pt")["input_ids"]
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
input_ids = tokenizer.apply_chat_template(
conversations, return_tensors="pt", return_dict=True, add_generation_template=False
)["input_ids"]
if tokenizer.chat_template is not None:
input_ids = tokenizer.apply_chat_template(
conversations,
return_tensors="pt",
return_dict=True,
add_generation_template=False,
)["input_ids"]
else:
plain_text = "\n".join(
f"{msg.get('role', 'user')}: {msg.get('content', '')}" for msg in conversations
)
input_ids = tokenizer(plain_text, return_tensors="pt")["input_ids"]
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In
`@examples/speculative_decoding/collect_hidden_states/compute_hidden_states_hf.py`
around lines 213 - 215, The code unconditionally calls
tokenizer.apply_chat_template which raises if tokenizer.chat_template is None;
update the tokenization path to check tokenizer.chat_template and use
apply_chat_template only when present, otherwise call tokenizer(conversations,
return_tensors="pt", return_dict=True, add_special_tokens=False) (or equivalent
non-chat tokenization) to produce the same return structure and extract
["input_ids"]; modify the block around tokenizer.apply_chat_template and ensure
downstream code still expects the same keys (e.g., input_ids) so
compute_hidden_states_hf.py's tokenization works for tokenizers both with and
without chat templates.

num_input_tokens = input_ids.shape[1]
if num_input_tokens <= 10 or num_input_tokens > args.max_seq_len:
num_skipped_too_long += 1
Expand Down
27 changes: 27 additions & 0 deletions tests/examples/speculative_decoding/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,38 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import json

import pytest
import yaml
from _test_utils.examples.run_command import run_example_command


@pytest.fixture(scope="session")
def tiny_conversations_path(tmp_path_factory):
    """Tiny JSONL with short synthetic conversations for compute_hidden_states_hf tests.

    Each record is a minimal single-turn exchange, keeping tokenized lengths
    well within the tiny test model's max_position_embeddings (32) even after
    chat-template formatting.
    """
    jsonl_path = tmp_path_factory.mktemp("tiny_convs") / "train.jsonl"
    with open(jsonl_path, "w") as handle:
        # Write one JSON object per line (JSONL), five records in total.
        for idx in range(5):
            record = {
                "conversation_id": f"test-{idx}",
                "conversations": [
                    {"role": "user", "content": "What is 2 plus 2?"},
                    {"role": "assistant", "content": "4"},
                ],
            }
            handle.write(json.dumps(record) + "\n")
    return jsonl_path


@pytest.fixture(scope="session", autouse=True)
def tiny_daring_anteater_path(tmp_path_factory):
tmp_dir = tmp_path_factory.mktemp("daring_anteater")
Expand Down
6 changes: 4 additions & 2 deletions tests/examples/speculative_decoding/test_eagle_offline_ptq.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def offline_ptq_dirs(tmp_path_factory):
}


def test_collect_hidden_states(tiny_llama_path, tiny_daring_anteater_path, offline_ptq_dirs):
def test_collect_hidden_states(tiny_llama_path, tiny_conversations_path, offline_ptq_dirs):
"""Stage 1: generate .pt hidden state files from the base model."""
run_example_command(
[
Expand All @@ -64,11 +64,13 @@ def test_collect_hidden_states(tiny_llama_path, tiny_daring_anteater_path, offli
"--model",
tiny_llama_path,
"--input-data",
str(tiny_daring_anteater_path),
str(tiny_conversations_path),
"--output-dir",
str(offline_ptq_dirs["hidden_states"]),
"--debug-max-num-conversations",
"2",
"--max-seq-len",
"32",
],
"speculative_decoding",
)
Expand Down
Loading