Skip to content

Commit 372c30f

Browse files
fix(distributed): use apply_chat_template(tokenize=True, return_dict=False) for cross-version token ids
transformers 5.x returns a dict ({input_ids, attention_mask}) from apply_chat_template by default, so verifier.prefill received the dict's keys instead of token ids and raised "'str' object cannot be interpreted as an integer" on the Mac (which runs 5.x). Adopt the proven kv_cache_proposer.proposer.encode_chat convention (return_dict=False) in both the distributed integration test fixture and the demo, replacing the demo's ad-hoc coercion. Both distributed integration tests pass locally (2 passed). Co-authored-by: FluffyAIcode <FluffyAIcode@users.noreply.github.com>
1 parent 1fc68f4 commit 372c30f

2 files changed

Lines changed: 13 additions & 15 deletions

File tree

scripts/demo_distributed_spec_decode.py

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -174,24 +174,17 @@ async def _run_verifier_node(args: argparse.Namespace) -> int:
174174
# Echo-style answers are where the n-gram proposer shines; Qwen3's thinking
175175
# preamble is novel text the lookup cannot draft. Templates without the
176176
# variable ignore it harmlessly.
177-
# transformers 4.x returns token ids from apply_chat_template(tokenize=True);
178-
# 5.x can return a string (or a BatchEncoding) — coerce to a flat List[int]
179-
# so this works across both (the Mac engine needs transformers 5.x).
180-
_templated = verifier.tokenizer.apply_chat_template(
177+
# transformers 5.x returns a dict by default with tokenize=True; request the
178+
# legacy flat list-of-ids shape (return_dict=False) so this works on both
179+
# 4.x and 5.x — same convention as kv_cache_proposer.proposer.encode_chat
180+
# (the Mac engine runs transformers 5.x).
181+
prompt_ids = verifier.tokenizer.apply_chat_template(
181182
[{"role": "user", "content": args.prompt}],
182-
add_generation_prompt=True, tokenize=True,
183+
add_generation_prompt=True,
184+
tokenize=True,
185+
return_dict=False,
183186
enable_thinking=args.enable_thinking,
184187
)
185-
if isinstance(_templated, str):
186-
prompt_ids = list(verifier.tokenizer.encode(_templated))
187-
else:
188-
if hasattr(_templated, "input_ids"):
189-
_templated = _templated.input_ids
190-
if hasattr(_templated, "tolist"):
191-
_templated = _templated.tolist()
192-
if _templated and isinstance(_templated[0], (list, tuple)):
193-
_templated = _templated[0]
194-
prompt_ids = [int(x) for x in _templated]
195188

196189
# --- 4. Greedy baseline (same verifier, local only) --------------
197190
t0 = time.perf_counter()

tests/integration/test_distributed_spec_decode_real.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,14 @@ def verifier() -> SinkWindowVerifier:
7171

7272
@pytest.fixture(scope="module")
7373
def prompt_ids(verifier) -> List[int]:
74+
# transformers 5.x returns a dict by default with tokenize=True; request the
75+
# legacy flat list-of-ids shape so it matches on 4.x and 5.x (same convention
76+
# as kv_cache_proposer.proposer.encode_chat).
7477
return verifier.tokenizer.apply_chat_template(
7578
[{"role": "user", "content": PROMPT}],
7679
add_generation_prompt=True,
80+
tokenize=True,
81+
return_dict=False,
7782
)
7883

7984

0 commit comments

Comments
 (0)