Skip to content

Commit 30655d6

Browse files
hallerite and claude committed
fix(tokenizer): fall back to direct fast-tokenizer load when model config build fails
`AutoTokenizer.from_pretrained` eagerly constructs the *model* config to resolve the tokenizer class — even for a plain `PreTrainedTokenizerFast`. That construction runs HF's RoPE validator, which rejects configs carrying nested `rope_parameters` (e.g. poolside/Laguna-XS.2: `full_attention` / `sliding_attention` blocks with no top-level `rope_theta`) when the config is built outside vLLM's `patch_rope_parameters`. The resulting `KeyError` escapes (AutoTokenizer only catches `ValueError`/`OSError`) and kills the tokenizer load — a modeling-only concern breaking something the tokenizer never needed.

`renderers` needs the tokenizer, not the model. When `AutoTokenizer` fails while building the config, fall back to loading the repo's self-contained `tokenizer.json` directly via `PreTrainedTokenizerFast`, which never touches the model config. The fallback runs under the fastokens patch, so models like Laguna keep the Rust fast-path speedup. Custom `auto_map` tokenizers and repos without a fast tokenizer are left to surface the original error.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent e729baa commit 30655d6

1 file changed

Lines changed: 69 additions & 6 deletions

File tree

renderers/base.py

Lines changed: 69 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,7 +1089,6 @@ def _patched_load(model_name_or_path: str, **kwargs):
10891089
path is still discoverable in logs.
10901090
"""
10911091
import fastokens
1092-
from transformers import AutoTokenizer
10931092

10941093
global _FASTOKENS_ANNOUNCED
10951094

@@ -1102,13 +1101,72 @@ def _patched_load(model_name_or_path: str, **kwargs):
11021101
)
11031102
_FASTOKENS_ANNOUNCED = True
11041103
try:
1105-
return AutoTokenizer.from_pretrained(model_name_or_path, **kwargs)
1104+
return _load_tokenizer_via_auto(model_name_or_path, **kwargs)
11061105
finally:
11071106
with _FASTOKENS_PATCH_LOCK:
11081107
with contextlib.redirect_stdout(io.StringIO()):
11091108
fastokens.unpatch_transformers()
11101109

11111110

1111+
def _load_fast_tokenizer_directly(
    model_name_or_path: str, revision: str | None
) -> Any | None:
    """Try to load the repo's fast tokenizer without going through ``AutoTokenizer``.

    ``AutoTokenizer.from_pretrained`` builds the *model* config in order to
    pick a tokenizer class, and that step can fail on modeling-only problems
    (for example RoPE parameter validation on configs with nested
    ``rope_parameters``) that have nothing to do with tokenization. This
    helper sidesteps that step by instantiating ``PreTrainedTokenizerFast``
    straight from the repo.

    Args:
        model_name_or_path: Repo id or local path handed to the
            ``transformers`` loaders.
        revision: Revision forwarded to both the tokenizer-config lookup and
            the tokenizer load; ``None`` lets ``transformers`` use its
            default.

    Returns:
        The loaded fast tokenizer, or ``None`` when the direct route is not
        applicable: the tokenizer config declares an ``auto_map`` (a custom
        tokenizer that has to go through ``AutoTokenizer``), or any step of
        the direct load raised (e.g. the repo has no usable fast tokenizer).
        ``None`` signals the caller to surface its own original error.
    """
    from transformers import PreTrainedTokenizerFast
    from transformers.models.auto.tokenization_auto import get_tokenizer_config

    tokenizer = None
    try:
        tokenizer_config = get_tokenizer_config(
            model_name_or_path, revision=revision
        )
        # An ``auto_map`` entry means the repo ships its own tokenizer class;
        # the generic fast class must not stand in for it.
        if "auto_map" not in tokenizer_config:
            tokenizer = PreTrainedTokenizerFast.from_pretrained(
                model_name_or_path, revision=revision
            )
    except Exception:
        # Best-effort probe: any failure here just means "no fallback".
        tokenizer = None
    return tokenizer
1140+
1141+
1142+
def _load_tokenizer_via_auto(model_name_or_path: str, **kwargs) -> Any:
    """Load a tokenizer with ``AutoTokenizer``, retrying without the model config.

    The first attempt is a plain ``AutoTokenizer.from_pretrained`` call. If
    it raises, the repo's fast tokenizer is loaded directly through
    ``_load_fast_tokenizer_directly``, which never builds the model config.

    Args:
        model_name_or_path: Repo id or local path of the tokenizer.
        **kwargs: Forwarded unchanged to ``AutoTokenizer.from_pretrained``.
            Only ``revision`` (if present) is passed on to the fallback.

    Returns:
        The tokenizer from ``AutoTokenizer``, or the directly loaded fast
        tokenizer when the first attempt failed and the fallback succeeded.

    Raises:
        Exception: The original ``AutoTokenizer`` error, re-raised unchanged
            when the fallback yields no tokenizer.
    """
    from transformers import AutoTokenizer

    try:
        return AutoTokenizer.from_pretrained(model_name_or_path, **kwargs)
    except Exception as auto_error:
        pinned_revision = kwargs.get("revision")
        fallback_tokenizer = _load_fast_tokenizer_directly(
            model_name_or_path, revision=pinned_revision
        )
        if fallback_tokenizer is not None:
            error_kind = type(auto_error).__name__
            # Cap the message so an oversized error text cannot flood the log.
            error_text = str(auto_error)[:160]
            logger.debug(
                "AutoTokenizer.from_pretrained(%r) failed building the model config "
                "(%s: %s); loaded the tokenizer directly from tokenizer.json.",
                model_name_or_path,
                error_kind,
                error_text,
            )
            return fallback_tokenizer
        # No fallback available: propagate the AutoTokenizer failure as-is.
        raise
1168+
1169+
11121170
def load_tokenizer(
11131171
model_name_or_path: str,
11141172
*,
@@ -1138,9 +1196,14 @@ def load_tokenizer(
11381196
fastokens raises during the patched load (e.g. an unknown
11391197
pre-tokenizer type), we automatically retry with the vanilla
11401198
backend and emit an INFO log.
1141-
"""
1142-
from transformers import AutoTokenizer
11431199
1200+
``AutoTokenizer.from_pretrained`` eagerly builds the model config to
1201+
resolve the tokenizer class. If that construction raises on a
1202+
modeling-only concern the tokenizer doesn't need (e.g. RoPE
1203+
validation for configs with nested ``rope_parameters``), we fall
1204+
back to loading the repo's self-contained ``tokenizer.json``
1205+
directly — see ``_load_tokenizer_via_auto``.
1206+
"""
11441207
kwargs: dict[str, Any] = {}
11451208
revision = TRUSTED_REVISIONS.get(model_name_or_path)
11461209
if revision is not None:
@@ -1149,7 +1212,7 @@ def load_tokenizer(
11491212
kwargs = {"trust_remote_code": False}
11501213

11511214
if not use_fastokens or model_name_or_path in FASTOKENS_INCOMPATIBLE:
1152-
return AutoTokenizer.from_pretrained(model_name_or_path, **kwargs)
1215+
return _load_tokenizer_via_auto(model_name_or_path, **kwargs)
11531216

11541217
try:
11551218
return _patched_load(model_name_or_path, **kwargs)
@@ -1162,7 +1225,7 @@ def load_tokenizer(
11621225
type(exc).__name__,
11631226
str(exc)[:160],
11641227
)
1165-
return AutoTokenizer.from_pretrained(model_name_or_path, **kwargs)
1228+
return _load_tokenizer_via_auto(model_name_or_path, **kwargs)
11661229

11671230

11681231
def _populate_registry():

0 commit comments

Comments
 (0)