Commit ee272c3

Prefer tokenizer for text-only Gemma 4 MLX runs
1 parent 90e5577

1 file changed: 12 additions & 5 deletions

File tree

backends/mlx/examples/llm/run_llm_hf.py
@@ -51,9 +51,18 @@ def _load_text_processor(model_id: str):
     """
     Load a text processor for the model.
 
-    Prefer AutoProcessor for multimodal/text-hybrid models like Gemma 4, and
-    fall back to AutoTokenizer for text-only checkpoints.
+    Prefer AutoTokenizer for text-only prompting, even for checkpoints that
+    also ship an AutoProcessor. Some hybrid checkpoints (for example Gemma 4)
+    expose both, but the tokenizer path is the more stable interface for the
+    plain text generation flow exercised by this runner.
     """
+    logger.info(f"Loading tokenizer from HuggingFace: {model_id}...")
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        return tokenizer, False
+    except Exception as exc:
+        logger.info(f"AutoTokenizer unavailable for {model_id}: {exc}")
+
     try:
         processor = AutoProcessor.from_pretrained(model_id)
         if hasattr(processor, "apply_chat_template") and hasattr(processor, "decode"):
@@ -62,9 +71,7 @@ def _load_text_processor(model_id: str):
     except Exception as exc:
         logger.info(f"AutoProcessor unavailable for {model_id}: {exc}")
 
-    logger.info(f"Loading tokenizer from HuggingFace: {model_id}...")
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    return tokenizer, False
+    raise RuntimeError(f"Could not load tokenizer or processor for {model_id}")
 
 
 def _apply_chat_template(text_processor, messages) -> str:
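For reference, below is a sketch of how `_load_text_processor` reads once this commit is applied, reconstructed from the two hunks above. The imports, the logger setup, and the body of the AutoProcessor branch between the hunks (which the diff does not show) are assumptions, not part of the commit.

```python
# Sketch of _load_text_processor after this commit, reconstructed from the
# hunks above. Imports, logger setup, and the hidden AutoProcessor branch
# body are assumptions; only the +/- lines come from the diff.
import logging

from transformers import AutoProcessor, AutoTokenizer

logger = logging.getLogger(__name__)


def _load_text_processor(model_id: str):
    """
    Load a text processor for the model.

    Prefer AutoTokenizer for text-only prompting, even for checkpoints that
    also ship an AutoProcessor. Some hybrid checkpoints (for example Gemma 4)
    expose both, but the tokenizer path is the more stable interface for the
    plain text generation flow exercised by this runner.
    """
    # New in this commit: try the tokenizer first and return early on success.
    logger.info(f"Loading tokenizer from HuggingFace: {model_id}...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        return tokenizer, False
    except Exception as exc:
        logger.info(f"AutoTokenizer unavailable for {model_id}: {exc}")

    try:
        processor = AutoProcessor.from_pretrained(model_id)
        if hasattr(processor, "apply_chat_template") and hasattr(processor, "decode"):
            # Assumed: the lines elided between the two hunks return the
            # processor with a True flag, mirroring (tokenizer, False) above.
            return processor, True
    except Exception as exc:
        logger.info(f"AutoProcessor unavailable for {model_id}: {exc}")

    # New in this commit: failing both paths is now a hard error instead of
    # an unconditional tokenizer load after a failed processor load.
    raise RuntimeError(f"Could not load tokenizer or processor for {model_id}")
```

The try/except ordering is what implements the preference: the happy path returns the tokenizer immediately, the processor stays available as a fallback for checkpoints without a usable standalone tokenizer, and the new RuntimeError replaces the old behaviour, where a failed AutoProcessor load fell through to an unguarded AutoTokenizer call.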
