Skip to content

Commit d7a6c20

Browse files
fix: refine model spec revision parsing and tighten HF repo detection for tokenizer loading
1 parent 02654cd commit d7a6c20

2 files changed

Lines changed: 30 additions & 16 deletions

File tree

src/infer_check/resolve.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -93,26 +93,27 @@ def resolve_model(
9393
if not spec:
9494
raise ValueError("Empty model spec")
9595

96-
revision: str | None = None
97-
if "@" in spec and not spec.startswith("@"):
98-
# Split on the LAST @ to allow for potential @ in paths if they exist (unlikely but safer)
99-
# In practice the spec is usually just repo@rev.
100-
spec, revision = spec.rsplit("@", 1)
101-
10296
# ── 1. Check for explicit prefix ─────────────────────────────────
10397
for prefix, backend in _PREFIX_MAP.items():
10498
pattern = f"^{re.escape(prefix)}:"
10599
if re.match(pattern, spec, re.IGNORECASE):
106100
model_id = spec[len(prefix) + 1 :]
101+
102+
# Revision is allowed for explicit prefixes
103+
actual_revision = None
104+
if "@" in model_id and not model_id.startswith("@"):
105+
model_id, actual_revision = model_id.rsplit("@", 1)
106+
107107
return ResolvedModel(
108108
backend=backend,
109109
model_id=model_id,
110110
base_url=base_url or _DEFAULT_URLS.get(backend),
111111
label=label or _make_label(model_id),
112-
revision=revision,
112+
revision=actual_revision,
113113
)
114114

115115
# ── 2. Local .gguf file path ─────────────────────────────────────
116+
# If it's a local .gguf path, we don't treat @ as a revision delimiter.
116117
local_path = Path(spec)
117118
if local_path.suffix.lower() == ".gguf":
118119
if local_path.exists():
@@ -121,18 +122,23 @@ def resolve_model(
121122
model_id=str(local_path.resolve()),
122123
base_url=base_url or _DEFAULT_URLS["llama-cpp"],
123124
label=label or local_path.stem,
124-
revision=revision,
125+
revision=None,
125126
)
126127
# Even if it doesn't exist yet, honour the extension.
127128
return ResolvedModel(
128129
backend="llama-cpp",
129130
model_id=spec,
130131
base_url=base_url or _DEFAULT_URLS["llama-cpp"],
131132
label=label or local_path.stem,
132-
revision=revision,
133+
revision=None,
133134
)
134135

135136
# ── 3. HuggingFace repo heuristics ──────────────────────────────
137+
# HF repos CAN have @revision.
138+
actual_revision = None
139+
if "@" in spec and not spec.startswith("@"):
140+
spec, actual_revision = spec.rsplit("@", 1)
141+
136142
spec_lower = spec.lower()
137143

138144
# MLX repos (mlx-community org or -mlx suffix).
@@ -147,7 +153,7 @@ def resolve_model(
147153
model_id=spec,
148154
base_url=None, # mlx-lm loads locally, no URL
149155
label=label or _make_label(spec),
150-
revision=revision,
156+
revision=actual_revision,
151157
)
152158

153159
# GGUF repos (typically served via Ollama or llama-cpp).
@@ -159,7 +165,7 @@ def resolve_model(
159165
model_id=spec,
160166
base_url=base_url or _DEFAULT_URLS["llama-cpp"],
161167
label=label or _make_label(spec),
162-
revision=revision,
168+
revision=actual_revision,
163169
)
164170

165171
# ── 4. Ollama-style tags (contain colon but no slash) ────────────
@@ -170,7 +176,7 @@ def resolve_model(
170176
model_id=spec,
171177
base_url=base_url or _DEFAULT_URLS["openai-compat"],
172178
label=label or _make_label(spec),
173-
revision=revision,
179+
revision=actual_revision,
174180
)
175181

176182
# ── 5. Fallback — assume mlx-lm (Mac-first user base) ───────────
@@ -179,5 +185,5 @@ def resolve_model(
179185
model_id=spec,
180186
base_url=None,
181187
label=label or _make_label(spec),
182-
revision=revision,
188+
revision=actual_revision,
183189
)

src/infer_check/utils.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,15 @@ def _get_tokenizer(model_id: str, revision: str | None = None) -> Any:
7474
"""Helper to load and cache HuggingFace tokenizers."""
7575
from transformers import AutoTokenizer
7676

77-
return AutoTokenizer.from_pretrained(model_id, revision=revision)
77+
# We use local_files_only=True to ensure that we don't hang on network
78+
# calls if the model isn't actually a HF repo (or if we're offline).
79+
# This matches the tightened is_hf_id heuristic in format_prompt.
80+
return AutoTokenizer.from_pretrained(
81+
model_id,
82+
revision=revision,
83+
local_files_only=True,
84+
trust_remote_code=False,
85+
)
7886

7987

8088
def format_prompt(
@@ -100,10 +108,10 @@ def format_prompt(
100108
text = strip_thinking_tokens(text)
101109

102110
if tokenizer is None and model_id:
103-
# Only attempt to load from HF if it looks like a HF repo (owner/name)
111+
# Only attempt to load from HF if it looks like a HF repo (owner/repo)
104112
# or a filesystem path containing "/". Ollama tags (name:tag) and bare
105113
# local GGUF file names are skipped, as from_pretrained fails or hangs on them.
106-
is_hf_id = "/" in model_id or (model_id.count(":") == 0 and "." not in model_id)
114+
is_hf_id = "/" in model_id
107115
if is_hf_id:
108116
with contextlib.suppress(Exception):
109117
tokenizer = _get_tokenizer(model_id, revision)

0 commit comments

Comments
 (0)