Skip to content

Commit 10b16b2

Browse files
feat: propagate hf_revision to diff command and clarify Ollama chat handling comments
1 parent 970a8d5 commit 10b16b2

2 files changed

Lines changed: 6 additions & 4 deletions

File tree

src/infer_check/backends/openai_compat.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -52,10 +52,10 @@ def __init__(
5252
self._chat = chat
5353
self._revision = revision
5454
self._disable_thinking = disable_thinking
55-
# Ollama listens on :11434 by default. When we're talking to Ollama and
56-
# thinking is disabled, we prepend "/no_think" to the user message — a
57-
# directive that Qwen3 and some Gemma/Ollama templates honour even when
58-
# the top-level `think` field is ignored.
55+
# Ollama listens on :11434 by default. Track it so later request
56+
# handling can apply Ollama-specific chat behavior when thinking is
57+
# disabled (for example, using request flags and stripping think
58+
# tokens from responses) rather than relying on prompt rewriting.
5959
self._is_ollama = ":11434" in self._base_url
6060

6161
headers: dict[str, str] = {}

src/infer_check/cli.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -399,13 +399,15 @@ def compare(
399399
backend_type=resolved_a.backend,
400400
model_id=resolved_a.model_id,
401401
quantization=resolved_a.label,
402+
hf_revision=resolved_a.revision,
402403
base_url=resolved_a.base_url,
403404
disable_thinking=disable_thinking,
404405
)
405406
config_b = BackendConfig(
406407
backend_type=resolved_b.backend,
407408
model_id=resolved_b.model_id,
408409
quantization=resolved_b.label,
410+
hf_revision=resolved_b.revision,
409411
base_url=resolved_b.base_url,
410412
disable_thinking=disable_thinking,
411413
)

0 commit comments

Comments (0)