Skip to content

Commit 9ee796b

Browse files
fix: update Ollama backend default URL to remove /v1 suffix and set chat mode default to True
1 parent: 3289264 · commit: 9ee796b

5 files changed

Lines changed: 8 additions & 10 deletions

File tree

docs/backends.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
| **mlx-lm** | In-process | (local) | Local Apple Silicon inference with logprobs |
1010
| **llama-cpp** | HTTP | `http://127.0.0.1:8080` | llama-server via `/completion` endpoint |
1111
| **vllm-mlx** | HTTP | `http://127.0.0.1:8000` | Continuous batching on Apple Silicon |
12-
| **openai-compat** | HTTP | `http://127.0.0.1:11434/v1` | Any OpenAI-compatible server (vLLM, SGLang, Ollama) |
12+
| **openai-compat** | HTTP | `http://127.0.0.1:11434` | Any OpenAI-compatible server (vLLM, SGLang, Ollama) |
1313

1414
## mlx-lm
1515

@@ -109,7 +109,7 @@ Generic backend for any server that implements the OpenAI API format. Works with
109109

110110
| Model source | Default URL |
111111
|-------------|-------------|
112-
| Ollama tags (e.g., `llama3.1:8b`) | `http://127.0.0.1:11434/v1` |
112+
| Ollama tags (e.g., `llama3.1:8b`) | `http://127.0.0.1:11434` |
113113
| Custom server | Use `--base-url` |
114114

115115
**Example with Ollama**:

src/infer_check/backends/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,20 +66,20 @@ def get_backend(config: BackendConfig) -> BackendAdapter:
6666
return VLLMMLXBackend(
6767
model_id=config.model_id,
6868
base_url=url,
69-
chat=config.extra.get("chat", False),
69+
chat=config.extra.get("chat", True),
7070
)
7171
elif config.backend_type == "openai-compat":
7272
from infer_check.backends.openai_compat import OpenAICompatBackend
7373

7474
if not config.base_url:
7575
raise ValueError(
76-
"openai-compat backend requires --base-url. Example: --base-url http://127.0.0.1:11434/v1 (Ollama)"
76+
"openai-compat backend requires --base-url. Example: --base-url http://127.0.0.1:11434 (Ollama)"
7777
)
7878
return OpenAICompatBackend(
7979
base_url=config.base_url,
8080
model_id=config.model_id,
8181
api_key=config.extra.get("api_key"),
82-
chat=config.extra.get("chat", False),
82+
chat=config.extra.get("chat", True),
8383
)
8484
else:
8585
supported = ", ".join(["mlx-lm", "llama-cpp", "vllm-mlx", "openai-compat"])

src/infer_check/cli.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -384,14 +384,12 @@ def compare(
384384
model_id=resolved_a.model_id,
385385
quantization=resolved_a.label,
386386
base_url=resolved_a.base_url,
387-
extra={"chat": False},
388387
)
389388
config_b = BackendConfig(
390389
backend_type=resolved_b.backend,
391390
model_id=resolved_b.model_id,
392391
quantization=resolved_b.label,
393392
base_url=resolved_b.base_url,
394-
extra={"chat": False},
395393
)
396394
backend_a = get_backend(config_a)
397395
backend_b = get_backend(config_b)

src/infer_check/resolve.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434

3535
# Default base URLs per backend (can be overridden via CLI).
3636
_DEFAULT_URLS: dict[BackendType, str] = {
37-
"openai-compat": "http://127.0.0.1:11434/v1", # Ollama
37+
"openai-compat": "http://127.0.0.1:11434", # Ollama (backend adds /v1/... paths)
3838
"llama-cpp": "http://127.0.0.1:8080",
3939
"vllm-mlx": "http://127.0.0.1:8000",
4040
}

tests/unit/test_resolve.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def test_ollama_prefix(self) -> None:
1414
r = resolve_model("ollama:llama3.1:8b-instruct-q4_K_M")
1515
assert r.backend == "openai-compat"
1616
assert r.model_id == "llama3.1:8b-instruct-q4_K_M"
17-
assert r.base_url == "http://127.0.0.1:11434/v1"
17+
assert r.base_url == "http://127.0.0.1:11434"
1818
assert r.label == "llama3.1:8b-instruct-q4_K_M"
1919

2020
def test_mlx_prefix(self) -> None:
@@ -67,7 +67,7 @@ def test_mlx_keyword_heuristic(self) -> None:
6767
def test_ollama_style_tag(self) -> None:
6868
r = resolve_model("llama3.1:8b-instruct-q4_K_M")
6969
assert r.backend == "openai-compat"
70-
assert r.base_url == "http://127.0.0.1:11434/v1"
70+
assert r.base_url == "http://127.0.0.1:11434"
7171

7272
def test_local_gguf_path(self, tmp_path: Path) -> None:
7373
gguf_file = tmp_path / "model-q4.gguf"

0 commit comments

Comments (0)