Skip to content

Commit 267618d

Browse files
committed
Refactor LLM param resolution into adapters
1 parent 962191f commit 267618d

3 files changed

Lines changed: 124 additions & 32 deletions

File tree

agent/core/llm_params.py

Lines changed: 10 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,7 @@
55
creating circular imports.
66
"""
77

8-
import os
9-
10-
11-
# HF router reasoning models only accept "low" | "medium" | "high" (e.g.
12-
# MiniMax M2 actually *requires* reasoning to be enabled). OpenAI's GPT-5
13-
# also accepts "minimal" for near-zero thinking. We map "minimal" to "low"
14-
# for HF so the user doesn't get a 400.
15-
_HF_ALLOWED_EFFORTS = {"low", "medium", "high"}
8+
from agent.core.provider_adapters import ADAPTERS
169

1710

1811
def _resolve_llm_params(
@@ -50,27 +43,12 @@ def _resolve_llm_params(
5043
2. session.hf_token — the user's own token (CLI / OAuth / cache file).
5144
3. HF_TOKEN env — belt-and-suspenders fallback for CLI users.
5245
"""
53-
if model_name.startswith(("anthropic/", "openai/")):
54-
params: dict = {"model": model_name}
55-
if reasoning_effort:
56-
params["reasoning_effort"] = reasoning_effort
57-
return params
58-
59-
hf_model = model_name.removeprefix("huggingface/")
60-
api_key = (
61-
os.environ.get("INFERENCE_TOKEN")
62-
or session_hf_token
63-
or os.environ.get("HF_TOKEN")
64-
)
65-
params = {
66-
"model": f"openai/{hf_model}",
67-
"api_base": "https://router.huggingface.co/v1",
68-
"api_key": api_key,
69-
}
70-
if os.environ.get("INFERENCE_TOKEN"):
71-
params["extra_headers"] = {"X-HF-Bill-To": "huggingface"}
72-
if reasoning_effort:
73-
hf_level = "low" if reasoning_effort == "minimal" else reasoning_effort
74-
if hf_level in _HF_ALLOWED_EFFORTS:
75-
params["extra_body"] = {"reasoning_effort": hf_level}
76-
return params
46+
for adapter in ADAPTERS:
47+
if adapter.matches(model_name):
48+
return adapter.build_params(
49+
model_name,
50+
session_hf_token=session_hf_token,
51+
reasoning_effort=reasoning_effort,
52+
)
53+
54+
raise ValueError(f"Unsupported model id: {model_name}")

agent/core/provider_adapters.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
"""Provider-specific LiteLLM parameter builders."""
2+
3+
from __future__ import annotations

import os
from abc import ABC, abstractmethod
from dataclasses import dataclass
7+
8+
9+
class ProviderAdapter(ABC):
    """Build LiteLLM kwargs for one family of model ids.

    Concrete adapters claim a family of model ids via :meth:`matches` and
    translate a matching id into the keyword arguments LiteLLM expects via
    :meth:`build_params`.  Declared as an ABC so that a subclass missing
    either method fails at instantiation time instead of raising
    ``NotImplementedError`` deep inside a request.
    """

    @abstractmethod
    def matches(self, model_name: str) -> bool:
        """Return True when this adapter handles *model_name*."""

    @abstractmethod
    def build_params(
        self,
        model_name: str,
        session_hf_token: str | None = None,
        reasoning_effort: str | None = None,
    ) -> dict:
        """Return LiteLLM call kwargs for *model_name*.

        Args:
            model_name: Provider-prefixed model id (e.g. ``"openai/gpt-5"``).
            session_hf_token: The user's own HF token, if any, used as an
                auth fallback by adapters that need one.
            reasoning_effort: Optional reasoning level requested by the
                caller; adapters may remap or drop unsupported values.
        """
23+
24+
@dataclass(frozen=True)
class NativeAdapter(ProviderAdapter):
    """Adapter for model ids LiteLLM already understands natively."""

    # Provider prefixes LiteLLM routes without any extra configuration.
    prefixes: tuple[str, ...] = ("anthropic/", "openai/")

    def matches(self, model_name: str) -> bool:
        """A native id is one starting with a known provider prefix."""
        return model_name.startswith(self.prefixes)

    def build_params(
        self,
        model_name: str,
        session_hf_token: str | None = None,
        reasoning_effort: str | None = None,
    ) -> dict:
        """Pass the id through unchanged, adding the effort flag if set."""
        del session_hf_token  # unused: native providers authenticate elsewhere
        if reasoning_effort:
            return {"model": model_name, "reasoning_effort": reasoning_effort}
        return {"model": model_name}
43+
44+
@dataclass(frozen=True)
class HfRouterAdapter(ProviderAdapter):
    """Adapter routing any other ``org/model`` id through the HF router.

    The HF router's reasoning models only accept "low" | "medium" | "high",
    so an OpenAI-style "minimal" request is downgraded to "low" rather than
    sent as-is (which would 400).
    """

    allowed_efforts: tuple[str, ...] = ("low", "medium", "high")

    def matches(self, model_name: str) -> bool:
        """Claim any slash-containing id that isn't a native provider id."""
        if model_name.startswith(("anthropic/", "openai/")):
            return False
        return "/" in model_name

    def build_params(
        self,
        model_name: str,
        session_hf_token: str | None = None,
        reasoning_effort: str | None = None,
    ) -> dict:
        """Build kwargs for the HF router's OpenAI-compatible endpoint.

        Token precedence: INFERENCE_TOKEN env (Space-provided, billed to the
        org), then the session's own HF token, then the HF_TOKEN env as a
        belt-and-suspenders fallback for CLI users.
        """
        bare_model = model_name.removeprefix("huggingface/")
        space_token = os.environ.get("INFERENCE_TOKEN")
        params: dict = {
            "model": f"openai/{bare_model}",
            "api_base": "https://router.huggingface.co/v1",
            "api_key": space_token or session_hf_token or os.environ.get("HF_TOKEN"),
        }
        if space_token:
            # Space-issued tokens are billed to the huggingface org.
            params["extra_headers"] = {"X-HF-Bill-To": "huggingface"}
        effort = "low" if reasoning_effort == "minimal" else reasoning_effort
        if effort in self.allowed_efforts:
            params["extra_body"] = {"reasoning_effort": effort}
        return params
75+
76+
# Resolution order matters: native ids must be claimed before the HF
# fallback, which accepts any remaining "org/model" id.
ADAPTERS: tuple[ProviderAdapter, ...] = (NativeAdapter(), HfRouterAdapter())

tests/test_provider_adapters.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
from agent.core.llm_params import _resolve_llm_params
2+
3+
4+
def test_native_adapter_keeps_model_name():
    """Anthropic/OpenAI ids pass through untouched, with the effort flag."""
    result = _resolve_llm_params("anthropic/claude-opus-4-6", reasoning_effort="high")

    expected = {
        "model": "anthropic/claude-opus-4-6",
        "reasoning_effort": "high",
    }
    assert result == expected
11+
12+
13+
def test_hf_adapter_builds_router_params(monkeypatch):
    """HF-routed ids get router base/key and 'minimal' is mapped to 'low'."""
    # Clear any ambient Space token: if INFERENCE_TOKEN leaked in from the
    # environment, the adapter would prefer it over HF_TOKEN and add the
    # billing header, making the exact-dict assertion below flaky.
    monkeypatch.delenv("INFERENCE_TOKEN", raising=False)
    monkeypatch.setenv("HF_TOKEN", "hf-test")

    params = _resolve_llm_params(
        "moonshotai/Kimi-K2.6:novita", reasoning_effort="minimal"
    )

    assert params == {
        "model": "openai/moonshotai/Kimi-K2.6:novita",
        "api_base": "https://router.huggingface.co/v1",
        "api_key": "hf-test",
        "extra_body": {"reasoning_effort": "low"},
    }
26+
27+
28+
def test_hf_adapter_adds_bill_to_header(monkeypatch):
    """A Space-provided INFERENCE_TOKEN wins and bills to the huggingface org."""
    monkeypatch.setenv("INFERENCE_TOKEN", "hf-space-token")
    monkeypatch.delenv("HF_TOKEN", raising=False)

    params = _resolve_llm_params("MiniMaxAI/MiniMax-M2.7")

    assert params["api_key"] == "hf-space-token"
    assert params["extra_headers"] == {"X-HF-Bill-To": "huggingface"}

0 commit comments

Comments
 (0)