
Commit 064c99b

Merge upstream main and refine provider adapters
2 parents ff1c7b4 + 5ab7c4e commit 064c99b

Showing 23 changed files with 1,345 additions and 414 deletions.

agent/config.py

Lines changed: 9 additions & 0 deletions
@@ -33,6 +33,15 @@ class Config(BaseModel):
     confirm_cpu_jobs: bool = True
     auto_file_upload: bool = False
 
+    # Reasoning effort for models that support it (GPT-5 / o-series, Claude
+    # extended thinking, HF reasoning models like MiniMax M2 / Kimi K2).
+    # Defaults to "high" — we'd rather spend tokens thinking than ship a
+    # wrong ML recipe. Users can dial down with `/effort low|medium|off`.
+    # "minimal" is an OpenAI-only level and is normalized to "low" for HF
+    # router models (MiniMax requires ≥low). Ignored for non-reasoning models.
+    # Valid values: None | "minimal" | "low" | "medium" | "high"
+    reasoning_effort: str | None = "high"
+
 
 def substitute_env_vars(obj: Any) -> Any:
     """

agent/context_manager/manager.py

Lines changed: 6 additions & 2 deletions
@@ -12,7 +12,7 @@
 from jinja2 import Template
 from litellm import Message, acompletion
 
-from agent.llm import resolve_llm_params
+from agent.core.llm_params import _resolve_llm_params
 
 logger = logging.getLogger(__name__)
 
@@ -308,7 +308,11 @@ async def compact(
             )
         )
 
-        llm_params = resolve_llm_params(model_name, session_hf_token=hf_token)
+        llm_params = _resolve_llm_params(
+            model_name,
+            session_hf_token=hf_token,
+            reasoning_effort="high",
+        )
         response = await acompletion(
             messages=messages_to_summarize,
             max_completion_tokens=self.compact_size,
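Compaction pins reasoning_effort="high" instead of inheriting the session's setting. For an HF-routed model the resolved kwargs would look roughly like this (a sketch: the model id and token are placeholders, and it assumes INFERENCE_TOKEN is unset so no billing header is added):

llm_params = _resolve_llm_params(
    "MiniMaxAI/MiniMax-M2",      # example routed model id
    session_hf_token="hf_xxx",   # placeholder token
    reasoning_effort="high",
)
# llm_params == {
#     "model": "openai/MiniMaxAI/MiniMax-M2",
#     "api_base": "https://router.huggingface.co/v1",
#     "api_key": "hf_xxx",
#     "extra_body": {"reasoning_effort": "high"},
# }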

agent/core/agent_loop.py

Lines changed: 23 additions & 11 deletions
@@ -12,23 +12,16 @@
 
 from agent.config import Config
 from agent.core.doom_loop import check_for_doom_loop
+from agent.core.llm_params import _resolve_llm_params
 from agent.core.session import Event, OpType, Session
 from agent.core.tools import ToolRouter
-from agent.llm import resolve_llm_params
 from agent.tools.jobs_tool import CPU_FLAVORS
 
 logger = logging.getLogger(__name__)
 
 ToolCall = ChatCompletionMessageToolCall
 
 
-def _resolve_hf_router_params(
-    model_name: str, session_hf_token: str | None = None
-) -> dict:
-    """Back-compat wrapper for the shared provider resolver."""
-    return resolve_llm_params(model_name, session_hf_token=session_hf_token)
-
-
 def _validate_tool_args(tool_args: dict) -> tuple[bool, str | None]:
     """
     Validate tool arguments structure.
@@ -181,6 +174,23 @@ def _friendly_error_message(error: Exception) -> str | None:
             "at your model provider's dashboard."
         )
 
+    if "not supported by provider" in err_str or "no provider supports" in err_str:
+        return (
+            "The model isn't served by the provider you pinned.\n\n"
+            "Drop the ':<provider>' suffix to let the HF router auto-pick a "
+            "provider, or use '/model' (no arg) to see which providers host "
+            "which models."
+        )
+
+    if "model_not_found" in err_str or (
+        "model" in err_str and ("not found" in err_str or "does not exist" in err_str)
+    ):
+        return (
+            "Model not found. Use '/model' to list suggestions, or paste an "
+            "HF model id like 'MiniMaxAI/MiniMax-M2.7'. Availability is shown "
+            "when you switch."
+        )
+
     return None
 
 
@@ -529,8 +539,10 @@ async def run_agent(
         tools = session.tool_router.get_tool_specs_for_llm()
         try:
             # ── Call the LLM (streaming or non-streaming) ──
-            llm_params = _resolve_hf_router_params(
-                session.config.model_name, session.hf_token
+            llm_params = _resolve_llm_params(
+                session.config.model_name,
+                session.hf_token,
+                reasoning_effort=session.config.reasoning_effort,
             )
             if session.stream:
                 llm_result = await _call_llm_streaming(
@@ -746,7 +758,7 @@ async def _exec_tool(
     if not valid:
         return (tc, name, args, err, False)
     out, ok = await session.tool_router.call_tool(
-        name, args, session=session
+        name, args, session=session, tool_call_id=tc.id
     )
     return (tc, name, args, out, ok)

agent/core/hf_router_catalog.py

Lines changed: 129 additions & 0 deletions
@@ -0,0 +1,129 @@
+"""Fetch and cache the HF Inference Router model catalog.
+
+The router exposes an OpenAI-compatible listing at
+``https://router.huggingface.co/v1/models`` with per-provider availability,
+pricing, context length, and tool-use support. We use it to:
+
+• Validate ``/model`` switches with live data instead of a hard-coded allowlist.
+• Show the user which providers serve a model, at what price, and whether they
+  support tool calls.
+• Derive a reasonable context-window limit for any routed model.
+
+The listing is cached in-memory for a few minutes so repeated lookups during a
+session are free. On fetch failure we return stale data if we have it, or an
+empty catalog otherwise.
+"""
+
+import logging
+import time
+from dataclasses import dataclass
+from difflib import get_close_matches
+from typing import Optional
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+_CATALOG_URL = "https://router.huggingface.co/v1/models"
+_CACHE_TTL_SECONDS = 300
+_HTTP_TIMEOUT_SECONDS = 5.0
+
+_cache: Optional[dict] = None
+_cache_time: float = 0.0
+
+
+@dataclass
+class ProviderInfo:
+    provider: str
+    status: str
+    context_length: Optional[int]
+    input_price: Optional[float]
+    output_price: Optional[float]
+    supports_tools: bool
+    supports_structured_output: bool
+
+
+@dataclass
+class ModelInfo:
+    id: str
+    providers: list[ProviderInfo]
+
+    @property
+    def live_providers(self) -> list[ProviderInfo]:
+        return [p for p in self.providers if p.status == "live"]
+
+    @property
+    def max_context_length(self) -> Optional[int]:
+        lengths = [p.context_length for p in self.live_providers if p.context_length]
+        return max(lengths) if lengths else None
+
+    @property
+    def any_supports_tools(self) -> bool:
+        return any(p.supports_tools for p in self.live_providers)
+
+
+def _fetch_catalog(force: bool = False) -> dict:
+    global _cache, _cache_time
+    now = time.time()
+    if not force and _cache is not None and now - _cache_time < _CACHE_TTL_SECONDS:
+        return _cache
+    try:
+        resp = httpx.get(_CATALOG_URL, timeout=_HTTP_TIMEOUT_SECONDS)
+        resp.raise_for_status()
+        _cache = resp.json()
+        _cache_time = now
+    except Exception as e:
+        logger.warning("Failed to fetch HF router catalog: %s", e)
+        if _cache is None:
+            _cache = {"data": []}
+            _cache_time = now
+    return _cache
+
+
+def _parse_entry(entry: dict) -> ModelInfo:
+    providers = []
+    for p in entry.get("providers", []) or []:
+        pricing = p.get("pricing") or {}
+        providers.append(
+            ProviderInfo(
+                provider=p.get("provider", ""),
+                status=p.get("status", ""),
+                context_length=p.get("context_length"),
+                input_price=pricing.get("input"),
+                output_price=pricing.get("output"),
+                supports_tools=bool(p.get("supports_tools", False)),
+                supports_structured_output=bool(p.get("supports_structured_output", False)),
+            )
+        )
+    return ModelInfo(id=entry.get("id", ""), providers=providers)
+
+
+def lookup(model_id: str) -> Optional[ModelInfo]:
+    """Find a model in the router catalog.
+
+    Accepts ``<org>/<model>`` or ``<org>/<model>:<tag>`` — the tag is stripped
+    for lookup. Returns ``None`` if the model isn't listed.
+    """
+    bare = model_id.split(":", 1)[0]
+    catalog = _fetch_catalog()
+    for entry in catalog.get("data", []):
+        if entry.get("id") == bare:
+            return _parse_entry(entry)
+    return None
+
+
+def fuzzy_suggest(model_id: str, limit: int = 3) -> list[str]:
+    """Return the closest model ids from the catalog."""
+    bare = model_id.split(":", 1)[0]
+    catalog = _fetch_catalog()
+    ids = [e.get("id", "") for e in catalog.get("data", []) if e.get("id")]
+    return get_close_matches(bare, ids, n=limit, cutoff=0.4)
+
+
+def prewarm() -> None:
+    """Fetch the catalog so subsequent lookups are instant. Safe to call from
+    a background task — swallows failures."""
+    try:
+        _fetch_catalog(force=False)
+    except Exception:
+        pass
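A sketch of how a `/model` switch might consume this module (the command handler is not shown in this excerpt, and the model id is just an example):

info = lookup("MiniMaxAI/MiniMax-M2:novita")  # ':<provider>' tag is stripped
if info is None:
    print("Not in the router catalog. Closest:",
          ", ".join(fuzzy_suggest("MiniMaxAI/MiniMax-M2")))
else:
    print(f"{len(info.live_providers)} live provider(s), "
          f"max context {info.max_context_length}")
    if not info.any_supports_tools:
        print("Warning: no live provider advertises tool-call support.")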

agent/core/llm_params.py

Lines changed: 117 additions & 0 deletions
@@ -0,0 +1,117 @@
+"""LiteLLM kwargs resolution for the model ids this agent accepts."""
+
+import os
+
+_HF_ALLOWED_EFFORTS = {"low", "medium", "high"}
+_LM_STUDIO_DEFAULT_BASE = "http://127.0.0.1:1234/v1"
+_DIRECT_PREFIXES = (
+    "anthropic/",
+    "openai/",
+    "openrouter/",
+    "lm_studio/",
+    "opencode/",
+    "opencode-go/",
+)
+
+
+def _looks_like_hf_router_model(model_name: str) -> bool:
+    bare = model_name.removeprefix("huggingface/").split(":", 1)[0]
+    parts = bare.split("/")
+    return len(parts) >= 2 and all(parts)
+
+
+def _custom_openai_compat_params(
+    model_name: str,
+    *,
+    prefix: str,
+    api_base: str,
+    api_key_env: str,
+    reasoning_effort: str | None = None,
+) -> dict:
+    actual_model = model_name[len(prefix) :]
+    params = {
+        "model": f"openai/{actual_model}",
+        "api_base": api_base,
+        "api_key": os.environ.get(api_key_env, ""),
+    }
+    if reasoning_effort:
+        params["extra_body"] = {"reasoning_effort": reasoning_effort}
+    return params
+
+
+def _resolve_llm_params(
+    model_name: str,
+    session_hf_token: str | None = None,
+    reasoning_effort: str | None = None,
+) -> dict:
+    """Build LiteLLM kwargs for supported direct, local, and routed models."""
+    if model_name.startswith(("anthropic/", "openai/")):
+        params: dict = {"model": model_name}
+        if reasoning_effort:
+            params["reasoning_effort"] = reasoning_effort
+        return params
+
+    if model_name.startswith("lm_studio/"):
+        return {
+            "model": model_name,
+            "api_base": os.environ.get(
+                "LMSTUDIO_BASE_URL", _LM_STUDIO_DEFAULT_BASE
+            ).rstrip("/"),
+            "api_key": os.environ.get("LMSTUDIO_API_KEY", "") or "lm-studio",
+        }
+
+    if model_name.startswith("openrouter/"):
+        return _custom_openai_compat_params(
+            model_name,
+            prefix="openrouter/",
+            api_base="https://openrouter.ai/api/v1",
+            api_key_env="OPENROUTER_API_KEY",
+            reasoning_effort=reasoning_effort,
+        )
+
+    if model_name.startswith("opencode/"):
+        return _custom_openai_compat_params(
+            model_name,
+            prefix="opencode/",
+            api_base="https://opencode.ai/zen/v1",
+            api_key_env="OPENCODE_ZEN_API_KEY",
+            reasoning_effort=reasoning_effort,
+        )
+
+    if model_name.startswith("opencode-go/"):
+        return _custom_openai_compat_params(
+            model_name,
+            prefix="opencode-go/",
+            api_base="https://opencode.ai/zen/go/v1",
+            api_key_env="OPENCODE_GO_API_KEY",
+            reasoning_effort=reasoning_effort,
+        )
+
+    if model_name.startswith(_DIRECT_PREFIXES):
+        raise ValueError(f"Unrecognized model prefix: {model_name}")
+
+    if not _looks_like_hf_router_model(model_name):
+        raise ValueError(f"Unrecognized model id: {model_name}")
+
+    hf_model = model_name.removeprefix("huggingface/")
+    api_key = (
+        os.environ.get("INFERENCE_TOKEN")
+        or session_hf_token
+        or os.environ.get("HF_TOKEN")
+    )
+    if not api_key:
+        raise ValueError(
+            "Missing Hugging Face token. Set INFERENCE_TOKEN or HF_TOKEN, or sign in so the session carries your HF token."
+        )
+    params = {
+        "model": f"openai/{hf_model}",
+        "api_base": "https://router.huggingface.co/v1",
+        "api_key": api_key,
+    }
+    if os.environ.get("INFERENCE_TOKEN"):
+        params["extra_headers"] = {"X-HF-Bill-To": "huggingface"}
+    if reasoning_effort:
+        hf_level = "low" if reasoning_effort == "minimal" else reasoning_effort
+        if hf_level in _HF_ALLOWED_EFFORTS:
+            params["extra_body"] = {"reasoning_effort": hf_level}
+    return params
