diff --git a/.changeset/twelve-lemons-tan.md b/.changeset/twelve-lemons-tan.md new file mode 100644 index 00000000..a0e3bd98 --- /dev/null +++ b/.changeset/twelve-lemons-tan.md @@ -0,0 +1,5 @@ +--- +"apollo": minor +--- + +upgrade to opus in planner and job chat diff --git a/services/global_chat/config.yaml b/services/global_chat/config.yaml index a8714f58..d2f26a51 100644 --- a/services/global_chat/config.yaml +++ b/services/global_chat/config.yaml @@ -8,7 +8,6 @@ router: # Planner configuration (complex orchestration) planner: - model: "claude-sonnet" - max_tokens: 8192 - temperature: 1.0 + model: "claude-opus" + max_tokens: 24576 max_tool_calls: 10 diff --git a/services/global_chat/planner.py b/services/global_chat/planner.py index 7eb3bd9b..ee4fe80f 100644 --- a/services/global_chat/planner.py +++ b/services/global_chat/planner.py @@ -6,6 +6,7 @@ from typing import List, Dict, Optional from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass +import httpx from anthropic import Anthropic import sentry_sdk @@ -59,9 +60,8 @@ def __init__(self, config_loader: ConfigLoader, api_key: Optional[str] = None): self.tools = TOOL_DEFINITIONS planner_config = config_loader.config.get("planner", {}) - self.model = resolve_model(planner_config.get("model", "claude-sonnet")) - self.max_tokens = planner_config.get("max_tokens", 8192) - self.temperature = planner_config.get("temperature", 1.0) + self.model = resolve_model(planner_config.get("model", "claude-opus")) + self.max_tokens = planner_config.get("max_tokens", 24576) self.max_tool_calls = planner_config.get("max_tool_calls", 20) self.current_yaml: Optional[str] = None @@ -285,6 +285,7 @@ def _call_api(self, system_prompt, messages, stream): messages=messages, tools=self.tools, thinking={"type": "adaptive"}, + output_config={"effort": "medium"}, ) as stream_obj: for event in stream_obj: if event.type == "content_block_delta": @@ -299,7 +300,11 @@ def _call_api(self, system_prompt, messages, stream): messages=messages, tools=self.tools, thinking={"type": "adaptive"}, - output_config={"effort": "high"}, + output_config={"effort": "medium"}, + # Per-request timeout (same values as the SDK default): + # required for non-streaming calls with max_tokens > ~21k, + # which the SDK otherwise rejects. + timeout=httpx.Timeout(600.0, connect=5.0), betas=["context-management-2025-06-27"], context_management={ "edits": [ diff --git a/services/job_chat/job_chat.py b/services/job_chat/job_chat.py index 31c11440..112a49c8 100644 --- a/services/job_chat/job_chat.py +++ b/services/job_chat/job_chat.py @@ -4,6 +4,7 @@ import yaml from typing import List, Optional, Dict, Any from dataclasses import dataclass +import httpx from anthropic import ( Anthropic, APIConnectionError, @@ -138,7 +139,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "Payload": @dataclass class ChatConfig: model: str = _MODEL - max_tokens: int = 16384 + max_tokens: int = 24576 api_key: Optional[str] = None @@ -288,6 +289,10 @@ def generate( max_tokens=self.config.max_tokens, messages=prompt, model=self.config.model, system=system_message, thinking={"type": "adaptive"}, output_config=output_config, + # Per-request timeout (same values as the SDK default): + # required for non-streaming calls with max_tokens > ~21k, + # which the SDK otherwise rejects. + timeout=httpx.Timeout(600.0, connect=5.0), **tool_kwargs ) message = self.client.messages.create(**create_kwargs) diff --git a/services/job_chat/rag.yaml b/services/job_chat/rag.yaml index 9a6b6c39..13caa0de 100644 --- a/services/job_chat/rag.yaml +++ b/services/job_chat/rag.yaml @@ -1,5 +1,5 @@ config_version: 1.0 -model: "claude-sonnet" +model: "claude-opus" llm_search_decision: "claude-sonnet" llm_retrieval: "claude-sonnet" threshold: 0.8