Support Claude Opus 4.7+ adaptive thinking (#119)

fwilhe · web-flow · commit ee1509842602 · 2026-05-29T15:23:13.000+02:00
Anthropic rejects manual extended thinking on Claude Opus 4.7 and 4.8
with HTTP 400. These models require adaptive thinking, where depth is
controlled via a top-level `output_config.effort` parameter rather
than the legacy `thinking.budget_tokens` field.

The previous auto-detection knew only one thinking mode (manual) and
matched every model ID containing `claude-opus-4`, so Opus 4.7+ was sent
the manual payload and each such request was rejected with HTTP 400.

Drive request construction from a new `ThinkingMode` enum instead of the
boolean `supports_thinking` switch (which is kept as a thin wrapper around
the enum). The enum distinguishes three cases:

  - Adaptive: emits `thinking: {type: "adaptive"}` together with
    `output_config: {effort: "high"}`. Selected for `claude-opus-4-7`,
    `claude-opus-4-8`, and the `claude-opus-latest` alias.
  - Manual:   keeps the existing `{type: "enabled", budget_tokens: N}`
    payload. Selected for Sonnet 4.x, Claude 3.7 Sonnet, and Opus 4
    through 4.6.
  - None:     unchanged for non-thinking models.

Users can still override the default effort level (or any other
field) per model via the `config` block in `models.json`, which is
shallow-merged into the request.

Add a unit test covering all three branches of the new detection,
extend `models.example.json` with an Opus 4.7 entry, and document
the new shape and override mechanism in the README with links to
Anthropic's extended-thinking and effort documentation.
diff --git a/README.md b/README.md
@@ -184,6 +184,19 @@ The code-assistant uses two JSON configuration files to manage LLM providers and
 **`~/.config/code-assistant/models.json`** - Define available models:
 ```json
 {
+  "Claude Opus 4.7 (Adaptive Thinking)": {
+    "provider": "anthropic",
+    "id": "claude-opus-4-7",
+    "config": {
+      "max_tokens": 64000,
+      "thinking": {
+        "type": "adaptive"
+      },
+      "output_config": {
+        "effort": "high"
+      }
+    }
+  },
   "Claude Sonnet 4.5 (Thinking)": {
     "provider": "anthropic",
     "id": "claude-sonnet-4-5",
@@ -212,6 +225,16 @@ The code-assistant uses two JSON configuration files to manage LLM providers and
 }
 ```
 
+**Note on Claude Opus 4.7+ extended thinking**: Starting with Claude Opus 4.7, Anthropic
+no longer accepts the manual `thinking: { type: "enabled", budget_tokens: N }` form
+(it returns a 400 error). These models require *adaptive* thinking, where depth is
+controlled via the `output_config.effort` parameter (`low`, `medium`, `high`, `xhigh`,
+`max`). code-assistant recognizes model IDs containing `claude-opus-4-7`,
+`claude-opus-4-8`, or `claude-opus-latest` and emits the adaptive request shape by default.
+You can override the effort level (or any other field) via the model's `config` block,
+as shown in the example above. See Anthropic's [extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking)
+and [effort](https://docs.anthropic.com/en/docs/build-with-claude/effort) docs for details.
+
 **Environment Variable Substitution**: Use `${VAR_NAME}` in provider configs to reference environment variables for API keys.
 
 **Full Examples**: See [`providers.example.json`](providers.example.json) and [`models.example.json`](models.example.json) for complete configuration examples with all supported providers (Anthropic, OpenAI, Ollama, SAP AI Core, Vertex AI, Groq, Cerebras, MistralAI, OpenRouter).
diff --git a/crates/llm/src/anthropic.rs b/crates/llm/src/anthropic.rs
@@ -565,17 +565,63 @@ pub struct AnthropicClient {
     custom_config: Option<serde_json::Value>,
 }
 
+/// Thinking strategy used for a given Claude model.
+///
+/// See: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum ThinkingMode {
+    /// Manual extended thinking via `thinking: {type: "enabled", budget_tokens: N}`.
+    /// Supported on Claude Sonnet 4.x, Claude 3.7 Sonnet, and Claude Opus 4.x up to 4.6.
+    Manual,
+    /// Adaptive thinking via `thinking: {type: "adaptive"}` combined with
+    /// `output_config: {effort: "..."}`. Required on Claude Opus 4.7 and later
+    /// (manual thinking returns a 400 error on these models).
+    Adaptive,
+    /// Model does not support extended thinking.
+    None,
+}
+
 impl AnthropicClient {
-    /// Substrings of model IDs that should enable thinking mode and higher limits
-    fn thinking_model_substrings() -> &'static [&'static str] {
+    /// Substrings of model IDs that require adaptive thinking.
+    ///
+    /// Manual extended thinking (`type: "enabled"` with `budget_tokens`) is rejected
+    /// with a 400 error on these models. They must use `type: "adaptive"` together
+    /// with the `output_config.effort` parameter.
+    fn adaptive_thinking_model_substrings() -> &'static [&'static str] {
+        &[
+            "claude-opus-4-7",
+            "claude-opus-4-8",
+            // Anthropic alias that currently points to the latest Opus release,
+            // which uses adaptive thinking. Some proxies expose this alias too.
+            "claude-opus-latest",
+        ]
+    }
+
+    /// Substrings of model IDs that support manual extended thinking.
+    fn manual_thinking_model_substrings() -> &'static [&'static str] {
         &["claude-sonnet-4", "claude-3-7-sonnet", "claude-opus-4"]
     }
 
-    /// Returns true if the current model should have thinking mode enabled
-    fn supports_thinking(&self) -> bool {
-        Self::thinking_model_substrings()
+    /// Returns the thinking mode that should be used for the current model.
+    fn thinking_mode(&self) -> ThinkingMode {
+        if Self::adaptive_thinking_model_substrings()
+            .iter()
+            .any(|substr| self.model.contains(substr))
+        {
+            return ThinkingMode::Adaptive;
+        }
+        if Self::manual_thinking_model_substrings()
             .iter()
             .any(|substr| self.model.contains(substr))
+        {
+            return ThinkingMode::Manual;
+        }
+        ThinkingMode::None
+    }
+
+    /// Returns true if the current model uses any thinking mode (manual or adaptive).
+    fn supports_thinking(&self) -> bool {
+        !matches!(self.thinking_mode(), ThinkingMode::None)
     }
 
     pub fn default_base_url() -> String {
@@ -1311,16 +1357,11 @@ impl LLMProvider for AnthropicClient {
         });
 
         // Configure thinking mode and max_tokens based on model
-        let (thinking_config, max_tokens) = if self.supports_thinking() {
-            (
-                Some(ThinkingConfiguration {
-                    thinking_type: "enabled".to_string(),
-                    budget_tokens: 16000,
-                }),
-                64000,
-            )
+        let thinking_mode = self.thinking_mode();
+        let max_tokens = if matches!(thinking_mode, ThinkingMode::None) {
+            8192
         } else {
-            (None, 8192)
+            64000
         };
 
         // Convert messages using the message converter
@@ -1330,20 +1371,34 @@ impl LLMProvider for AnthropicClient {
         let mut anthropic_request = serde_json::json!({
             "model": self.model,
             "max_tokens": max_tokens,
-            "temperature": if thinking_config.is_some() {
+            "temperature": if matches!(thinking_mode, ThinkingMode::None) {
+                0.7
+            } else {
                 // Anthropic requires this to be 1.0 if you enable "thinking"
                 1.0
-            } else {
-                0.7
             },
             "system": system,
             "stream": streaming_callback.is_some(),
             "messages": messages_json,
         });
 
-        if let Some(thinking_config) = thinking_config {
-            anthropic_request["thinking"] = serde_json::to_value(thinking_config)?;
+        match thinking_mode {
+            ThinkingMode::Manual => {
+                anthropic_request["thinking"] = serde_json::to_value(ThinkingConfiguration {
+                    thinking_type: "enabled".to_string(),
+                    budget_tokens: 16000,
+                })?;
+            }
+            ThinkingMode::Adaptive => {
+                // Opus 4.7+ require adaptive thinking; depth is controlled via
+                // `output_config.effort`. Users can override either via the model's
+                // `config` block (shallow-merged below).
+                anthropic_request["thinking"] = serde_json::json!({ "type": "adaptive" });
+                anthropic_request["output_config"] = serde_json::json!({ "effort": "high" });
+            }
+            ThinkingMode::None => {}
         }
+
         if let Some(tool_choice) = tool_choice {
             anthropic_request["tool_choice"] = tool_choice;
         }
@@ -1938,4 +1993,54 @@ mod tests {
             panic!("Expected ToolResult content");
         }
     }
+
+    fn make_client(model: &str) -> AnthropicClient {
+        AnthropicClient::new(
+            "test-key".to_string(),
+            model.to_string(),
+            AnthropicClient::default_base_url(),
+        )
+    }
+
+    #[test]
+    fn test_thinking_mode_detection() {
+        // Adaptive-only models (Opus 4.7+).
+        for id in [
+            "claude-opus-4-7",
+            "claude-opus-4-8",
+            "claude-opus-latest",
+            "vendor-prefix/claude-opus-4-7",
+        ] {
+            assert_eq!(
+                make_client(id).thinking_mode(),
+                ThinkingMode::Adaptive,
+                "expected adaptive for {id}",
+            );
+        }
+
+        // Manual extended thinking models.
+        for id in [
+            "claude-sonnet-4-6",
+            "claude-sonnet-4-5",
+            "claude-3-7-sonnet",
+            "claude-opus-4",
+            "claude-opus-4-5",
+            "claude-opus-4-6",
+        ] {
+            assert_eq!(
+                make_client(id).thinking_mode(),
+                ThinkingMode::Manual,
+                "expected manual for {id}",
+            );
+        }
+
+        // Models that don't support extended thinking.
+        for id in ["claude-3-5-sonnet", "claude-haiku-4-5", "gpt-4o"] {
+            assert_eq!(
+                make_client(id).thinking_mode(),
+                ThinkingMode::None,
+                "expected none for {id}",
+            );
+        }
+    }
 }
diff --git a/models.example.json b/models.example.json
@@ -1,4 +1,19 @@
 {
+  "Claude Opus 4.7 (Adaptive Thinking)": {
+    "provider": "anthropic-main",
+    "id": "claude-opus-4-7",
+    "context_token_limit": 200000,
+    "config": {
+      "max_tokens": 64000,
+      "temperature": 1.0,
+      "thinking": {
+        "type": "adaptive"
+      },
+      "output_config": {
+        "effort": "high"
+      }
+    }
+  },
   "Claude Sonnet 4.6 (Thinking)": {
     "provider": "anthropic-main",
     "id": "claude-sonnet-4-6",