deprecate --thinking-budget and remove the unused field that plumbed it through the call chain

alexylon · alexylon · commit b90477f4296c · 2026-05-07T00:14:59.000+03:00
diff --git a/src/cli.rs b/src/cli.rs
@@ -1,6 +1,13 @@
 use crate::error::SofosError;
 use clap::Parser;
 
+/// Default for the deprecated `--thinking-budget` flag. Kept as a named
+/// const so `main.rs` can warn when the user supplies a value that
+/// differs from it: a non-default value means the user expected the
+/// flag to do something it no longer does. Removable when the flag
+/// itself goes.
+pub const THINKING_BUDGET_DEFAULT: u32 = 5120;
+
 #[derive(Parser, Debug)]
 #[command(
     name = "sofos",
@@ -55,12 +62,14 @@ pub struct Cli {
     #[arg(short = 'e', long, default_value = "medium")]
     pub reasoning_effort: crate::api::ReasoningEffort,
 
-    /// Vestigial. Currently inert on every path: legacy Anthropic uses
-    /// a fixed per-tier budget (Low=1024, Medium=5120, High=16384),
-    /// Anthropic adaptive (Opus 4.7+) uses `output_config.effort`, and
-    /// OpenAI uses `reasoning.effort`. Kept for backwards-compatibility;
-    /// will be removed in a later release.
-    #[arg(long, default_value = "5120")]
+    /// Deprecated. The flag has no effect on any path: legacy Anthropic
+    /// uses a fixed per-tier budget (Low=1024, Medium=5120, High=16384),
+    /// adaptive Anthropic (Opus 4.7+) uses `output_config.effort`, and
+    /// OpenAI uses `reasoning.effort`. The flag still parses so older
+    /// scripts don't break; `main.rs` warns at startup when a non-default
+    /// value is supplied. Hidden from `--help`. Will be removed in a
+    /// future release. Use `--reasoning-effort` to control thinking depth.
+    #[arg(long, default_value_t = THINKING_BUDGET_DEFAULT, hide = true)]
     pub thinking_budget: u32,
 
     #[arg(short, long)]
diff --git a/src/config.rs b/src/config.rs
@@ -53,24 +53,18 @@ pub struct ModelConfig {
     pub model: String,
     pub max_tokens: u32,
     pub reasoning_effort: crate::api::ReasoningEffort,
-    /// Token budget for non-adaptive Anthropic extended thinking. Ignored
-    /// on OpenAI and on Anthropic adaptive (Opus 4.7+) where the server
-    /// picks the budget from `output_config.effort`.
-    pub thinking_budget: u32,
 }
 
 impl ModelConfig {
     pub fn new(
         model: String,
         max_tokens: u32,
         reasoning_effort: crate::api::ReasoningEffort,
-        thinking_budget: u32,
     ) -> Self {
         Self {
             model,
             max_tokens,
             reasoning_effort,
-            thinking_budget,
         }
     }
 
diff --git a/src/main.rs b/src/main.rs
@@ -32,6 +32,13 @@ fn main() -> Result<()> {
 
     let cli = Cli::parse();
 
+    if cli.thinking_budget != cli::THINKING_BUDGET_DEFAULT {
+        tracing::warn!(
+            "--thinking-budget is deprecated and has no effect on any provider path. \
+             Use --reasoning-effort to control thinking depth. The flag will be removed in a future release."
+        );
+    }
+
     // Historically the logo printed here, up front. It's now deferred:
     // in interactive mode the banner text is collected into
     // `startup_banner` below and replayed through the TUI's capture
@@ -85,9 +92,7 @@ fn main() -> Result<()> {
             crate::api::anthropic::effort_label(cli.reasoning_effort)
         ));
     } else if cli.reasoning_effort.is_enabled() {
-        // Display the per-effort tier budget actually sent
-        // (`request_builder` no longer reads the inert
-        // `--thinking-budget` flag) so the startup banner matches
+        // Show the per-effort tier budget so the startup banner matches
         // what hits the API.
         let budget = crate::api::anthropic::legacy_thinking_budget(cli.reasoning_effort);
         startup_banner.push_str(&format!(
@@ -121,7 +126,6 @@ fn main() -> Result<()> {
         cli.model,
         cli.max_tokens,
         cli.reasoning_effort,
-        cli.thinking_budget,
         cli.safe_mode,
     );
 
diff --git a/src/repl/mod.rs b/src/repl/mod.rs
@@ -37,7 +37,6 @@ pub struct ReplConfig {
     pub model: String,
     pub max_tokens: u32,
     pub reasoning_effort: crate::api::ReasoningEffort,
-    pub thinking_budget: u32,
     pub safe_mode: bool,
 }
 
@@ -46,14 +45,12 @@ impl ReplConfig {
         model: String,
         max_tokens: u32,
         reasoning_effort: crate::api::ReasoningEffort,
-        thinking_budget: u32,
         safe_mode: bool,
     ) -> Self {
         Self {
             model,
             max_tokens,
             reasoning_effort,
-            thinking_budget,
             safe_mode,
         }
     }
@@ -139,11 +136,10 @@ impl Repl {
         }
 
         // Validate that `max_tokens` leaves room for the largest legacy
-        // thinking budget we might send. The actual budget is now picked
-        // per-effort in `request_builder` (Low=1024, Medium=5120,
-        // High=16384) rather than read from the user's `--thinking-budget`
-        // flag, so the invariant we need is `max_tokens > HIGH`. We check
-        // unconditionally on enabled-thinking sessions instead of also
+        // thinking budget we might send. The budget is picked per-effort
+        // in `request_builder` (Low=1024, Medium=5120, High=16384), so
+        // the invariant we need is `max_tokens > HIGH`. We check
+        // unconditionally on enabled-thinking sessions rather than
         // probing the model id, because the model can be swapped mid-
         // session via `/model` and we don't want a runtime 400.
         if config.reasoning_effort.is_enabled()
@@ -171,12 +167,8 @@ impl Repl {
 
         let session_id = HistoryManager::generate_session_id();
         let session_state = SessionState::new(session_id, conversation);
-        let model_config = ModelConfig::new(
-            config.model,
-            config.max_tokens,
-            config.reasoning_effort,
-            config.thinking_budget,
-        );
+        let model_config =
+            ModelConfig::new(config.model, config.max_tokens, config.reasoning_effort);
 
         let ui = UI::new();
 
@@ -250,11 +242,10 @@ impl Repl {
             format!("effort: {}", crate::api::anthropic::effort_label(effort))
         } else if matches!(self.client, Anthropic(_)) {
             if effort.is_enabled() {
-                // The legacy non-adaptive shape's `budget_tokens` is
-                // picked from the effort tier in `request_builder`, not
-                // from the (inert) `--thinking-budget` flag. Display the
-                // value we actually send so the status line reflects
-                // reality.
+                // The legacy non-adaptive shape's `budget_tokens` comes
+                // from the effort tier (mapping in `request_builder`).
+                // Show the value we actually send so the status line
+                // matches reality.
                 let budget = crate::api::anthropic::legacy_thinking_budget(effort);
                 format!("thinking: {} tok", budget)
             } else {
@@ -615,7 +606,6 @@ impl Repl {
             self.model_config.model.clone(),
             self.model_config.max_tokens,
             self.model_config.reasoning_effort,
-            self.model_config.thinking_budget,
             self.available_tools.clone(),
             use_streaming,
             Arc::clone(&self.interrupt_flag),
@@ -686,7 +676,6 @@ impl Repl {
             &self.session_state.conversation,
             self.get_available_tools(),
             self.model_config.reasoning_effort,
-            self.model_config.thinking_budget,
             &self.session_state.session_id,
         )
         .build()
@@ -801,10 +790,8 @@ impl Repl {
             );
         } else if matches!(self.client, Anthropic(_)) {
             if effort.is_enabled() {
-                // Display the per-effort tier budget actually sent
-                // (`request_builder` no longer reads the inert
-                // `--thinking-budget` flag) so the `/think` output
-                // matches what hits the API.
+                // Show the per-effort tier budget so the `/think`
+                // output matches what hits the API.
                 let budget = crate::api::anthropic::legacy_thinking_budget(effort);
                 println!(
                     "\n{} (budget: {} tokens)\n",
diff --git a/src/repl/request_builder.rs b/src/repl/request_builder.rs
@@ -9,29 +9,19 @@ pub struct RequestBuilder<'a> {
     conversation: &'a ConversationHistory,
     tools: Vec<Tool>,
     reasoning_effort: ReasoningEffort,
-    /// CLI-plumbed budget hint from `--thinking-budget`. No longer read
-    /// on Anthropic — the effort tier now maps to a fixed per-level
-    /// budget in `build()` so `/think low|medium|high` produce visibly
-    /// different outputs. Kept on the struct to avoid churning every
-    /// caller's signature; remove together with the `--thinking-budget`
-    /// CLI flag if the surface is ever pruned.
-    #[allow(dead_code)]
-    thinking_budget: u32,
     /// Stable per-session identifier sent as `prompt_cache_key` on the
     /// OpenAI Responses path. Anthropic ignores it.
     session_id: &'a str,
 }
 
 impl<'a> RequestBuilder<'a> {
-    #[allow(clippy::too_many_arguments)]
     pub fn new(
         client: &'a LlmClient,
         model: &'a str,
         max_tokens: u32,
         conversation: &'a ConversationHistory,
         tools: Vec<Tool>,
         reasoning_effort: ReasoningEffort,
-        thinking_budget: u32,
         session_id: &'a str,
     ) -> Self {
         Self {
@@ -41,7 +31,6 @@ impl<'a> RequestBuilder<'a> {
             conversation,
             tools,
             reasoning_effort,
-            thinking_budget,
             session_id,
         }
     }
@@ -273,7 +262,6 @@ mod tests {
             &conv,
             one_regular_tool(),
             ReasoningEffort::Off,
-            0,
             "session-abc",
         )
         .build();
@@ -296,7 +284,6 @@ mod tests {
             &conv,
             one_regular_tool(),
             ReasoningEffort::Off,
-            0,
             "s1",
         )
         .build();
@@ -318,7 +305,6 @@ mod tests {
             &conv,
             one_regular_tool(),
             ReasoningEffort::Off,
-            0,
             "s1",
         )
         .build();
@@ -471,7 +457,6 @@ mod tests {
             &conv,
             one_regular_tool(),
             ReasoningEffort::Off,
-            0,
             "s1",
         )
         .build();
@@ -511,7 +496,6 @@ mod tests {
             &conv,
             one_regular_tool(),
             ReasoningEffort::Off,
-            0,
             "s1",
         )
         .build();
@@ -545,7 +529,6 @@ mod tests {
                 &conv,
                 one_regular_tool(),
                 effort,
-                0,
                 "s1",
             )
             .build();
@@ -573,7 +556,6 @@ mod tests {
             &conv,
             one_regular_tool(),
             ReasoningEffort::Off,
-            0,
             "s1",
         )
         .build();
@@ -589,7 +571,6 @@ mod tests {
             &conv,
             one_regular_tool(),
             ReasoningEffort::Off,
-            0,
             "s1",
         )
         .build();
diff --git a/src/repl/response_handler.rs b/src/repl/response_handler.rs
@@ -22,7 +22,6 @@ pub struct ResponseHandler {
     model: String,
     max_tokens: u32,
     reasoning_effort: crate::api::ReasoningEffort,
-    thinking_budget: u32,
     config: SofosConfig,
     available_tools: Vec<crate::api::Tool>,
     use_streaming: bool,
@@ -40,7 +39,6 @@ impl ResponseHandler {
         model: String,
         max_tokens: u32,
         reasoning_effort: crate::api::ReasoningEffort,
-        thinking_budget: u32,
         available_tools: Vec<crate::api::Tool>,
         use_streaming: bool,
         interrupt_flag: Arc<AtomicBool>,
@@ -55,7 +53,6 @@ impl ResponseHandler {
             model,
             max_tokens,
             reasoning_effort,
-            thinking_budget,
             config: SofosConfig::default(),
             available_tools,
             use_streaming,
@@ -713,7 +710,6 @@ impl ResponseHandler {
             &self.conversation,
             self.get_available_tools(),
             self.reasoning_effort,
-            self.thinking_budget,
             &self.session_id,
         )
         .build()