Skip to content

Commit b90477f

Browse files
committed
deprecate --thinking-budget and remove the unused field that plumbed it through the call chain
1 parent 991da67 commit b90477f

6 files changed

Lines changed: 35 additions & 64 deletions

File tree

src/cli.rs

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,13 @@
11
use crate::error::SofosError;
22
use clap::Parser;
33

4+
/// Default for the deprecated `--thinking-budget` flag. Kept as a named
5+
/// const so `main.rs` can warn when the user supplies a value that
6+
/// differs from this default; a non-default value means the user expected the flag
7+
/// to do something it no longer does. Removable when the flag itself
8+
/// goes.
9+
pub const THINKING_BUDGET_DEFAULT: u32 = 5120;
10+
411
#[derive(Parser, Debug)]
512
#[command(
613
name = "sofos",
@@ -55,12 +62,14 @@ pub struct Cli {
5562
#[arg(short = 'e', long, default_value = "medium")]
5663
pub reasoning_effort: crate::api::ReasoningEffort,
5764

58-
/// Vestigial. Currently inert on every path: legacy Anthropic uses
59-
/// a fixed per-tier budget (Low=1024, Medium=5120, High=16384),
60-
/// Anthropic adaptive (Opus 4.7+) uses `output_config.effort`, and
61-
/// OpenAI uses `reasoning.effort`. Kept for backwards-compatibility;
62-
/// will be removed in a later release.
63-
#[arg(long, default_value = "5120")]
65+
/// Deprecated. The flag has no effect on any path: legacy Anthropic
66+
/// uses a fixed per-tier budget (Low=1024, Medium=5120, High=16384),
67+
/// adaptive Anthropic (Opus 4.7+) uses `output_config.effort`, and
68+
/// OpenAI uses `reasoning.effort`. The flag still parses so older
69+
/// scripts don't break; `main.rs` warns at startup when a non-default
70+
/// value is supplied. Hidden from `--help`. Will be removed in a
71+
/// future release. Use `--reasoning-effort` to control thinking depth.
72+
#[arg(long, default_value_t = THINKING_BUDGET_DEFAULT, hide = true)]
6473
pub thinking_budget: u32,
6574

6675
#[arg(short, long)]

src/config.rs

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -53,24 +53,18 @@ pub struct ModelConfig {
5353
pub model: String,
5454
pub max_tokens: u32,
5555
pub reasoning_effort: crate::api::ReasoningEffort,
56-
/// Token budget for non-adaptive Anthropic extended thinking. Ignored
57-
/// on OpenAI and on Anthropic adaptive (Opus 4.7+) where the server
58-
/// picks the budget from `output_config.effort`.
59-
pub thinking_budget: u32,
6056
}
6157

6258
impl ModelConfig {
6359
pub fn new(
6460
model: String,
6561
max_tokens: u32,
6662
reasoning_effort: crate::api::ReasoningEffort,
67-
thinking_budget: u32,
6863
) -> Self {
6964
Self {
7065
model,
7166
max_tokens,
7267
reasoning_effort,
73-
thinking_budget,
7468
}
7569
}
7670

src/main.rs

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,13 @@ fn main() -> Result<()> {
3232

3333
let cli = Cli::parse();
3434

35+
if cli.thinking_budget != cli::THINKING_BUDGET_DEFAULT {
36+
tracing::warn!(
37+
"--thinking-budget is deprecated and has no effect on any provider path. \
38+
Use --reasoning-effort to control thinking depth. The flag will be removed in a future release."
39+
);
40+
}
41+
3542
// Historically the logo printed here, up front. It's now deferred:
3643
// in interactive mode the banner text is collected into
3744
// `startup_banner` below and replayed through the TUI's capture
@@ -85,9 +92,7 @@ fn main() -> Result<()> {
8592
crate::api::anthropic::effort_label(cli.reasoning_effort)
8693
));
8794
} else if cli.reasoning_effort.is_enabled() {
88-
// Display the per-effort tier budget actually sent
89-
// (`request_builder` no longer reads the inert
90-
// `--thinking-budget` flag) so the startup banner matches
95+
// Show the per-effort tier budget so the startup banner matches
9196
// what hits the API.
9297
let budget = crate::api::anthropic::legacy_thinking_budget(cli.reasoning_effort);
9398
startup_banner.push_str(&format!(
@@ -121,7 +126,6 @@ fn main() -> Result<()> {
121126
cli.model,
122127
cli.max_tokens,
123128
cli.reasoning_effort,
124-
cli.thinking_budget,
125129
cli.safe_mode,
126130
);
127131

src/repl/mod.rs

Lines changed: 12 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,6 @@ pub struct ReplConfig {
3737
pub model: String,
3838
pub max_tokens: u32,
3939
pub reasoning_effort: crate::api::ReasoningEffort,
40-
pub thinking_budget: u32,
4140
pub safe_mode: bool,
4241
}
4342

@@ -46,14 +45,12 @@ impl ReplConfig {
4645
model: String,
4746
max_tokens: u32,
4847
reasoning_effort: crate::api::ReasoningEffort,
49-
thinking_budget: u32,
5048
safe_mode: bool,
5149
) -> Self {
5250
Self {
5351
model,
5452
max_tokens,
5553
reasoning_effort,
56-
thinking_budget,
5754
safe_mode,
5855
}
5956
}
@@ -139,11 +136,10 @@ impl Repl {
139136
}
140137

141138
// Validate that `max_tokens` leaves room for the largest legacy
142-
// thinking budget we might send. The actual budget is now picked
143-
// per-effort in `request_builder` (Low=1024, Medium=5120,
144-
// High=16384) rather than read from the user's `--thinking-budget`
145-
// flag, so the invariant we need is `max_tokens > HIGH`. We check
146-
// unconditionally on enabled-thinking sessions instead of also
139+
// thinking budget we might send. The budget is picked per-effort
140+
// in `request_builder` (Low=1024, Medium=5120, High=16384), so
141+
// the invariant we need is `max_tokens > HIGH`. We check
142+
// unconditionally on enabled-thinking sessions rather than
147143
// probing the model id, because the model can be swapped mid-
148144
// session via `/model` and we don't want a runtime 400.
149145
if config.reasoning_effort.is_enabled()
@@ -171,12 +167,8 @@ impl Repl {
171167

172168
let session_id = HistoryManager::generate_session_id();
173169
let session_state = SessionState::new(session_id, conversation);
174-
let model_config = ModelConfig::new(
175-
config.model,
176-
config.max_tokens,
177-
config.reasoning_effort,
178-
config.thinking_budget,
179-
);
170+
let model_config =
171+
ModelConfig::new(config.model, config.max_tokens, config.reasoning_effort);
180172

181173
let ui = UI::new();
182174

@@ -250,11 +242,10 @@ impl Repl {
250242
format!("effort: {}", crate::api::anthropic::effort_label(effort))
251243
} else if matches!(self.client, Anthropic(_)) {
252244
if effort.is_enabled() {
253-
// The legacy non-adaptive shape's `budget_tokens` is
254-
// picked from the effort tier in `request_builder`, not
255-
// from the (inert) `--thinking-budget` flag. Display the
256-
// value we actually send so the status line reflects
257-
// reality.
245+
// The legacy non-adaptive shape's `budget_tokens` comes
246+
// from the effort tier (mapping in `request_builder`).
247+
// Show the value we actually send so the status line
248+
// matches reality.
258249
let budget = crate::api::anthropic::legacy_thinking_budget(effort);
259250
format!("thinking: {} tok", budget)
260251
} else {
@@ -615,7 +606,6 @@ impl Repl {
615606
self.model_config.model.clone(),
616607
self.model_config.max_tokens,
617608
self.model_config.reasoning_effort,
618-
self.model_config.thinking_budget,
619609
self.available_tools.clone(),
620610
use_streaming,
621611
Arc::clone(&self.interrupt_flag),
@@ -686,7 +676,6 @@ impl Repl {
686676
&self.session_state.conversation,
687677
self.get_available_tools(),
688678
self.model_config.reasoning_effort,
689-
self.model_config.thinking_budget,
690679
&self.session_state.session_id,
691680
)
692681
.build()
@@ -801,10 +790,8 @@ impl Repl {
801790
);
802791
} else if matches!(self.client, Anthropic(_)) {
803792
if effort.is_enabled() {
804-
// Display the per-effort tier budget actually sent
805-
// (`request_builder` no longer reads the inert
806-
// `--thinking-budget` flag) so the `/think` output
807-
// matches what hits the API.
793+
// Show the per-effort tier budget so the `/think`
794+
// output matches what hits the API.
808795
let budget = crate::api::anthropic::legacy_thinking_budget(effort);
809796
println!(
810797
"\n{} (budget: {} tokens)\n",

src/repl/request_builder.rs

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -9,29 +9,19 @@ pub struct RequestBuilder<'a> {
99
conversation: &'a ConversationHistory,
1010
tools: Vec<Tool>,
1111
reasoning_effort: ReasoningEffort,
12-
/// CLI-plumbed budget hint from `--thinking-budget`. No longer read
13-
/// on Anthropic — the effort tier now maps to a fixed per-level
14-
/// budget in `build()` so `/think low|medium|high` produce visibly
15-
/// different outputs. Kept on the struct to avoid churning every
16-
/// caller's signature; remove together with the `--thinking-budget`
17-
/// CLI flag if the surface is ever pruned.
18-
#[allow(dead_code)]
19-
thinking_budget: u32,
2012
/// Stable per-session identifier sent as `prompt_cache_key` on the
2113
/// OpenAI Responses path. Anthropic ignores it.
2214
session_id: &'a str,
2315
}
2416

2517
impl<'a> RequestBuilder<'a> {
26-
#[allow(clippy::too_many_arguments)]
2718
pub fn new(
2819
client: &'a LlmClient,
2920
model: &'a str,
3021
max_tokens: u32,
3122
conversation: &'a ConversationHistory,
3223
tools: Vec<Tool>,
3324
reasoning_effort: ReasoningEffort,
34-
thinking_budget: u32,
3525
session_id: &'a str,
3626
) -> Self {
3727
Self {
@@ -41,7 +31,6 @@ impl<'a> RequestBuilder<'a> {
4131
conversation,
4232
tools,
4333
reasoning_effort,
44-
thinking_budget,
4534
session_id,
4635
}
4736
}
@@ -273,7 +262,6 @@ mod tests {
273262
&conv,
274263
one_regular_tool(),
275264
ReasoningEffort::Off,
276-
0,
277265
"session-abc",
278266
)
279267
.build();
@@ -296,7 +284,6 @@ mod tests {
296284
&conv,
297285
one_regular_tool(),
298286
ReasoningEffort::Off,
299-
0,
300287
"s1",
301288
)
302289
.build();
@@ -318,7 +305,6 @@ mod tests {
318305
&conv,
319306
one_regular_tool(),
320307
ReasoningEffort::Off,
321-
0,
322308
"s1",
323309
)
324310
.build();
@@ -471,7 +457,6 @@ mod tests {
471457
&conv,
472458
one_regular_tool(),
473459
ReasoningEffort::Off,
474-
0,
475460
"s1",
476461
)
477462
.build();
@@ -511,7 +496,6 @@ mod tests {
511496
&conv,
512497
one_regular_tool(),
513498
ReasoningEffort::Off,
514-
0,
515499
"s1",
516500
)
517501
.build();
@@ -545,7 +529,6 @@ mod tests {
545529
&conv,
546530
one_regular_tool(),
547531
effort,
548-
0,
549532
"s1",
550533
)
551534
.build();
@@ -573,7 +556,6 @@ mod tests {
573556
&conv,
574557
one_regular_tool(),
575558
ReasoningEffort::Off,
576-
0,
577559
"s1",
578560
)
579561
.build();
@@ -589,7 +571,6 @@ mod tests {
589571
&conv,
590572
one_regular_tool(),
591573
ReasoningEffort::Off,
592-
0,
593574
"s1",
594575
)
595576
.build();

src/repl/response_handler.rs

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@ pub struct ResponseHandler {
2222
model: String,
2323
max_tokens: u32,
2424
reasoning_effort: crate::api::ReasoningEffort,
25-
thinking_budget: u32,
2625
config: SofosConfig,
2726
available_tools: Vec<crate::api::Tool>,
2827
use_streaming: bool,
@@ -40,7 +39,6 @@ impl ResponseHandler {
4039
model: String,
4140
max_tokens: u32,
4241
reasoning_effort: crate::api::ReasoningEffort,
43-
thinking_budget: u32,
4442
available_tools: Vec<crate::api::Tool>,
4543
use_streaming: bool,
4644
interrupt_flag: Arc<AtomicBool>,
@@ -55,7 +53,6 @@ impl ResponseHandler {
5553
model,
5654
max_tokens,
5755
reasoning_effort,
58-
thinking_budget,
5956
config: SofosConfig::default(),
6057
available_tools,
6158
use_streaming,
@@ -713,7 +710,6 @@ impl ResponseHandler {
713710
&self.conversation,
714711
self.get_available_tools(),
715712
self.reasoning_effort,
716-
self.thinking_budget,
717713
&self.session_id,
718714
)
719715
.build()

0 commit comments

Comments
 (0)