@@ -37,7 +37,6 @@ pub struct ReplConfig {
3737 pub model : String ,
3838 pub max_tokens : u32 ,
3939 pub reasoning_effort : crate :: api:: ReasoningEffort ,
40- pub thinking_budget : u32 ,
4140 pub safe_mode : bool ,
4241}
4342
@@ -46,14 +45,12 @@ impl ReplConfig {
4645 model : String ,
4746 max_tokens : u32 ,
4847 reasoning_effort : crate :: api:: ReasoningEffort ,
49- thinking_budget : u32 ,
5048 safe_mode : bool ,
5149 ) -> Self {
5250 Self {
5351 model,
5452 max_tokens,
5553 reasoning_effort,
56- thinking_budget,
5754 safe_mode,
5855 }
5956 }
@@ -139,11 +136,10 @@ impl Repl {
139136 }
140137
141138 // Validate that `max_tokens` leaves room for the largest legacy
142- // thinking budget we might send. The actual budget is now picked
143- // per-effort in `request_builder` (Low=1024, Medium=5120,
144- // High=16384) rather than read from the user's `--thinking-budget`
145- // flag, so the invariant we need is `max_tokens > HIGH`. We check
146- // unconditionally on enabled-thinking sessions instead of also
139+ // thinking budget we might send. The budget is picked per-effort
140+ // in `request_builder` (Low=1024, Medium=5120, High=16384), so
141+ // the invariant we need is `max_tokens > HIGH`. We check
142+ // unconditionally on enabled-thinking sessions rather than
147143 // probing the model id, because the model can be swapped mid-
148144 // session via `/model` and we don't want a runtime 400.
149145 if config. reasoning_effort . is_enabled ( )
@@ -171,12 +167,8 @@ impl Repl {
171167
172168 let session_id = HistoryManager :: generate_session_id ( ) ;
173169 let session_state = SessionState :: new ( session_id, conversation) ;
174- let model_config = ModelConfig :: new (
175- config. model ,
176- config. max_tokens ,
177- config. reasoning_effort ,
178- config. thinking_budget ,
179- ) ;
170+ let model_config =
171+ ModelConfig :: new ( config. model , config. max_tokens , config. reasoning_effort ) ;
180172
181173 let ui = UI :: new ( ) ;
182174
@@ -250,11 +242,10 @@ impl Repl {
250242 format ! ( "effort: {}" , crate :: api:: anthropic:: effort_label( effort) )
251243 } else if matches ! ( self . client, Anthropic ( _) ) {
252244 if effort. is_enabled ( ) {
253- // The legacy non-adaptive shape's `budget_tokens` is
254- // picked from the effort tier in `request_builder`, not
255- // from the (inert) `--thinking-budget` flag. Display the
256- // value we actually send so the status line reflects
257- // reality.
245+ // The legacy non-adaptive shape's `budget_tokens` comes
246+ // from the effort tier (mapping in `request_builder`).
247+ // Show the value we actually send so the status line
248+ // matches reality.
258249 let budget = crate :: api:: anthropic:: legacy_thinking_budget ( effort) ;
259250 format ! ( "thinking: {} tok" , budget)
260251 } else {
@@ -615,7 +606,6 @@ impl Repl {
615606 self . model_config . model . clone ( ) ,
616607 self . model_config . max_tokens ,
617608 self . model_config . reasoning_effort ,
618- self . model_config . thinking_budget ,
619609 self . available_tools . clone ( ) ,
620610 use_streaming,
621611 Arc :: clone ( & self . interrupt_flag ) ,
@@ -686,7 +676,6 @@ impl Repl {
686676 & self . session_state . conversation ,
687677 self . get_available_tools ( ) ,
688678 self . model_config . reasoning_effort ,
689- self . model_config . thinking_budget ,
690679 & self . session_state . session_id ,
691680 )
692681 . build ( )
@@ -801,10 +790,8 @@ impl Repl {
801790 ) ;
802791 } else if matches ! ( self . client, Anthropic ( _) ) {
803792 if effort. is_enabled ( ) {
804- // Display the per-effort tier budget actually sent
805- // (`request_builder` no longer reads the inert
806- // `--thinking-budget` flag) so the `/think` output
807- // matches what hits the API.
793+ // Show the per-effort tier budget so the `/think`
794+ // output matches what hits the API.
808795 let budget = crate :: api:: anthropic:: legacy_thinking_budget ( effort) ;
809796 println ! (
810797 "\n {} (budget: {} tokens)\n " ,
0 commit comments