File tree Expand file tree Collapse file tree
docs/source/resources/includes Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -404,8 +404,11 @@ async fn llm_chat_inner(
404404 let max_retries = provider. max_retries . unwrap_or ( 1 ) ;
405405 if provider. retry_on_ratelimit == Some ( true ) && ( attempts - 1 ) < max_retries as usize
406406 {
407- // Exponential backoff: base interval 25ms
408- let backoff_ms = 25 * 2u64 . pow ( attempts as u32 - 1 ) ;
407+ // Exponential backoff: the base delay doubles on each attempt and is capped at a maximum (both configurable per provider)
408+ let base_ms = provider. retry_backoff_base_ms . unwrap_or ( 25 ) ;
409+ let max_ms = provider. retry_backoff_max_ms . unwrap_or ( 1000 ) ;
410+ let backoff_ms = ( base_ms * 2u64 . pow ( attempts as u32 - 1 ) ) . min ( max_ms) ;
411+
409412 debug ! (
410413 request_id = %request_id,
411414 "429 received, retrying after {}ms (attempt {})" ,
Original file line number Diff line number Diff line change @@ -331,6 +331,8 @@ pub struct LlmProvider {
331331 pub retry_on_ratelimit : Option < bool > ,
332332 pub max_retries : Option < u32 > ,
333333 pub retry_to_same_provider : Option < bool > ,
334+ pub retry_backoff_base_ms : Option < u64 > ,
335+ pub retry_backoff_max_ms : Option < u64 > ,
334336}
335337
336338pub trait IntoModels {
@@ -378,6 +380,8 @@ impl Default for LlmProvider {
378380 retry_on_ratelimit : None ,
379381 max_retries : None ,
380382 retry_to_same_provider : None ,
383+ retry_backoff_base_ms : None ,
384+ retry_backoff_max_ms : None ,
381385 }
382386 }
383387}
Original file line number Diff line number Diff line change @@ -313,6 +313,8 @@ mod tests {
313313 retry_on_ratelimit : None ,
314314 max_retries : None ,
315315 retry_to_same_provider : None ,
316+ retry_backoff_base_ms : None ,
317+ retry_backoff_max_ms : None ,
316318 }
317319 }
318320
Original file line number Diff line number Diff line change @@ -40,6 +40,8 @@ model_providers:
4040 retry_on_ratelimit : true
4141 max_retries : 2
4242 retry_to_same_provider : false # If false, Arch will pick another random model from the list
43+ retry_backoff_base_ms : 25 # Base delay in milliseconds for exponential backoff; doubles on each retry (default: 25)
44+ retry_backoff_max_ms : 1000 # Upper bound in milliseconds on the exponential backoff delay (default: 1000)
4345
4446 # Example: Passthrough authentication for LiteLLM or similar proxies
4547 # When passthrough_auth is true, client's Authorization header is forwarded
You can’t perform that action at this time.
0 commit comments