Skip to content

Commit d1aa3ac

Browse files
committed
feat: make exponential backoff base and max intervals configurable
1 parent ba22745 commit d1aa3ac

4 files changed

Lines changed: 13 additions & 2 deletions

File tree

crates/brightstaff/src/handlers/llm.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -404,8 +404,11 @@ async fn llm_chat_inner(
404404
let max_retries = provider.max_retries.unwrap_or(1);
405405
if provider.retry_on_ratelimit == Some(true) && (attempts - 1) < max_retries as usize
406406
{
407-
// Exponential backoff: base interval 25ms
408-
let backoff_ms = 25 * 2u64.pow(attempts as u32 - 1);
407+
// Exponential backoff: base_ms * 2^(attempts - 1), capped at max_ms (defaults: 25ms base, 1000ms cap)
408+
let base_ms = provider.retry_backoff_base_ms.unwrap_or(25);
409+
let max_ms = provider.retry_backoff_max_ms.unwrap_or(1000);
410+
let backoff_ms = (base_ms * 2u64.pow(attempts as u32 - 1)).min(max_ms);
411+
409412
debug!(
410413
request_id = %request_id,
411414
"429 received, retrying after {}ms (attempt {})",

crates/common/src/configuration.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,8 @@ pub struct LlmProvider {
331331
pub retry_on_ratelimit: Option<bool>,
332332
pub max_retries: Option<u32>,
333333
pub retry_to_same_provider: Option<bool>,
334+
pub retry_backoff_base_ms: Option<u64>,
335+
pub retry_backoff_max_ms: Option<u64>,
334336
}
335337

336338
pub trait IntoModels {
@@ -378,6 +380,8 @@ impl Default for LlmProvider {
378380
retry_on_ratelimit: None,
379381
max_retries: None,
380382
retry_to_same_provider: None,
383+
retry_backoff_base_ms: None,
384+
retry_backoff_max_ms: None,
381385
}
382386
}
383387
}

crates/common/src/llm_providers.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,8 @@ mod tests {
313313
retry_on_ratelimit: None,
314314
max_retries: None,
315315
retry_to_same_provider: None,
316+
retry_backoff_base_ms: None,
317+
retry_backoff_max_ms: None,
316318
}
317319
}
318320

docs/source/resources/includes/arch_config_full_reference.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ model_providers:
4040
retry_on_ratelimit: true
4141
max_retries: 2
4242
retry_to_same_provider: false # If false, Arch will pick another random model from the list
43+
retry_backoff_base_ms: 25 # Base delay in milliseconds for exponential backoff (defaults to 25 when unset)
44+
retry_backoff_max_ms: 1000 # Upper bound in milliseconds on the exponential backoff delay (defaults to 1000 when unset)
4345

4446
# Example: Passthrough authentication for LiteLLM or similar proxies
4547
# When passthrough_auth is true, client's Authorization header is forwarded

0 commit comments

Comments
 (0)