Skip to content

Commit 03884a4

Browse files
handle deadlock for too many requests timeout trigger check (#650)
1 parent 4e9f969 commit 03884a4

1 file changed

Lines changed: 53 additions & 16 deletions

File tree

atoma-service/src/middleware.rs

Lines changed: 53 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -811,22 +811,7 @@ pub async fn verify_permissions(
811811
message: "Model is not a string".to_string(),
812812
endpoint: endpoint.clone(),
813813
})?;
814-
if let Some(trigger_time) = state.too_many_requests.get(model) {
815-
if trigger_time.elapsed().as_millis() < state.too_many_requests_timeout_ms {
816-
tracing::info!(
817-
target = "atoma-service",
818-
level = "info",
819-
"Too many requests for model: {model}, endpoint: {endpoint}, elapsed trigger time: {} and timeout: {}",
820-
trigger_time.elapsed().as_millis(),
821-
state.too_many_requests_timeout_ms
822-
);
823-
return Err(AtomaServiceError::ChatCompletionsServiceUnavailable {
824-
message: "Too many requests".to_string(),
825-
endpoint: endpoint.clone(),
826-
});
827-
}
828-
state.too_many_requests.remove(model);
829-
}
814+
utils::check_if_too_many_requests(&state, model, &endpoint)?;
830815
if !state.models.contains(&model.to_string()) {
831816
return Err(AtomaServiceError::InvalidBody {
832817
message: format!("Model not supported, supported models: {:?}", state.models),
@@ -1603,4 +1588,56 @@ pub mod utils {
16031588
}
16041589
Ok(())
16051590
}
1591+
1592+
/// Checks whether requests for `model` must currently be rejected as "too many requests".
1593+
///
1594+
/// This function looks `model` up in `state.too_many_requests`. If an entry exists and the time elapsed since the instant stored in it is still less than `state.too_many_requests_timeout_ms`, the request is rejected. If the entry has outlived the timeout it is removed and the request may proceed.
1595+
///
1596+
/// # Arguments
1597+
/// * `state` - The application state containing the `too_many_requests` map and the `too_many_requests_timeout_ms` timeout
1598+
/// * `model` - The model name used as the key into the `too_many_requests` map
1599+
/// * `endpoint` - The API endpoint path being accessed (used for logging and error context)
1600+
///
1601+
/// # Returns
1602+
/// * `Ok(())` - If the model has no entry in the map, or its entry has outlived the timeout (in which case the entry is removed)
1603+
/// * `Err(AtomaServiceError)` - `ChatCompletionsServiceUnavailable` if the model's entry is still within the timeout
1604+
///
1605+
/// # Errors
1606+
/// This function will return an error only if both of the following hold:
1607+
/// - The model has an entry in the `too_many_requests` map
1608+
/// - The time elapsed since the instant stored in that entry is less than `too_many_requests_timeout_ms`
1609+
#[instrument(level = "info", skip_all, err)]
1610+
pub fn check_if_too_many_requests(
1611+
state: &AppState,
1612+
model: &str,
1613+
endpoint: &str,
1614+
) -> Result<(), AtomaServiceError> {
1615+
// Go through the entry API so that the read of the stored instant and the removal
// below both use the same entry handle, rather than calling `remove` on the map while
// a separate reference into it is still alive (DashMap documents that as a potential deadlock).
match state.too_many_requests.entry(model.to_string()) {
1616+
dashmap::mapref::entry::Entry::Occupied(occupied_entry) => {
1617+
// Compute the elapsed time once so the comparison and the log line report the same value.
let elapsed_ms = occupied_entry.get().elapsed().as_millis();
1618+
1619+
if elapsed_ms < state.too_many_requests_timeout_ms {
1620+
tracing::info!(
1621+
target = "atoma-service",
1622+
level = "info",
1623+
"Too many requests for model: {model}, endpoint: {endpoint}, elapsed trigger time: {elapsed_ms} and timeout: {}",
1624+
state.too_many_requests_timeout_ms
1625+
);
1626+
// Still inside the timeout window: reject the request; the entry is left in the map.
return Err(AtomaServiceError::ChatCompletionsServiceUnavailable {
1627+
message: "Too many requests".to_string(),
1628+
endpoint: endpoint.to_string(),
1629+
});
1630+
}
1631+
// The timeout has elapsed: drop the stale entry so later requests find the map vacant.
occupied_entry.remove();
1632+
}
1633+
dashmap::mapref::entry::Entry::Vacant(_) => {
1634+
tracing::debug!(
1635+
target = "atoma-service",
1636+
level = "debug",
1637+
"Model is not in the `too_many_requests` map, so no action is needed here. Processing can continue."
1638+
);
1639+
}
1640+
}
1641+
Ok(())
1642+
}
16061643
}

0 commit comments

Comments
 (0)