@@ -811,22 +811,7 @@ pub async fn verify_permissions(
811811 message : "Model is not a string" . to_string ( ) ,
812812 endpoint : endpoint. clone ( ) ,
813813 } ) ?;
814- if let Some ( trigger_time) = state. too_many_requests . get ( model) {
815- if trigger_time. elapsed ( ) . as_millis ( ) < state. too_many_requests_timeout_ms {
816- tracing:: info!(
817- target = "atoma-service" ,
818- level = "info" ,
819- "Too many requests for model: {model}, endpoint: {endpoint}, elapsed trigger time: {} and timeout: {}" ,
820- trigger_time. elapsed( ) . as_millis( ) ,
821- state. too_many_requests_timeout_ms
822- ) ;
823- return Err ( AtomaServiceError :: ChatCompletionsServiceUnavailable {
824- message : "Too many requests" . to_string ( ) ,
825- endpoint : endpoint. clone ( ) ,
826- } ) ;
827- }
828- state. too_many_requests . remove ( model) ;
829- }
814+ utils:: check_if_too_many_requests ( & state, model, & endpoint) ?;
830815 if !state. models . contains ( & model. to_string ( ) ) {
831816 return Err ( AtomaServiceError :: InvalidBody {
832817 message : format ! ( "Model not supported, supported models: {:?}" , state. models) ,
@@ -1603,4 +1588,56 @@ pub mod utils {
16031588 }
16041589 Ok ( ( ) )
16051590 }
1591+
1592+ /// Checks if the model has too many requests.
1593+ ///
1594+ /// This function checks if the model has too many requests by checking if the elapsed time since the first occurrence is less than the timeout.
1595+ ///
1596+ /// # Arguments
1597+ /// * `state` - The application state containing the too many requests map
1598+ /// * `model` - The model to check
1599+ /// * `endpoint` - The API endpoint path being accessed (used for error context)
1600+ ///
1601+ /// # Returns
1602+ /// * `Ok(())` - If the model has too many requests
1603+ /// * `Err(AtomaServiceError)` - If the model has too many requests
1604+ ///
1605+ /// # Errors
1606+ /// This function will return an error if:
1607+ /// - The model has too many requests
1608+ /// - The elapsed time since the first occurrence is less than the timeout
1609+ #[ instrument( level = "info" , skip_all, err) ]
1610+ pub fn check_if_too_many_requests (
1611+ state : & AppState ,
1612+ model : & str ,
1613+ endpoint : & str ,
1614+ ) -> Result < ( ) , AtomaServiceError > {
1615+ match state. too_many_requests . entry ( model. to_string ( ) ) {
1616+ dashmap:: mapref:: entry:: Entry :: Occupied ( occupied_entry) => {
1617+ let elapsed_ms = occupied_entry. get ( ) . elapsed ( ) . as_millis ( ) ;
1618+
1619+ if elapsed_ms < state. too_many_requests_timeout_ms {
1620+ tracing:: info!(
1621+ target = "atoma-service" ,
1622+ level = "info" ,
1623+ "Too many requests for model: {model}, endpoint: {endpoint}, elapsed trigger time: {elapsed_ms} and timeout: {}" ,
1624+ state. too_many_requests_timeout_ms
1625+ ) ;
1626+ return Err ( AtomaServiceError :: ChatCompletionsServiceUnavailable {
1627+ message : "Too many requests" . to_string ( ) ,
1628+ endpoint : endpoint. to_string ( ) ,
1629+ } ) ;
1630+ }
1631+ occupied_entry. remove ( ) ;
1632+ }
1633+ dashmap:: mapref:: entry:: Entry :: Vacant ( _) => {
1634+ tracing:: debug!(
1635+ target = "atoma-service" ,
1636+ level = "debug" ,
1637+ "Model is not in the `too_many_requests` map, so no action is needed here. Processing can continue."
1638+ ) ;
1639+ }
1640+ }
1641+ Ok ( ( ) )
1642+ }
16061643}
0 commit comments