@@ -908,16 +908,15 @@ async fn handle_streaming_response(
908908 & state. running_num_requests ,
909909 chat_completions_service_urls,
910910 & model. to_lowercase ( ) ,
911+ state. memory_upper_threshold ,
911912 )
912913 . await
913914 . map_err ( |e| AtomaServiceError :: ChatCompletionsServiceUnavailable {
914915 message : e. to_string ( ) ,
915916 endpoint : endpoint. clone ( ) ,
916917 } ) ?;
917918 if status_code == StatusCode :: TOO_MANY_REQUESTS {
918- state
919- . too_many_requests
920- . insert ( model. to_string ( ) , Instant :: now ( ) ) ;
919+ state. too_many_requests . insert ( model. to_string ( ) ) ;
921920 return Err ( AtomaServiceError :: ChatCompletionsServiceUnavailable {
922921 message : "Too many requests" . to_string ( ) ,
923922 endpoint : endpoint. clone ( ) ,
@@ -1341,16 +1340,15 @@ pub mod utils {
13411340 & state. running_num_requests ,
13421341 chat_completions_service_url_services,
13431342 model,
1343+ state. memory_upper_threshold ,
13441344 )
13451345 . await
13461346 . map_err ( |e| AtomaServiceError :: ChatCompletionsServiceUnavailable {
13471347 message : e. to_string ( ) ,
13481348 endpoint : endpoint. to_string ( ) ,
13491349 } ) ?;
13501350 if status_code == StatusCode :: TOO_MANY_REQUESTS {
1351- state
1352- . too_many_requests
1353- . insert ( model. to_string ( ) , Instant :: now ( ) ) ;
1351+ state. too_many_requests . insert ( model. to_string ( ) ) ;
13541352 return Err ( AtomaServiceError :: ChatCompletionsServiceUnavailable {
13551353 message : "Too many requests" . to_string ( ) ,
13561354 endpoint : endpoint. to_string ( ) ,
0 commit comments