Skip to content

Commit 6b669a7

Browse files
authored
feat: add mem usage (#651)
* feat: add memusage to get_metrics
* add lower threshold for disabling the flag
* fix clippy
* address 2 comments
* add values to config
* fix
* fix tests
* fix name
1 parent 03884a4 commit 6b669a7

9 files changed

Lines changed: 229 additions & 98 deletions

File tree

atoma-bin/atoma_node.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -373,9 +373,11 @@ async fn main() -> Result<()> {
373373
keystore: Arc::new(keystore),
374374
address_index,
375375
whitelist_sui_addresses_for_fiat: config.service.whitelist_sui_addresses_for_fiat,
376-
too_many_requests: Arc::new(DashMap::new()),
376+
too_many_requests: Arc::new(DashSet::new()),
377377
too_many_requests_timeout_ms: u128::from(config.service.too_many_requests_timeout_ms),
378378
running_num_requests: Arc::new(RequestCounter::new()),
379+
memory_lower_threshold: config.service.memory_lower_threshold,
380+
memory_upper_threshold: config.service.memory_upper_threshold,
379381
};
380382

381383
let chat_completions_service_urls = app_state

atoma-service/src/config.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ pub struct AtomaServiceConfig {
6060

6161
/// The timeout for the too many requests error in milliseconds.
6262
pub too_many_requests_timeout_ms: u64,
63+
64+
/// Lower threshold for memory usage; if the memory usage goes below this value, the service will not be considered overloaded
65+
pub memory_lower_threshold: f64,
66+
/// Upper threshold for memory usage; if the memory usage goes above this value, the service will be considered overloaded
67+
pub memory_upper_threshold: f64,
6368
}
6469

6570
impl AtomaServiceConfig {

atoma-service/src/handlers/chat_completions.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -908,16 +908,15 @@ async fn handle_streaming_response(
908908
&state.running_num_requests,
909909
chat_completions_service_urls,
910910
&model.to_lowercase(),
911+
state.memory_upper_threshold,
911912
)
912913
.await
913914
.map_err(|e| AtomaServiceError::ChatCompletionsServiceUnavailable {
914915
message: e.to_string(),
915916
endpoint: endpoint.clone(),
916917
})?;
917918
if status_code == StatusCode::TOO_MANY_REQUESTS {
918-
state
919-
.too_many_requests
920-
.insert(model.to_string(), Instant::now());
919+
state.too_many_requests.insert(model.to_string());
921920
return Err(AtomaServiceError::ChatCompletionsServiceUnavailable {
922921
message: "Too many requests".to_string(),
923922
endpoint: endpoint.clone(),
@@ -1341,16 +1340,15 @@ pub mod utils {
13411340
&state.running_num_requests,
13421341
chat_completions_service_url_services,
13431342
model,
1343+
state.memory_upper_threshold,
13441344
)
13451345
.await
13461346
.map_err(|e| AtomaServiceError::ChatCompletionsServiceUnavailable {
13471347
message: e.to_string(),
13481348
endpoint: endpoint.to_string(),
13491349
})?;
13501350
if status_code == StatusCode::TOO_MANY_REQUESTS {
1351-
state
1352-
.too_many_requests
1353-
.insert(model.to_string(), Instant::now());
1351+
state.too_many_requests.insert(model.to_string());
13541352
return Err(AtomaServiceError::ChatCompletionsServiceUnavailable {
13551353
message: "Too many requests".to_string(),
13561354
endpoint: endpoint.to_string(),

atoma-service/src/handlers/completions.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -881,16 +881,15 @@ async fn handle_streaming_response(
881881
&state.running_num_requests,
882882
chat_completions_service_urls,
883883
model,
884+
state.memory_upper_threshold,
884885
)
885886
.await
886887
.map_err(|e| AtomaServiceError::ChatCompletionsServiceUnavailable {
887888
message: e.to_string(),
888889
endpoint: endpoint.clone(),
889890
})?;
890891
if status_code == StatusCode::TOO_MANY_REQUESTS {
891-
state
892-
.too_many_requests
893-
.insert(model.to_string(), Instant::now());
892+
state.too_many_requests.insert(model.to_string());
894893
return Err(AtomaServiceError::ChatCompletionsServiceUnavailable {
895894
message: "Too many requests".to_string(),
896895
endpoint: endpoint.clone(),
@@ -1303,16 +1302,15 @@ pub mod utils {
13031302
&state.running_num_requests,
13041303
completions_service_url_services,
13051304
model,
1305+
state.memory_upper_threshold,
13061306
)
13071307
.await
13081308
.map_err(|e| AtomaServiceError::ChatCompletionsServiceUnavailable {
13091309
message: e.to_string(),
13101310
endpoint: endpoint.to_string(),
13111311
})?;
13121312
if status_code == StatusCode::TOO_MANY_REQUESTS {
1313-
state
1314-
.too_many_requests
1315-
.insert(model.to_string(), Instant::now());
1313+
state.too_many_requests.insert(model.to_string());
13161314
return Err(AtomaServiceError::ChatCompletionsServiceUnavailable {
13171315
message: "Too many requests".to_string(),
13181316
endpoint: endpoint.to_string(),

0 commit comments

Comments
 (0)