Skip to content

Commit e7a78f9

Browse files
committed
feat: add heartbeat service
1 parent d59ea1f commit e7a78f9

4 files changed

Lines changed: 81 additions & 0 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
.DS_Store
12
target/
23
/models/
34
/config.toml

atoma-bin/atoma_node.rs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ impl NodeConfig {
6666
let service = AtomaServiceConfig::from_file_path(path);
6767
let state = AtomaStateManagerConfig::from_file_path(path);
6868
let daemon = AtomaDaemonConfig::from_file_path(path);
69+
6970
Self {
7071
sui,
7172
p2p,
@@ -165,6 +166,13 @@ async fn main() -> Result<()> {
165166
let (event_subscriber_sender, event_subscriber_receiver) = flume::unbounded();
166167
let (state_manager_sender, state_manager_receiver) = flume::unbounded();
167168
let (p2p_event_sender, p2p_event_receiver) = flume::unbounded();
169+
170+
// Start the heartbeat service
171+
start_heartbeat_service(
172+
shutdown_receiver.clone(),
173+
config.service.heartbeat_url.clone(),
174+
);
175+
168176
info!(
169177
target = "atoma-node-service",
170178
event = "keystore_path",
@@ -491,3 +499,71 @@ fn handle_tasks_results(
491499
)?;
492500
Ok(())
493501
}
502+
503+
/// Starts a heartbeat service that pings a health check endpoint every minute.
504+
///
505+
/// This function spawns a background task that sends a GET request to a health check
506+
/// service at regular intervals to indicate the daemon is still running.
507+
///
508+
/// # Arguments
509+
/// * `shutdown_receiver` - A receiver that signals when the service should shut down
510+
/// * `heartbeat_url` - The URL of the heartbeat service
511+
fn start_heartbeat_service(mut shutdown_receiver: watch::Receiver<bool>, heartbeat_url: String) {
512+
tokio::spawn(async move {
513+
let client = reqwest::Client::new();
514+
let interval = std::time::Duration::from_secs(60);
515+
516+
tracing::info!(
517+
target = "atoma_daemon",
518+
event = "heartbeat-service-start",
519+
url = %heartbeat_url.clone(),
520+
interval_secs = %interval.as_secs(),
521+
"Starting heartbeat service"
522+
);
523+
524+
loop {
525+
tokio::select! {
526+
_ = tokio::time::sleep(interval) => {
527+
// Send heartbeat ping
528+
match client.get(heartbeat_url.clone()).send().await {
529+
Ok(response) => {
530+
if response.status().is_success() {
531+
tracing::debug!(
532+
target = "atoma_daemon",
533+
event = "heartbeat-ping",
534+
status = %response.status(),
535+
"Sent heartbeat ping successfully"
536+
);
537+
} else {
538+
tracing::warn!(
539+
target = "atoma_daemon",
540+
event = "heartbeat-ping-failed",
541+
status = %response.status(),
542+
"Heartbeat ping returned non-success status"
543+
);
544+
}
545+
},
546+
Err(e) => {
547+
tracing::error!(
548+
target = "atoma_daemon",
549+
event = "heartbeat-ping-error",
550+
error = %e,
551+
"Failed to send heartbeat ping"
552+
);
553+
}
554+
}
555+
}
556+
result = shutdown_receiver.changed() => {
557+
if result.is_err() || *shutdown_receiver.borrow() {
558+
tracing::info!(
559+
target = "atoma_daemon",
560+
event = "heartbeat-service-shutdown",
561+
"Heartbeat service shutting down"
562+
);
563+
break;
564+
}
565+
}
566+
}
567+
}
568+
});
569+
}

atoma-service/src/config.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ pub struct AtomaServiceConfig {
4444
///
4545
/// This field specifies the address and port on which the Atoma Service will bind.
4646
pub service_bind_address: String,
47+
48+
/// The URL of the heartbeat (health check) endpoint that the node periodically pings with a GET request.
49+
pub heartbeat_url: String,
4750
}
4851

4952
impl AtomaServiceConfig {

config.example.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ image_generations_service_url = "http://image-generations:80"
3939
models = [ "Infermatic/Llama-3.3-70B-Instruct-FP8-Dynamic" ]
4040
revisions = [ "main" ]
4141
service_bind_address = "0.0.0.0:3000"
42+
heartbeat_url = "my-heartbeat-url"
4243

4344
[atoma_sui]
4445
atoma_db = "0x02920289f426dd1f3c2572d613f7dc92be95041720864a73d44d65585530efc5" # Current ATOMA DB object ID for testnet

0 commit comments

Comments
 (0)