Skip to content

Commit 634347c

Browse files
Lewis-E and claude committed
chore: add diagnostic eprintln in serve() Event arms and stats_flusher loop heartbeat
Adds an eprintln! call to every match arm in mini_agent::serve() to identify which task event terminates the supervisor, plus a per-iteration heartbeat in the select loop of start_stats_flusher. Goal: distinguish "select! is still alive but no arm fires" from "the task was aborted by a *Died path in serve()".

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 37cfdd6 commit 634347c

2 files changed

Lines changed: 32 additions & 9 deletions

File tree

crates/datadog-trace-agent/src/mini_agent.rs

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -333,48 +333,68 @@ impl MiniAgent {
333333

334334
let result: Result<(), Box<dyn std::error::Error>> = match event {
335335
Event::Shutdown => {
336+
eprintln!("[diag mini_agent] serve(): Event::Shutdown selected");
336337
// The same shutdown_rx fan-out has already fired in each
337338
// accept loop concurrently with our select arm here.
338339
// Awaiting the transport handles waits for them to drain.
339-
if let Some(h) = tcp_handle.as_mut()
340-
&& let Err(e) = h.await
341-
{
342-
error!("TCP accept loop failed during shutdown: {e:?}");
340+
if let Some(h) = tcp_handle.as_mut() {
341+
eprintln!("[diag mini_agent] awaiting tcp_handle drain");
342+
if let Err(e) = h.await {
343+
eprintln!("[diag mini_agent] tcp_handle await Err: {e:?}");
344+
error!("TCP accept loop failed during shutdown: {e:?}");
345+
} else {
346+
eprintln!("[diag mini_agent] tcp_handle drained");
347+
}
343348
}
344349
#[cfg(all(windows, feature = "windows-pipes"))]
345-
if let Some(h) = pipe_handle.as_mut()
346-
&& let Err(e) = h.await
347-
{
348-
error!("Named pipe accept loop failed during shutdown: {e:?}");
350+
if let Some(h) = pipe_handle.as_mut() {
351+
eprintln!("[diag mini_agent] awaiting pipe_handle drain");
352+
if let Err(e) = h.await {
353+
eprintln!("[diag mini_agent] pipe_handle await Err: {e:?}");
354+
error!("Named pipe accept loop failed during shutdown: {e:?}");
355+
} else {
356+
eprintln!("[diag mini_agent] pipe_handle drained");
357+
}
349358
}
350359
// Now all handlers have written to the channels. Force-flush
351360
// the stats flusher.
361+
eprintln!("[diag mini_agent] sending flusher_shutdown_tx");
352362
let _ = flusher_shutdown_tx.send(());
363+
eprintln!("[diag mini_agent] awaiting stats_flusher_handle");
353364
match (&mut stats_flusher_handle).await {
354-
Ok(()) => Ok(()),
365+
Ok(()) => {
366+
eprintln!("[diag mini_agent] stats_flusher_handle returned Ok");
367+
Ok(())
368+
}
355369
Err(e) => {
370+
eprintln!("[diag mini_agent] stats_flusher_handle returned Err: {e:?}");
356371
Err(format!("Stats flusher task failed during shutdown: {e:?}").into())
357372
}
358373
}
359374
}
360375
Event::TcpDied(s) => {
376+
eprintln!("[diag mini_agent] Event::TcpDied: {s}");
361377
error!("TCP accept loop died: {s}");
362378
Err("TCP accept loop terminated unexpectedly".into())
363379
}
364380
#[cfg(all(windows, feature = "windows-pipes"))]
365381
Event::PipeDied(s) => {
382+
eprintln!("[diag mini_agent] Event::PipeDied: {s}");
366383
error!("Named pipe accept loop died: {s}");
367384
Err("Named pipe accept loop terminated unexpectedly".into())
368385
}
369386
Event::TraceFlusherDied(s) => {
387+
eprintln!("[diag mini_agent] Event::TraceFlusherDied: {s}");
370388
error!("Trace flusher task died: {s}");
371389
Err("Trace flusher task terminated unexpectedly".into())
372390
}
373391
Event::StatsFlusherDied(s) => {
392+
eprintln!("[diag mini_agent] Event::StatsFlusherDied: {s}");
374393
error!("Stats flusher task died: {s}");
375394
Err("Stats flusher task terminated unexpectedly".into())
376395
}
377396
Event::ConcentratorDied(s) => {
397+
eprintln!("[diag mini_agent] Event::ConcentratorDied: {s}");
378398
error!("Stats concentrator service task died: {s}");
379399
Err("Stats concentrator service task terminated unexpectedly".into())
380400
}

crates/datadog-trace-agent/src/stats_flusher.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,11 @@ impl StatsFlusher for ServerlessStatsFlusher {
9090
let mut interval =
9191
tokio::time::interval(time::Duration::from_secs(config.stats_flush_interval_secs));
9292
let mut buffer: Vec<pb::ClientStatsPayload> = Vec::new();
93+
let mut iter: u32 = 0;
9394

9495
loop {
96+
iter += 1;
97+
eprintln!("[diag stats_flusher] loop iter {iter}: awaiting select");
9598
tokio::select! {
9699
// Receive client stats and add them to the buffer
97100
Some(stats) = rx.recv() => {

0 commit comments

Comments
 (0)