Skip to content

Commit 27de9f3

Browse files
authored
feat(crashtracking): include Kind in crash ping and clarify requirements (#1595)
# What does this PR do? This PR updates what a `CrashPing` consists of. This is in preparation of allowing the crashtracker to not only handle UNIX signal based crashes, but other types of program terminations, such as unhandled exceptions. These are the biggest changes 1. We add a `CRASHTRACK_*_KIND` block in the communication protocol of the receiver. Previously, we always defaulted to `UnixSignal` in the receiver, which is not true in the world of supporting unhandled exceptions - This involves adding a public `with_kind` API to the crash info builder, which is why this PR is marked as `feat` 2. We also set `Kind` as an explicit `ErrorKind` for the `CrashPing`. Previously, under the assumption that we were only supporting unix signal based crashes, we would set it to `Crash ping`. This was bad design and this PR fixes that. This PR does not contain any changes for actually handling unhandled exceptions. It is still in the world of unix signals only, it can now be extended, however, in the PR above this one # Additional Notes PR on top of this on the stack: [feat(crashtracking): report unhandled exceptions](#1596) # How to test the change? Unit tests have been updated to include Kind. If not, then any time Kind is not included, crash ping build will fail Co-authored-by: gyuheon.oh <gyuheon.oh@datadoghq.com>
1 parent 0bd90fd commit 27de9f3

8 files changed

Lines changed: 89 additions & 33 deletions

File tree

bin_tests/tests/crashtracker_bin_test.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1365,11 +1365,16 @@ fn test_receiver_emits_debug_logs_on_receiver_issue() -> anyhow::Result<()> {
13651365
.context("spawning receiver process")?;
13661366

13671367
{
1368+
use libdd_crashtracker::ErrorKind;
1369+
13681370
let mut stdin = BufWriter::new(child.stdin.take().context("child stdin missing")?);
13691371
for line in [
13701372
"DD_CRASHTRACK_BEGIN_CONFIG".to_string(),
13711373
serde_json::to_string(&config)?,
13721374
"DD_CRASHTRACK_END_CONFIG".to_string(),
1375+
"DD_CRASHTRACK_BEGIN_KIND".to_string(),
1376+
serde_json::to_string(&ErrorKind::UnixSignal)?,
1377+
"DD_CRASHTRACK_END_KIND".to_string(),
13731378
"DD_CRASHTRACK_BEGIN_METADATA".to_string(),
13741379
serde_json::to_string(&metadata)?,
13751380
"DD_CRASHTRACK_END_METADATA".to_string(),
@@ -1567,7 +1572,7 @@ fn assert_crash_ping_message(body: &str) {
15671572

15681573
assert_eq!(message_json["version"].as_str(), Some("1.0"));
15691574

1570-
assert_eq!(message_json["kind"].as_str(), Some("Crash ping"));
1575+
assert_eq!(message_json["kind"].as_str(), Some("UnixSignal"));
15711576
}
15721577

15731578
// Old TestFixtures struct kept for UDS socket tests that weren't migrated

libdd-crashtracker-ffi/src/crash_info/builder.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,12 @@ pub unsafe extern "C" fn ddog_crasht_CrashInfoBuilder_with_thread_name(
428428
/// The `builder` can be null, but if non-null it must point to a Builder made by this module,
429429
/// which has not previously been dropped.
430430
/// All arguments must be valid.
431-
/// This method requires that the builder has a UUID and metadata set
431+
/// This method requires that the builder has `metadata` and `kind` set
432+
/// Applications can add `message` or `sig_info` to the builder to provide additional context.
433+
/// If set, the data will be used to derive the crash ping message in the order of
434+
/// - an explicit message set with `with_message`
435+
/// - sig_info set with `with_sig_info`
436+
/// - kind set with `with_kind`
432437
#[no_mangle]
433438
#[must_use]
434439
#[named]

libdd-crashtracker/src/collector/emitters.rs

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ use crate::runtime_callback::{
99
CallbackData,
1010
};
1111
use crate::shared::constants::*;
12-
use crate::{translate_si_code, CrashtrackerConfiguration, SignalNames, StacktraceCollection};
12+
use crate::{
13+
translate_si_code, CrashtrackerConfiguration, ErrorKind, SignalNames, StacktraceCollection,
14+
};
1315
use backtrace::Frame;
1416
use libc::{siginfo_t, ucontext_t};
1517
use std::{
@@ -145,13 +147,15 @@ pub(crate) fn emit_crashreport(
145147
crashing_tid: libc::pid_t,
146148
) -> Result<(), EmitterError> {
147149
// The following order is important in order to emit the crash ping:
148-
// - receiver expects the config
150+
// - receiver expects the config because the endpoint to emit to is there
149151
// - then message if any
150-
// - then siginfo (if the message is not set, we use the siginfo to generate the message)
152+
// - then siginfo if any
153+
// - then the kind if any
151154
// - then metadata
152155
emit_config(pipe, config_str)?;
153156
emit_message(pipe, message_ptr)?;
154157
emit_siginfo(pipe, sig_info)?;
158+
emit_kind(pipe, &ErrorKind::UnixSignal)?;
155159
emit_metadata(pipe, metadata_string)?;
156160
// after the metadata the ping should have been sent
157161
emit_ucontext(pipe, ucontext)?;
@@ -193,6 +197,15 @@ fn emit_config(w: &mut impl Write, config_str: &str) -> Result<(), EmitterError>
193197
Ok(())
194198
}
195199

200+
fn emit_kind<W: std::io::Write>(w: &mut W, kind: &ErrorKind) -> Result<(), EmitterError> {
201+
writeln!(w, "{DD_CRASHTRACK_BEGIN_KIND}")?;
202+
let _ = serde_json::to_writer(&mut *w, kind);
203+
writeln!(w)?;
204+
writeln!(w, "{DD_CRASHTRACK_END_KIND}")?;
205+
w.flush()?;
206+
Ok(())
207+
}
208+
196209
fn emit_metadata(w: &mut impl Write, metadata_str: &str) -> Result<(), EmitterError> {
197210
writeln!(w, "{DD_CRASHTRACK_BEGIN_METADATA}")?;
198211
writeln!(w, "{metadata_str}")?;

libdd-crashtracker/src/crash_info/builder.rs

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -401,14 +401,16 @@ impl CrashInfoBuilder {
401401
Ok(())
402402
}
403403

404-
/// This method requires that the builder has a UUID and metadata set.
405-
/// Siginfo is optional for platforms that don't support it (like Windows)
404+
/// This method requires that the builder has Metadata and Kind set
406405
pub fn build_crash_ping(&self) -> anyhow::Result<CrashPing> {
406+
let metadata = self.metadata.clone().context("metadata is required")?;
407+
let kind = self.error.kind.clone().context("kind is required")?;
407408
let message = self.error.message.clone();
408409
let sig_info = self.sig_info.clone();
409-
let metadata = self.metadata.clone().context("metadata is required")?;
410410

411-
let mut builder = CrashPingBuilder::new(self.uuid).with_metadata(metadata);
411+
let mut builder = CrashPingBuilder::new(self.uuid)
412+
.with_metadata(metadata)
413+
.with_kind(kind);
412414
if let Some(sig_info) = sig_info {
413415
builder = builder.with_sig_info(sig_info);
414416
}
@@ -419,16 +421,7 @@ impl CrashInfoBuilder {
419421
}
420422

421423
pub fn is_ping_ready(&self) -> bool {
422-
// On Unix platforms, wait for both metadata and siginfo
423-
// On Windows, siginfo is not available, so only wait for metadata
424-
#[cfg(unix)]
425-
{
426-
self.metadata.is_some() && self.sig_info.is_some()
427-
}
428-
#[cfg(windows)]
429-
{
430-
self.metadata.is_some()
431-
}
424+
self.metadata.is_some() && self.error.kind.is_some()
432425
}
433426

434427
pub fn has_message(&self) -> bool {
@@ -463,6 +456,8 @@ mod tests {
463456
#[test]
464457
fn test_with_message() {
465458
let mut builder = CrashInfoBuilder::new();
459+
460+
builder.with_kind(ErrorKind::UnixSignal).unwrap();
466461
let test_message = "Test error message".to_string();
467462

468463
let result = builder.with_message(test_message.clone());
@@ -473,6 +468,7 @@ mod tests {
473468
let sig_info = SigInfo::test_instance(42);
474469
builder.with_sig_info(sig_info).unwrap();
475470
builder.with_metadata(Metadata::test_instance(1)).unwrap();
471+
builder.with_kind(ErrorKind::UnixSignal).unwrap();
476472

477473
let crash_ping = builder.build_crash_ping().unwrap();
478474
assert!(crash_ping.message().contains(&test_message));
@@ -506,6 +502,7 @@ mod tests {
506502
let sig_info = SigInfo::test_instance(42);
507503
builder.with_sig_info(sig_info).unwrap();
508504
builder.with_metadata(Metadata::test_instance(1)).unwrap();
505+
builder.with_kind(ErrorKind::UnixSignal).unwrap();
509506

510507
let crash_ping = builder.build_crash_ping().unwrap();
511508
assert!(crash_ping.message().contains("second message"));
@@ -524,6 +521,7 @@ mod tests {
524521
builder.with_message(special_message.to_string()).unwrap();
525522
builder.with_sig_info(SigInfo::test_instance(42)).unwrap();
526523
builder.with_metadata(Metadata::test_instance(1)).unwrap();
524+
builder.with_kind(ErrorKind::UnixSignal).unwrap();
527525

528526
let crash_ping = builder.build_crash_ping().unwrap();
529527
assert!(crash_ping.message().contains(special_message));
@@ -543,6 +541,7 @@ mod tests {
543541

544542
builder.with_sig_info(SigInfo::test_instance(42)).unwrap();
545543
builder.with_metadata(Metadata::test_instance(1)).unwrap();
544+
builder.with_kind(ErrorKind::UnixSignal).unwrap();
546545

547546
let crash_ping = builder.build_crash_ping().unwrap();
548547
assert!(crash_ping.message().len() >= 10000);

libdd-crashtracker/src/crash_info/telemetry.rs

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// SPDX-License-Identifier: Apache-2.0
33
use std::{fmt::Write, time::SystemTime};
44

5-
use crate::SigInfo;
5+
use crate::{ErrorKind, SigInfo};
66

77
use super::{CrashInfo, Metadata};
88
use anyhow::Context;
@@ -26,6 +26,7 @@ struct TelemetryMetadata {
2626
pub struct CrashPingBuilder {
2727
crash_uuid: Uuid,
2828
custom_message: Option<String>,
29+
kind: Option<ErrorKind>,
2930
metadata: Option<Metadata>,
3031
sig_info: Option<SigInfo>,
3132
}
@@ -38,6 +39,7 @@ impl CrashPingBuilder {
3839
Self {
3940
crash_uuid,
4041
custom_message: None,
42+
kind: None,
4143
metadata: None,
4244
sig_info: None,
4345
}
@@ -53,6 +55,11 @@ impl CrashPingBuilder {
5355
self
5456
}
5557

58+
pub fn with_kind(mut self, kind: ErrorKind) -> Self {
59+
self.kind = Some(kind);
60+
self
61+
}
62+
5663
pub fn with_metadata(mut self, metadata: Metadata) -> Self {
5764
self.metadata = Some(metadata);
5865
self
@@ -62,6 +69,7 @@ impl CrashPingBuilder {
6269
let crash_uuid = self.crash_uuid;
6370
let sig_info = self.sig_info;
6471
let metadata = self.metadata.context("metadata is required")?;
72+
let kind = self.kind.context("kind is required")?;
6573

6674
let message = if let Some(custom_message) = self.custom_message {
6775
format!("Crashtracker crash ping: crash processing started - {custom_message}")
@@ -71,13 +79,13 @@ impl CrashPingBuilder {
7179
sig_info.si_code_human_readable, sig_info.si_signo_human_readable
7280
)
7381
} else {
74-
"Crashtracker crash ping: crash processing started - Process terminated".to_string()
82+
format!("Crashtracker crash ping: crash processing started - Process terminated due to {:?}", kind)
7583
};
7684

7785
Ok(CrashPing {
7886
crash_uuid: crash_uuid.to_string(),
79-
kind: "Crash ping".to_string(),
8087
message,
88+
kind,
8189
metadata,
8290
siginfo: sig_info,
8391
version: CrashPing::current_schema_version(),
@@ -88,11 +96,11 @@ impl CrashPingBuilder {
8896
#[derive(Debug, Serialize)]
8997
pub struct CrashPing {
9098
crash_uuid: String,
99+
kind: ErrorKind,
100+
message: String,
91101
#[serde(skip_serializing_if = "Option::is_none")]
92102
siginfo: Option<SigInfo>,
93-
message: String,
94103
version: String,
95-
kind: String,
96104
metadata: Metadata,
97105
}
98106

@@ -446,7 +454,10 @@ fn extract_crash_info_tags(crash_info: &CrashInfo) -> anyhow::Result<String> {
446454
#[cfg(test)]
447455
mod tests {
448456
use super::TelemetryCrashUploader;
449-
use crate::crash_info::{test_utils::TestInstance, CrashInfo, CrashInfoBuilder, Metadata};
457+
use crate::{
458+
crash_info::{test_utils::TestInstance, CrashInfo, CrashInfoBuilder, Metadata},
459+
ErrorKind,
460+
};
450461
use libdd_common::Endpoint;
451462
use libdd_telemetry::data::LogLevel;
452463
use std::{collections::HashSet, fs};
@@ -580,6 +591,7 @@ mod tests {
580591
let mut crash_info_builder = CrashInfoBuilder::new();
581592
crash_info_builder.with_sig_info(sig_info.clone()).unwrap();
582593
crash_info_builder.with_metadata(metadata.clone()).unwrap();
594+
crash_info_builder.with_kind(ErrorKind::UnixSignal).unwrap();
583595
let crash_ping = crash_info_builder.build_crash_ping().unwrap();
584596
t.upload_crash_ping(&crash_ping).await.unwrap();
585597

@@ -607,7 +619,7 @@ mod tests {
607619
assert!(Uuid::parse_str(message_json["crash_uuid"].as_str().unwrap()).is_ok());
608620

609621
assert_eq!(message_json["version"], "1.0");
610-
assert_eq!(message_json["kind"], "Crash ping");
622+
assert_eq!(message_json["kind"], "UnixSignal");
611623

612624
let metadata_in_message = &message_json["metadata"];
613625
assert!(
@@ -655,6 +667,7 @@ mod tests {
655667
let mut crash_info_builder = CrashInfoBuilder::new();
656668
crash_info_builder.with_sig_info(sig_info.clone()).unwrap();
657669
crash_info_builder.with_metadata(metadata.clone()).unwrap();
670+
crash_info_builder.with_kind(ErrorKind::UnixSignal).unwrap();
658671
let crash_ping = crash_info_builder.build_crash_ping().unwrap();
659672
t.upload_crash_ping(&crash_ping).await.unwrap();
660673

@@ -707,7 +720,7 @@ mod tests {
707720
);
708721

709722
assert_eq!(message_json["version"], "1.0");
710-
assert_eq!(message_json["kind"], "Crash ping");
723+
assert_eq!(message_json["kind"], "UnixSignal");
711724

712725
let tags = log_entry["tags"].as_str().unwrap();
713726
let uuid_str = message_json["crash_uuid"].as_str().unwrap();
@@ -738,6 +751,7 @@ mod tests {
738751
let mut crash_info_builder = CrashInfoBuilder::new();
739752
crash_info_builder.with_sig_info(sig_info.clone()).unwrap();
740753
crash_info_builder.with_metadata(metadata.clone()).unwrap();
754+
crash_info_builder.with_kind(ErrorKind::UnixSignal).unwrap();
741755
let crash_ping = crash_info_builder.build_crash_ping()?;
742756

743757
let endpoint = Some(Endpoint::from_slice(&format!(
@@ -781,7 +795,7 @@ mod tests {
781795
assert!(message_json["crash_uuid"].is_string());
782796
assert!(Uuid::parse_str(message_json["crash_uuid"].as_str().unwrap()).is_ok());
783797
assert_eq!(message_json["version"], "1.0");
784-
assert_eq!(message_json["kind"], "Crash ping");
798+
assert_eq!(message_json["kind"], "UnixSignal");
785799

786800
Ok(())
787801
}
@@ -794,6 +808,7 @@ mod tests {
794808
crash_info_builder
795809
.with_metadata(Metadata::test_instance(1))
796810
.unwrap();
811+
crash_info_builder.with_kind(ErrorKind::UnixSignal).unwrap();
797812
let result = crash_info_builder.build_crash_ping();
798813
assert!(result.is_ok());
799814
let crash_ping = result.unwrap();
@@ -807,6 +822,7 @@ mod tests {
807822
crash_info_builder
808823
.with_sig_info(crate::SigInfo::test_instance(1))
809824
.unwrap();
825+
crash_info_builder.with_kind(ErrorKind::UnixSignal).unwrap();
810826
let result = crash_info_builder.build_crash_ping();
811827
assert!(result.is_err());
812828
assert!(result
@@ -822,6 +838,7 @@ mod tests {
822838
crash_info_builder
823839
.with_metadata(Metadata::test_instance(1))
824840
.unwrap();
841+
crash_info_builder.with_kind(ErrorKind::UnixSignal).unwrap();
825842
let result = crash_info_builder.build_crash_ping();
826843
assert!(result.is_ok());
827844
let crash_ping = result.unwrap();
@@ -838,6 +855,7 @@ mod tests {
838855
let mut crash_info_builder = CrashInfoBuilder::new();
839856
crash_info_builder.with_sig_info(sig_info.clone()).unwrap();
840857
crash_info_builder.with_metadata(metadata.clone()).unwrap();
858+
crash_info_builder.with_kind(ErrorKind::UnixSignal).unwrap();
841859
let crash_ping = crash_info_builder.build_crash_ping().unwrap();
842860

843861
assert!(!crash_ping.crash_uuid().is_empty());
@@ -857,6 +875,7 @@ mod tests {
857875
let mut crash_info_builder = CrashInfoBuilder::new();
858876
crash_info_builder.with_sig_info(sig_info.clone()).unwrap();
859877
crash_info_builder.with_metadata(metadata.clone()).unwrap();
878+
crash_info_builder.with_kind(ErrorKind::UnixSignal).unwrap();
860879
let crash_ping = crash_info_builder.build_crash_ping().unwrap();
861880

862881
assert!(!crash_ping.crash_uuid().is_empty());
@@ -882,6 +901,7 @@ mod tests {
882901
crash_info_builder
883902
.with_message("my process panicked".to_string())
884903
.unwrap();
904+
crash_info_builder.with_kind(ErrorKind::UnixSignal).unwrap();
885905
let crash_ping = crash_info_builder.build_crash_ping().unwrap();
886906

887907
assert!(!crash_ping.crash_uuid().is_empty());
@@ -918,6 +938,7 @@ mod tests {
918938
let mut crash_info_builder = CrashInfoBuilder::new();
919939
crash_info_builder.with_sig_info(sig_info.clone()).unwrap();
920940
crash_info_builder.with_metadata(metadata.clone()).unwrap();
941+
crash_info_builder.with_kind(ErrorKind::UnixSignal).unwrap();
921942
let crash_ping = crash_info_builder.build_crash_ping().unwrap();
922943

923944
uploader.upload_crash_ping(&crash_ping).await?;
@@ -941,7 +962,7 @@ mod tests {
941962
assert!(message_json["crash_uuid"].is_string());
942963
assert!(Uuid::parse_str(message_json["crash_uuid"].as_str().unwrap()).is_ok());
943964
assert_eq!(message_json["version"], "1.0");
944-
assert_eq!(message_json["kind"], "Crash ping");
965+
assert_eq!(message_json["kind"], "UnixSignal");
945966

946967
let uploaded_siginfo = &message_json["siginfo"];
947968
assert_eq!(uploaded_siginfo["si_signo"], sig_info.si_signo);

libdd-crashtracker/src/receiver/mod.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ mod tests {
1818
use crate::collector::default_signals;
1919
use crate::crash_info::{SiCodes, SigInfo, SignalNames};
2020
use crate::shared::constants::*;
21-
use crate::{CrashtrackerConfiguration, StacktraceCollection};
21+
use crate::{CrashtrackerConfiguration, ErrorKind, StacktraceCollection};
2222
use std::time::Duration;
2323
use tokio::io::{AsyncWriteExt, BufReader};
2424
use tokio::net::UnixStream;
@@ -49,6 +49,10 @@ mod tests {
4949
.await?;
5050
to_socket(sender, DD_CRASHTRACK_END_SIGINFO).await?;
5151

52+
to_socket(sender, DD_CRASHTRACK_BEGIN_KIND).await?;
53+
to_socket(sender, serde_json::to_string(&ErrorKind::UnixSignal)?).await?;
54+
to_socket(sender, DD_CRASHTRACK_END_KIND).await?;
55+
5256
to_socket(sender, DD_CRASHTRACK_BEGIN_CONFIG).await?;
5357
to_socket(
5458
sender,

0 commit comments

Comments
 (0)