Skip to content

Commit a97e1d4

Browse files
authored
fix(crashtracker): support socket based receiver for all thread collection (#2080)
# What does this PR do? In [crash_handler.rs](https://github.com/DataDog/libdatadog/blob/582bee193e6f4688fa546ca0b3e3b382de672538/libdd-crashtracker/src/collector/crash_handler.rs#L308-L315), PR_SET_PTRACER is only called when receiver.handle.pid is `Some`: ``` if let Some(receiver_pid) = receiver.handle.pid { unsafe { libc::prctl(libc::PR_SET_PTRACER, receiver_pid as libc::c_ulong); } } ``` PHP connects to a pre-existing sidecar process [using a Unix socket](https://github.com/DataDog/dd-trace-php/blob/991880e2016965f09bc0093e10537737d283f121/ext/signals.c#L281-L289) (`optional_unix_socket_filename`). [In receiver_manager.rs](https://github.com/DataDog/libdatadog/blob/582bee193e6f4688fa546ca0b3e3b382de672538/libdd-crashtracker/src/collector/receiver_manager.rs#L64-L66), the from_socket() path creates a ProcessHandle with pid: None: ``` Ok(Self { handle: ProcessHandle::new(uds_fd, None), }) ``` If `ptrace_scope=1`, since pid is `None`, `PR_SET_PTRACER` is never called. The sidecar then cannot ptrace the crashed process's threads. The fix is to resolve the peer process's PID using `getsockopt(SO_PEERCRED)` on the Unix socket fd, then call `PR_SET_PTRACER` with that PID. # Motivation I noticed that all PHP crash reports (schema 1.7) have [error.threads with 0 frames per thread.](https://app.datadoghq.com/logs?query=source%3Aphp%20data_schema_version%3A1.7&agg_m=count&agg_m_source=base&agg_t=count&clustering_pattern_field_path=message&cols=host%2Cservice&messageDisplay=inline&refresh_mode=paused&storage=hot&stream_sort=desc&viz=stream&from_ts=1780372800000&to_ts=1780545599999&live=false) Thread names are present (`ddprof_time`, `ddprof_upload`), but stacks are empty with incomplete: false. This affects 100% of PHP crashes. Ruby and Python are unaffected. # Additional Notes Anything else we should know when reviewing? # How to test the change? Describe here in detail how the change can be validated. Co-authored-by: gyuheon.oh <gyuheon.oh@datadoghq.com>
1 parent 401e623 commit a97e1d4

11 files changed

Lines changed: 433 additions & 6 deletions

File tree

bin_tests/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ bench = false
5252
name = "crashing_test_app"
5353
bench = false
5454

55+
[[bin]]
56+
name = "crashtracker_unix_socket_receiver"
57+
bench = false
58+
5559
[[bin]]
5660
name = "prebuild"
5761
path = "src/bin/prebuild.rs"

bin_tests/src/artifacts.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,16 @@ pub fn crashing_app(profile: BuildProfile, panic_abort: bool) -> ArtifactsBuild
3939
}
4040
}
4141

42+
/// Creates an ArtifactsBuild for the Unix socket receiver binary (sidecar-style).
43+
pub fn crashtracker_unix_socket_receiver(profile: BuildProfile) -> ArtifactsBuild {
44+
ArtifactsBuild {
45+
name: "crashtracker_unix_socket_receiver".to_owned(),
46+
build_profile: profile,
47+
artifact_type: ArtifactType::Bin,
48+
..Default::default()
49+
}
50+
}
51+
4252
/// Creates an ArtifactsBuild for the test_the_tests binary.
4353
pub fn test_the_tests(profile: BuildProfile) -> ArtifactsBuild {
4454
ArtifactsBuild {
@@ -87,6 +97,7 @@ pub fn all_prebuild_artifacts() -> Vec<ArtifactsBuild> {
8797
for profile in [BuildProfile::Debug, BuildProfile::Release] {
8898
artifacts.push(crashtracker_bin_test(profile, false));
8999
artifacts.push(crashtracker_receiver(profile));
100+
artifacts.push(crashtracker_unix_socket_receiver(profile));
90101
artifacts.push(test_the_tests(profile));
91102
artifacts.push(profiling_ffi(profile));
92103
artifacts.push(crashing_app(profile, false));
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//! Sidecar-style crashtracker receiver that listens on a Unix socket.
5+
//! Used by integration tests to verify the socket-based receiver path
6+
7+
#[cfg(not(unix))]
8+
fn main() {}
9+
10+
#[cfg(unix)]
11+
fn main() -> anyhow::Result<()> {
12+
let socket_path = std::env::args()
13+
.nth(1)
14+
.expect("usage: crashtracker_unix_socket_receiver <socket_path>");
15+
libdd_crashtracker::receiver_entry_point_unix_socket(&socket_path)
16+
}

bin_tests/src/modes/behavior.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ pub fn get_behavior(mode_str: &str) -> Box<dyn Behavior> {
140140
"errno_preservation" => Box::new(test_016_errno_preservation::Test),
141141
"multi_thread_collection" => Box::new(test_017_multi_thread_collection::Test),
142142
"thread_limit" => Box::new(test_018_thread_limit::Test),
143+
"sidecar_donothing" => Box::new(test_019_sidecar_donothing::Test),
144+
"sidecar_multi_thread_collection" => {
145+
Box::new(test_020_sidecar_multi_thread_collection::Test)
146+
}
143147
"runtime_preload_logger" => Box::new(test_000_donothing::Test),
144148
_ => panic!("Unknown mode: {mode_str}"),
145149
}

bin_tests/src/modes/unix/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,5 @@ pub mod test_015_panic_hook_unknown_type;
1919
pub mod test_016_errno_preservation;
2020
pub mod test_017_multi_thread_collection;
2121
pub mod test_018_thread_limit;
22+
pub mod test_019_sidecar_donothing;
23+
pub mod test_020_sidecar_multi_thread_collection;
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//! Baseline crash test using a sidecar (Unix socket) receiver instead of fork/exec.
5+
//! The socket path is passed using the DD_TEST_UNIX_SOCKET_PATH env var, which
6+
//! the integration test sets after spawning the receiver process.
7+
8+
use crate::modes::behavior::Behavior;
9+
use libdd_crashtracker::CrashtrackerConfiguration;
10+
use std::path::Path;
11+
12+
pub struct Test;
13+
14+
impl Behavior for Test {
15+
fn setup(
16+
&self,
17+
_output_dir: &Path,
18+
config: &mut CrashtrackerConfiguration,
19+
) -> anyhow::Result<()> {
20+
let socket_path = std::env::var("DD_TEST_UNIX_SOCKET_PATH")
21+
.expect("DD_TEST_UNIX_SOCKET_PATH must be set for sidecar tests");
22+
config.set_unix_socket_path(socket_path);
23+
Ok(())
24+
}
25+
26+
fn pre(&self, _output_dir: &Path) -> anyhow::Result<()> {
27+
Ok(())
28+
}
29+
30+
fn post(&self, _output_dir: &Path) -> anyhow::Result<()> {
31+
Ok(())
32+
}
33+
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//! Multi-thread collection test using a sidecar (Unix socket) receiver.
5+
//! Combines the sidecar receiver path (SO_PEERCRED for PR_SET_PTRACER) with
6+
//! collect_all_threads to verify that ptrace works across non-descendant processes.
7+
8+
use crate::modes::behavior::Behavior;
9+
use libdd_crashtracker::CrashtrackerConfiguration;
10+
use std::path::Path;
11+
use std::sync::{Arc, Barrier};
12+
use std::thread;
13+
use std::time::Duration;
14+
15+
pub struct Test;
16+
17+
#[inline(never)]
18+
fn worker_fn_0() {
19+
loop {
20+
std::hint::black_box(0x20_00u64);
21+
std::hint::spin_loop();
22+
}
23+
}
24+
25+
#[inline(never)]
26+
fn worker_fn_1() {
27+
loop {
28+
std::hint::black_box(0x20_01u64);
29+
std::hint::spin_loop();
30+
}
31+
}
32+
33+
impl Behavior for Test {
34+
fn setup(
35+
&self,
36+
_output_dir: &Path,
37+
config: &mut CrashtrackerConfiguration,
38+
) -> anyhow::Result<()> {
39+
let socket_path = std::env::var("DD_TEST_UNIX_SOCKET_PATH")
40+
.expect("DD_TEST_UNIX_SOCKET_PATH must be set for sidecar tests");
41+
config.set_unix_socket_path(socket_path);
42+
config.set_collect_all_threads(true);
43+
config.set_max_threads(32);
44+
Ok(())
45+
}
46+
47+
fn pre(&self, _output_dir: &Path) -> anyhow::Result<()> {
48+
Ok(())
49+
}
50+
51+
fn post(&self, _output_dir: &Path) -> anyhow::Result<()> {
52+
let barrier = Arc::new(Barrier::new(3));
53+
54+
let b0 = Arc::clone(&barrier);
55+
let h0 = thread::Builder::new()
56+
.name("ct_worker_0".to_string())
57+
.spawn(move || {
58+
b0.wait();
59+
worker_fn_0();
60+
})?;
61+
62+
let b1 = Arc::clone(&barrier);
63+
let h1 = thread::Builder::new()
64+
.name("ct_worker_1".to_string())
65+
.spawn(move || {
66+
b1.wait();
67+
worker_fn_1();
68+
})?;
69+
70+
barrier.wait();
71+
thread::sleep(Duration::from_millis(20));
72+
73+
std::mem::forget(h0);
74+
std::mem::forget(h1);
75+
Ok(())
76+
}
77+
}

bin_tests/src/test_types.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ pub enum TestMode {
2222
ErrnoPreservation,
2323
MultiThreadCollection,
2424
ThreadLimit,
25+
SidecarDoNothing,
26+
SidecarMultiThreadCollection,
2527
}
2628

2729
impl TestMode {
@@ -45,6 +47,8 @@ impl TestMode {
4547
Self::ErrnoPreservation => "errno_preservation",
4648
Self::MultiThreadCollection => "multi_thread_collection",
4749
Self::ThreadLimit => "thread_limit",
50+
Self::SidecarDoNothing => "sidecar_donothing",
51+
Self::SidecarMultiThreadCollection => "sidecar_multi_thread_collection",
4852
}
4953
}
5054

@@ -68,6 +72,8 @@ impl TestMode {
6872
Self::ErrnoPreservation,
6973
Self::MultiThreadCollection,
7074
Self::ThreadLimit,
75+
Self::SidecarDoNothing,
76+
Self::SidecarMultiThreadCollection,
7177
]
7278
}
7379
}
@@ -100,6 +106,8 @@ impl std::str::FromStr for TestMode {
100106
"errno_preservation" => Ok(Self::ErrnoPreservation),
101107
"multi_thread_collection" => Ok(Self::MultiThreadCollection),
102108
"thread_limit" => Ok(Self::ThreadLimit),
109+
"sidecar_donothing" => Ok(Self::SidecarDoNothing),
110+
"sidecar_multi_thread_collection" => Ok(Self::SidecarMultiThreadCollection),
103111
_ => Err(format!("Unknown test mode: {}", s)),
104112
}
105113
}

0 commit comments

Comments
 (0)