Skip to content

Commit 854b77c

Browse files
committed
Ptrace in receiver (need libunwind bindings)
1 parent f23f602 commit 854b77c

8 files changed

Lines changed: 171 additions & 168 deletions

File tree

libdd-crashtracker/src/collector/api.rs

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,10 @@
44

55
use super::{crash_handler::enable, receiver_manager::Receiver};
66
use crate::{
7-
clear_spans, clear_traces,
8-
collector::crash_handler::register_panic_hook,
9-
collector::signal_handler_manager::register_crash_handlers,
10-
crash_info::Metadata,
11-
reset_counters,
12-
shared::configuration::CrashtrackerReceiverConfig,
13-
update_config, update_metadata, CrashtrackerConfiguration,
7+
clear_spans, clear_traces, collector::crash_handler::register_panic_hook,
8+
collector::signal_handler_manager::register_crash_handlers, crash_info::Metadata,
9+
reset_counters, shared::configuration::CrashtrackerReceiverConfig, update_config,
10+
update_metadata, CrashtrackerConfiguration,
1411
};
1512

1613
pub static DEFAULT_SYMBOLS: [libc::c_int; 4] =

libdd-crashtracker/src/collector/collector_manager.rs

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -57,20 +57,9 @@ impl Collector {
5757
tid,
5858
);
5959
}
60-
child_pid if child_pid > 0 => {
61-
// Parent process; enable ptrace permission for child if multi-thread collection is enabled
62-
#[cfg(target_os = "linux")]
63-
if config.collect_all_threads() {
64-
// Allow the collector child to ptrace this process for thread context collection
65-
// SAFETY: prctl is async-signal-safe and we're just setting ptrace permissions
66-
unsafe {
67-
libc::prctl(libc::PR_SET_PTRACER, child_pid as libc::c_ulong, 0, 0, 0);
68-
}
69-
}
70-
Ok(Self {
71-
handle: ProcessHandle::new(receiver.handle.uds_fd, Some(child_pid)),
72-
})
73-
},
60+
child_pid if child_pid > 0 => Ok(Self {
61+
handle: ProcessHandle::new(receiver.handle.uds_fd, Some(child_pid)),
62+
}),
7463
code => {
7564
// Error
7665
Err(CollectorSpawnError::ForkFailed(code))

libdd-crashtracker/src/collector/crash_handler.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,18 @@ fn handle_posix_signal_impl(
303303

304304
let receiver = Receiver::from_crashtracker_config(config)?;
305305

306+
// Enable ptrace permissions for receiver if multi-thread collection is enabled
307+
#[cfg(target_os = "linux")]
308+
if config.collect_all_threads() {
309+
if let Some(receiver_pid) = receiver.handle.pid {
310+
// Allow the receiver to ptrace this process for thread context collection
311+
// SAFETY: prctl is async-signal-safe and we're just setting ptrace permissions
312+
unsafe {
313+
libc::prctl(libc::PR_SET_PTRACER, receiver_pid as libc::c_ulong, 0, 0, 0);
314+
}
315+
}
316+
}
317+
306318
let collector = Collector::spawn(
307319
&receiver,
308320
config,

libdd-crashtracker/src/collector/emitters.rs

Lines changed: 0 additions & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -127,12 +127,6 @@ pub(crate) fn emit_crashreport(
127127
}
128128
}
129129

130-
// Emit other threads (Phase 1: name/state; Phase 2: also stack if context available).
131-
#[cfg(target_os = "linux")]
132-
if config.collect_all_threads() {
133-
let _ = emit_all_threads(pipe, config, ppid, crashing_tid);
134-
}
135-
136130
writeln!(pipe, "{DD_CRASHTRACK_DONE}")?;
137131
pipe.flush()?;
138132
Ok(())
@@ -486,134 +480,6 @@ fn emit_thread_block(
486480
Ok(())
487481
}
488482

489-
/// Read a thread's name from /proc/<pid>/task/<tid>/comm.
490-
#[cfg(target_os = "linux")]
491-
fn read_thread_name(pid: libc::pid_t, tid: libc::pid_t) -> Option<String> {
492-
let path = format!("/proc/{pid}/task/{tid}/comm");
493-
std::fs::read_to_string(path)
494-
.ok()
495-
.map(|s| s.trim_end_matches('\n').to_string())
496-
}
497-
498-
/// Read a thread's scheduler state letter from /proc/<pid>/task/<tid>/status.
499-
/// Returns the single-letter state ("S", "R", "D") or None on failure.
500-
#[cfg(target_os = "linux")]
501-
fn read_thread_state(pid: libc::pid_t, tid: libc::pid_t) -> Option<String> {
502-
let path = format!("/proc/{pid}/task/{tid}/status");
503-
let file = std::fs::File::open(path).ok()?;
504-
let reader = std::io::BufReader::new(file);
505-
for line in reader.lines() {
506-
let line = line.ok()?;
507-
if let Some(rest) = line.strip_prefix("State:") {
508-
return Some(rest.trim().to_string());
509-
}
510-
}
511-
None
512-
}
513-
514-
/// Enumerate all live thread IDs under /proc/<pid>/task/ using std::fs (safe to
515-
/// call from the collector child, where there are no async-signal-safety constraints).
516-
#[cfg(target_os = "linux")]
517-
fn enumerate_task_tids(pid: libc::pid_t) -> Vec<libc::pid_t> {
518-
let path = format!("/proc/{pid}/task");
519-
let Ok(entries) = std::fs::read_dir(&path) else {
520-
return vec![];
521-
};
522-
entries
523-
.filter_map(|e| e.ok())
524-
.filter_map(|e| {
525-
e.file_name()
526-
.to_str()
527-
.and_then(|s| s.parse::<libc::pid_t>().ok())
528-
})
529-
.collect()
530-
}
531-
532-
/// Emit thread blocks for all threads other than the crashing thread.
533-
///
534-
/// Called from the collector child (after fork), so std::fs and ptrace are safe to use.
535-
/// Uses a streaming approach to avoid allocating vectors or hashmaps.
536-
/// For each thread:
537-
/// - Uses ptrace to capture thread context (registers + stack)
538-
/// - Reads name and state from /proc/<ppid>/task/<tid>/
539-
/// - Immediately emits the thread block without intermediate storage
540-
#[cfg(target_os = "linux")]
541-
fn emit_all_threads(
542-
w: &mut impl Write,
543-
config: &CrashtrackerConfiguration,
544-
ppid: libc::pid_t,
545-
crashing_tid: libc::pid_t,
546-
) -> Result<(), EmitterError> {
547-
use crate::collector::ptrace_collector::stream_thread_contexts;
548-
use std::time::Duration;
549-
550-
// Calculate timeout for ptrace operations
551-
let context_timeout = Duration::from_millis((config.timeout().as_millis() / 2).min(200) as u64);
552-
553-
let result = stream_thread_contexts(
554-
ppid,
555-
crashing_tid,
556-
config.max_threads(),
557-
context_timeout,
558-
|tid, captured_context| {
559-
// Read thread metadata from /proc
560-
let name = read_thread_name(ppid, tid).unwrap_or_else(|| tid.to_string());
561-
let state = read_thread_state(ppid, tid);
562-
563-
// Get ucontext pointer if we captured context for this thread
564-
let ucontext = captured_context.map(|ctx| &ctx.ucontext as *const _);
565-
566-
// Immediately emit the thread block
567-
match emit_thread_block(
568-
w,
569-
tid,
570-
false,
571-
&name,
572-
state.as_deref(),
573-
config.resolve_frames(),
574-
ucontext,
575-
) {
576-
Ok(()) => true, // Continue with next thread
577-
Err(_) => false, // Stop iteration on write error
578-
}
579-
},
580-
);
581-
582-
// Handle the case where ptrace setup fails entirely
583-
if result.is_err() {
584-
// Fall back to thread enumeration without context capture
585-
// This provides basic thread information even when ptrace fails
586-
let tids = enumerate_task_tids(ppid);
587-
let max = config.max_threads();
588-
let mut emitted = 0;
589-
590-
for tid in tids {
591-
if tid == crashing_tid {
592-
continue;
593-
}
594-
if emitted >= max {
595-
break;
596-
}
597-
598-
let name = read_thread_name(ppid, tid).unwrap_or_else(|| tid.to_string());
599-
let state = read_thread_state(ppid, tid);
600-
601-
emit_thread_block(
602-
w,
603-
tid,
604-
false,
605-
&name,
606-
state.as_deref(),
607-
config.resolve_frames(),
608-
None, // No context available
609-
)?;
610-
emitted += 1;
611-
}
612-
}
613-
614-
Ok(())
615-
}
616-
617483
fn emit_config(w: &mut impl Write, config_str: &str) -> Result<(), EmitterError> {
618484
writeln!(w, "{DD_CRASHTRACK_BEGIN_CONFIG}")?;
619485
writeln!(w, "{config_str}")?;

libdd-crashtracker/src/collector/mod.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@ mod receiver_manager;
1313
mod saguard;
1414
mod signal_handler_manager;
1515
mod spans;
16-
#[cfg(target_os = "linux")]
17-
pub(crate) mod ptrace_collector;
1816

1917
pub use additional_tags::{
2018
clear_additional_tags, consume_and_emit_additional_tags, insert_additional_tag,

libdd-crashtracker/src/receiver/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ pub use entry_points::{
77
async_receiver_entry_point_unix_listener, async_receiver_entry_point_unix_socket,
88
get_receiver_unix_socket, receiver_entry_point_stdin, receiver_entry_point_unix_socket,
99
};
10+
#[cfg(target_os = "linux")]
11+
mod ptrace_collector;
1012
mod receive_report;
1113

1214
#[cfg(feature = "benchmarking")]

libdd-crashtracker/src/collector/ptrace_collector.rs renamed to libdd-crashtracker/src/receiver/ptrace_collector.rs

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,12 @@ use std::time::{Duration, Instant};
3232
/// Maximum number of threads to collect contexts for
3333
const MAX_TRACKED_THREADS: usize = 128;
3434

35-
36-
/// A captured thread context containing register state
35+
/// A captured thread context containing basic register state
36+
///
37+
/// TODO: Add remote libunwind stack walking using:
38+
/// - unw_init_remote() with _UPT_accessors for ptrace-based remote unwinding
39+
/// - _UPT_create(), _UPT_destroy() for managing ptrace state
40+
/// - This will enable full stack traces for all threads
3741
pub struct CapturedThreadContext {
3842
/// The captured register context as a ucontext_t
3943
pub ucontext: ucontext_t,
@@ -60,7 +64,11 @@ impl std::fmt::Display for PtraceError {
6064
write!(f, "Failed to attach to thread {}: errno {}", tid, errno)
6165
}
6266
PtraceError::RegisterReadFailed(tid, errno) => {
63-
write!(f, "Failed to read registers from thread {}: errno {}", tid, errno)
67+
write!(
68+
f,
69+
"Failed to read registers from thread {}: errno {}",
70+
tid, errno
71+
)
6472
}
6573
PtraceError::DetachFailed(tid, errno) => {
6674
write!(f, "Failed to detach from thread {}: errno {}", tid, errno)
@@ -77,8 +85,7 @@ impl std::error::Error for PtraceError {}
7785
/// Returns a vector of thread IDs.
7886
pub fn enumerate_threads(pid: libc::pid_t) -> Result<Vec<libc::pid_t>, PtraceError> {
7987
let task_dir = format!("/proc/{}/task", pid);
80-
let entries = std::fs::read_dir(&task_dir)
81-
.map_err(PtraceError::EnumerationFailed)?;
88+
let entries = std::fs::read_dir(&task_dir).map_err(PtraceError::EnumerationFailed)?;
8289

8390
let mut tids = Vec::new();
8491
for entry in entries {
@@ -215,13 +222,15 @@ pub fn read_thread_registers(tid: libc::pid_t) -> Result<ucontext_t, PtraceError
215222
Ok(uctx)
216223
}
217224

218-
219225
/// Capture register context for a single thread
220-
pub fn capture_thread_context(_pid: libc::pid_t, tid: libc::pid_t) -> Result<CapturedThreadContext, PtraceError> {
226+
pub fn capture_thread_context(
227+
_pid: libc::pid_t,
228+
tid: libc::pid_t,
229+
) -> Result<CapturedThreadContext, PtraceError> {
221230
// Attach to the thread
222231
attach_thread(tid)?;
223232

224-
// Read registers
233+
// Read basic registers
225234
let ucontext = match read_thread_registers(tid) {
226235
Ok(uctx) => uctx,
227236
Err(e) => {
@@ -231,7 +240,7 @@ pub fn capture_thread_context(_pid: libc::pid_t, tid: libc::pid_t) -> Result<Cap
231240
};
232241

233242
// Detach from the thread
234-
// detach_thread(tid)?;
243+
detach_thread(tid)?;
235244

236245
Ok(CapturedThreadContext { ucontext })
237246
}
@@ -249,7 +258,8 @@ pub fn stream_thread_contexts<F>(
249258
mut callback: F,
250259
) -> Result<(), PtraceError>
251260
where
252-
F: FnMut(libc::pid_t, Option<&CapturedThreadContext>) -> bool, // returns false to stop iteration
261+
F: FnMut(libc::pid_t, Option<&CapturedThreadContext>) -> bool, /* returns false to stop
262+
* iteration */
253263
{
254264
let start_time = Instant::now();
255265

@@ -293,4 +303,3 @@ where
293303

294304
Ok(())
295305
}
296-

0 commit comments

Comments
 (0)