Skip to content

Commit f23f602

Browse files
committed
Ptrace in collector child
1 parent fa1affa commit f23f602

8 files changed

Lines changed: 380 additions & 930 deletions

File tree

libdd-crashtracker/src/collector/api.rs

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@ use super::{crash_handler::enable, receiver_manager::Receiver};
66
use crate::{
77
clear_spans, clear_traces,
88
collector::crash_handler::register_panic_hook,
9-
collector::signal_handler_manager::{
10-
register_crash_handlers, register_thread_context_signal_handler,
11-
},
9+
collector::signal_handler_manager::register_crash_handlers,
1210
crash_info::Metadata,
1311
reset_counters,
1412
shared::configuration::CrashtrackerReceiverConfig,
@@ -65,10 +63,6 @@ pub fn on_fork(
6563

6664
// panic hook is unaffected by fork.
6765

68-
// Reset the thread context buffer so the forked child starts clean.
69-
#[cfg(target_os = "linux")]
70-
crate::collector::thread_context_buffer::reset_thread_context_buffer();
71-
7266
update_metadata(metadata)?;
7367
update_config(config)?;
7468
Receiver::update_stored_config(receiver_config)?;
@@ -95,11 +89,6 @@ pub fn init(
9589
Receiver::update_stored_config(receiver_config)?;
9690
register_crash_handlers(&config)?;
9791
register_panic_hook()?;
98-
if config.collect_all_threads() {
99-
#[cfg(target_os = "linux")]
100-
crate::collector::thread_context_buffer::init_thread_context_buffer(config.max_threads());
101-
register_thread_context_signal_handler()?;
102-
}
10392
enable();
10493
Ok(())
10594
}

libdd-crashtracker/src/collector/collector_manager.rs

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,20 @@ impl Collector {
5757
tid,
5858
);
5959
}
60-
pid if pid > 0 => Ok(Self {
61-
handle: ProcessHandle::new(receiver.handle.uds_fd, Some(pid)),
62-
}),
60+
child_pid if child_pid > 0 => {
61+
// Parent process; enable ptrace permission for child if multi-thread collection is enabled
62+
#[cfg(target_os = "linux")]
63+
if config.collect_all_threads() {
64+
// Allow the collector child to ptrace this process for thread context collection
65+
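// SAFETY: prctl(PR_SET_PTRACER, ...) takes only integer arguments here and dereferences no pointers. NOTE(review): the return value is ignored, so a failure to grant ptrace permission is silent; prctl is a thin syscall wrapper but is not on the POSIX async-signal-safe list — confirm this is acceptable on the crash path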
66+
unsafe {
67+
libc::prctl(libc::PR_SET_PTRACER, child_pid as libc::c_ulong, 0, 0, 0);
68+
}
69+
}
70+
Ok(Self {
71+
handle: ProcessHandle::new(receiver.handle.uds_fd, Some(child_pid)),
72+
})
73+
},
6374
code => {
6475
// Error
6576
Err(CollectorSpawnError::ForkFailed(code))

libdd-crashtracker/src/collector/crash_handler.rs

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -301,19 +301,6 @@ fn handle_posix_signal_impl(
301301

302302
let timeout_manager = TimeoutManager::new(config.timeout());
303303

304-
// Phase 2: collect ucontexts from all other threads before forking, so the
305-
// collector child can unwind their stacks.
306-
#[cfg(target_os = "linux")]
307-
if config.collect_all_threads() {
308-
use super::thread_context_buffer::collect_thread_contexts;
309-
// Timeout for context collection: half of the overall crash timeout, capped
310-
// at 200ms to avoid delaying the crash report excessively.
311-
let context_timeout_ms = (config.timeout().as_millis() / 2).min(200) as u64;
312-
// SAFETY: current_tid() is the TID of the crashing thread -- we skip it.
313-
let crashing_tid = unsafe { libc::syscall(libc::SYS_gettid) as libc::pid_t };
314-
collect_thread_contexts(crashing_tid, config.max_threads(), context_timeout_ms);
315-
}
316-
317304
let receiver = Receiver::from_crashtracker_config(config)?;
318305

319306
let collector = Collector::spawn(

libdd-crashtracker/src/collector/emitters.rs

Lines changed: 68 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -531,53 +531,84 @@ fn enumerate_task_tids(pid: libc::pid_t) -> Vec<libc::pid_t> {
531531

532532
/// Emit thread blocks for all threads other than the crashing thread.
533533
///
534-
/// Called from the collector child (after fork), so std::fs is safe to use.
534+
/// Called from the collector child (after fork), so std::fs and ptrace are safe to use.
535+
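/// Streams each thread's context to the writer as it is captured, so no vector or hashmap of contexts is built (the fallback path taken when ptrace fails still collects TIDs into a `Vec`).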
535536
/// For each thread:
537+
/// - Uses ptrace to capture thread context (registers + stack)
536538
/// - Reads name and state from /proc/<ppid>/task/<tid>/
537-
/// - If a ucontext was captured (collect_all_threads), emits the stack trace.
538-
/// - Otherwise emits an empty (incomplete) stack trace.
539+
/// - Immediately emits the thread block without intermediate storage
539540
#[cfg(target_os = "linux")]
540541
fn emit_all_threads(
541542
w: &mut impl Write,
542543
config: &CrashtrackerConfiguration,
543544
ppid: libc::pid_t,
544545
crashing_tid: libc::pid_t,
545546
) -> Result<(), EmitterError> {
546-
use crate::collector::thread_context_buffer::iter_collected_contexts;
547-
548-
// Build a map from TID to captured ucontext pointer (may be empty if buffer
549-
// was not initialised or the thread did not respond in time).
550-
let contexts: std::collections::HashMap<libc::pid_t, *const ucontext_t> =
551-
iter_collected_contexts()
552-
.map(|c| (c.tid, c.ucontext))
553-
.collect();
554-
555-
let tids = enumerate_task_tids(ppid);
556-
let max = config.max_threads();
557-
let mut emitted = 0;
558-
559-
for tid in tids {
560-
if tid == crashing_tid {
561-
continue;
562-
}
563-
if emitted >= max {
564-
break;
565-
}
547+
use crate::collector::ptrace_collector::stream_thread_contexts;
548+
use std::time::Duration;
549+
550+
// Timeout for ptrace operations: half of the overall crash timeout, capped at 200ms
551+
let context_timeout = Duration::from_millis((config.timeout().as_millis() / 2).min(200) as u64);
552+
553+
let result = stream_thread_contexts(
554+
ppid,
555+
crashing_tid,
556+
config.max_threads(),
557+
context_timeout,
558+
|tid, captured_context| {
559+
// Read thread metadata from /proc
560+
let name = read_thread_name(ppid, tid).unwrap_or_else(|| tid.to_string());
561+
let state = read_thread_state(ppid, tid);
562+
563+
// Get ucontext pointer if we captured context for this thread
564+
let ucontext = captured_context.map(|ctx| &ctx.ucontext as *const _);
565+
566+
// Immediately emit the thread block
567+
match emit_thread_block(
568+
w,
569+
tid,
570+
false,
571+
&name,
572+
state.as_deref(),
573+
config.resolve_frames(),
574+
ucontext,
575+
) {
576+
Ok(()) => true, // Continue with next thread
577+
Err(_) => false, // Stop iteration on write error
578+
}
579+
},
580+
);
581+
582+
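// Handle any error returned by stream_thread_contexts (e.g. ptrace setup failing entirely). NOTE(review): if an error can occur after some threads were already emitted, the fallback would emit them a second time — confirm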
583+
if result.is_err() {
584+
// Fall back to thread enumeration without context capture
585+
// This provides basic thread information even when ptrace fails
586+
let tids = enumerate_task_tids(ppid);
587+
let max = config.max_threads();
588+
let mut emitted = 0;
589+
590+
for tid in tids {
591+
if tid == crashing_tid {
592+
continue;
593+
}
594+
if emitted >= max {
595+
break;
596+
}
566597

567-
let name = read_thread_name(ppid, tid).unwrap_or_else(|| tid.to_string());
568-
let state = read_thread_state(ppid, tid);
569-
let ucontext = contexts.get(&tid).copied();
570-
571-
emit_thread_block(
572-
w,
573-
tid,
574-
false,
575-
&name,
576-
state.as_deref(),
577-
config.resolve_frames(),
578-
ucontext,
579-
)?;
580-
emitted += 1;
598+
let name = read_thread_name(ppid, tid).unwrap_or_else(|| tid.to_string());
599+
let state = read_thread_state(ppid, tid);
600+
601+
emit_thread_block(
602+
w,
603+
tid,
604+
false,
605+
&name,
606+
state.as_deref(),
607+
config.resolve_frames(),
608+
None, // No context available
609+
)?;
610+
emitted += 1;
611+
}
581612
}
582613

583614
Ok(())

libdd-crashtracker/src/collector/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ mod saguard;
1414
mod signal_handler_manager;
1515
mod spans;
1616
#[cfg(target_os = "linux")]
17-
pub(crate) mod thread_context_buffer;
17+
pub(crate) mod ptrace_collector;
1818

1919
pub use additional_tags::{
2020
clear_additional_tags, consume_and_emit_additional_tags, insert_additional_tag,

0 commit comments

Comments
 (0)