@@ -99,7 +99,7 @@ static BUFFER: AtomicPtr<ThreadContextBuffer> = AtomicPtr::new(ptr::null_mut());
9999
100100/// Allocate the thread context buffer.
101101///
102- /// Must be called once from a non-signal context (typically inside `init()`).
102+ /// Must be called once from a non-signal context (inside `init()`).
103103/// Subsequent calls are a no-op.
104104pub fn init_thread_context_buffer ( capacity : usize ) {
105105 let capacity = capacity. clamp ( 1 , MAX_TRACKED_THREADS ) ;
@@ -164,7 +164,7 @@ pub(crate) extern "C" fn handle_collect_context_signal(
164164 if slot. tid . load ( Ordering :: SeqCst ) == tid {
165165 if !ucontext. is_null ( ) {
166166 // SAFETY: ucontext is the kernel-saved register state, valid for the
167- // duration of the signal handler. The slot ctx field is not accessed
167+ // duration of the signal handler. The slot ctx field is not accessed
168168 // by anyone else until ready is observed as true.
169169 unsafe {
170170 ptr:: copy_nonoverlapping (
@@ -279,7 +279,7 @@ fn parse_tid(bytes: &[u8]) -> Option<libc::pid_t> {
279279/// then wait for each contacted thread to write its ucontext into its slot.
280280///
281281/// Called from the primary crash signal handler before fork()-ing the collector
282- /// child. Must be async-signal-safe.
282+ /// child. Must be async-signal-safe.
283283///
284284/// - `crashing_tid` TID of the thread that received the fatal signal (skipped).
285285/// - `max_threads` cap on how many threads to sample.
@@ -380,6 +380,37 @@ pub fn iter_collected_contexts() -> impl Iterator<Item = CollectedContext> {
380380#[ cfg( test) ]
381381mod tests {
382382 use super :: * ;
383+ use std:: sync:: Arc ;
384+ use std:: time:: Duration ;
385+
386+ /// Claim the first free slot for `tid`. Returns the slot index on success.
387+ fn claim_slot ( buf : & ThreadContextBuffer , tid : i32 ) -> Option < usize > {
388+ buf. slots . iter ( ) . enumerate ( ) . find_map ( |( i, slot) | {
389+ slot. tid
390+ . compare_exchange ( 0 , tid, Ordering :: SeqCst , Ordering :: Relaxed )
391+ . ok ( )
392+ . map ( |_| i)
393+ } )
394+ }
395+
396+ /// Install `handle_collect_context_signal` as the SIGUSR2 handler and return
397+ /// the previous `sigaction` so the caller can restore it.
398+ fn install_context_handler ( ) -> libc:: sigaction {
399+ let mut sa: libc:: sigaction = unsafe { std:: mem:: zeroed ( ) } ;
400+ sa. sa_sigaction = handle_collect_context_signal as * const ( ) as usize ;
401+ sa. sa_flags = libc:: SA_SIGINFO | libc:: SA_RESTART | libc:: SA_NODEFER ;
402+ let mut old: libc:: sigaction = unsafe { std:: mem:: zeroed ( ) } ;
403+ unsafe { libc:: sigaction ( libc:: SIGUSR2 , & sa, & mut old) } ;
404+ old
405+ }
406+
407+ fn restore_handler ( old : & libc:: sigaction ) {
408+ unsafe { libc:: sigaction ( libc:: SIGUSR2 , old, std:: ptr:: null_mut ( ) ) } ;
409+ }
410+
411+ fn current_tid ( ) -> i32 {
412+ unsafe { libc:: syscall ( libc:: SYS_gettid ) as i32 }
413+ }
383414
384415 #[ test]
385416 fn test_parse_tid_valid ( ) {
@@ -419,4 +450,186 @@ mod tests {
419450 assert_eq ! ( buf. slots[ 0 ] . tid. load( Ordering :: SeqCst ) , 0 ) ;
420451 assert ! ( !buf. slots[ 0 ] . ready. load( Ordering :: SeqCst ) ) ;
421452 }
453+
454+ #[ test]
455+ fn test_signal_handler_writes_context_and_sets_ready ( ) {
456+ init_thread_context_buffer ( 16 ) ;
457+ reset_thread_context_buffer ( ) ;
458+
459+ let tid = current_tid ( ) ;
460+ let buf = get_buffer ( ) . unwrap ( ) ;
461+ let idx = claim_slot ( buf, tid) . expect ( "should find a free slot" ) ;
462+
463+ let mut fake_ctx: libc:: ucontext_t = unsafe { std:: mem:: zeroed ( ) } ;
464+ fake_ctx. uc_flags = 0x1234_5678 ;
465+
466+ handle_collect_context_signal (
467+ libc:: SIGUSR2 ,
468+ std:: ptr:: null_mut ( ) ,
469+ & mut fake_ctx as * mut libc:: ucontext_t as * mut libc:: c_void ,
470+ ) ;
471+
472+ let slot = & buf. slots [ idx] ;
473+ assert ! (
474+ slot. ready. load( Ordering :: Acquire ) ,
475+ "slot should be marked ready after handler runs"
476+ ) ;
477+ // Verify the context bytes were actually copied.
478+ let copied_flags = unsafe { ( * ( * slot. ctx . get ( ) ) . as_ptr ( ) ) . uc_flags } ;
479+ assert_eq ! (
480+ copied_flags, 0x1234_5678 ,
481+ "uc_flags sentinel should survive the copy"
482+ ) ;
483+
484+ reset_thread_context_buffer ( ) ;
485+ }
486+
487+ /// Even with a null ucontext (kernel gave us nothing), the slot must be
488+ /// marked ready so the spin-wait in collect_thread_contexts can complete.
489+ #[ test]
490+ fn test_signal_handler_null_ucontext_still_sets_ready ( ) {
491+ init_thread_context_buffer ( 16 ) ;
492+ reset_thread_context_buffer ( ) ;
493+
494+ let tid = current_tid ( ) ;
495+ let buf = get_buffer ( ) . unwrap ( ) ;
496+ let idx = claim_slot ( buf, tid) . expect ( "should find a free slot" ) ;
497+
498+ handle_collect_context_signal (
499+ libc:: SIGUSR2 ,
500+ std:: ptr:: null_mut ( ) ,
501+ std:: ptr:: null_mut ( ) , // null ucontext
502+ ) ;
503+
504+ assert ! (
505+ buf. slots[ idx] . ready. load( Ordering :: Acquire ) ,
506+ "null ucontext should still set ready so the caller doesn't stall"
507+ ) ;
508+
509+ reset_thread_context_buffer ( ) ;
510+ }
511+
512+ /// The handler must silently do nothing when the current TID has no slot
513+ /// claimed for it
514+ #[ test]
515+ fn test_signal_handler_ignores_unclaimed_tid ( ) {
516+ init_thread_context_buffer ( 16 ) ;
517+ reset_thread_context_buffer ( ) ; // all slots tid=0, ready=false
518+
519+ let mut fake_ctx: libc:: ucontext_t = unsafe { std:: mem:: zeroed ( ) } ;
520+ // Call with no slot pre-claimed for the current TID.
521+ handle_collect_context_signal (
522+ libc:: SIGUSR2 ,
523+ std:: ptr:: null_mut ( ) ,
524+ & mut fake_ctx as * mut libc:: ucontext_t as * mut libc:: c_void ,
525+ ) ;
526+
527+ let buf = get_buffer ( ) . unwrap ( ) ;
528+ let tid = current_tid ( ) ;
529+ for slot in & buf. slots {
530+ assert ! (
531+ !slot. ready. load( Ordering :: Acquire ) ,
532+ "no slot should be ready when TID was not pre-claimed"
533+ ) ;
534+ assert_ne ! (
535+ slot. tid. load( Ordering :: Acquire ) ,
536+ tid,
537+ "handler must not claim a slot for an unclaimed TID"
538+ ) ;
539+ }
540+
541+ reset_thread_context_buffer ( ) ;
542+ }
543+
544+ /// The crashing thread shouldn't appear as one of the captured threads
545+ #[ test]
546+ #[ cfg_attr( miri, ignore) ] // sigaction is not supported by Miri
547+ fn test_collect_never_captures_crashing_tid ( ) {
548+ init_thread_context_buffer ( 16 ) ;
549+ reset_thread_context_buffer ( ) ;
550+
551+ let old_sa = install_context_handler ( ) ;
552+ let crashing_tid = unsafe { libc:: syscall ( libc:: SYS_gettid ) as libc:: pid_t } ;
553+
554+ collect_thread_contexts ( crashing_tid, 16 , 300 ) ;
555+
556+ let buf = get_buffer ( ) . unwrap ( ) ;
557+ for slot in & buf. slots {
558+ assert_ne ! (
559+ slot. tid. load( Ordering :: Acquire ) ,
560+ crashing_tid as i32 ,
561+ "crashing TID ({crashing_tid}) must not appear in any slot"
562+ ) ;
563+ }
564+
565+ restore_handler ( & old_sa) ;
566+ reset_thread_context_buffer ( ) ;
567+ }
568+
569+ /// Two background threads sleeping in a syscall should both respond to
570+ /// SIGUSR2 and have their contexts captured before the timeout fires.
571+ ///
572+ /// We record each worker's TID before they block and verify that exactly
573+ /// those TIDs appear in the buffer afterwards.
574+ #[ test]
575+ #[ cfg_attr( miri, ignore) ] // sigaction is not supported by Miri
576+ fn test_collect_captures_background_threads ( ) {
577+ init_thread_context_buffer ( 16 ) ;
578+ reset_thread_context_buffer ( ) ;
579+
580+ let old_sa = install_context_handler ( ) ;
581+
582+ let ( tx, rx) = std:: sync:: mpsc:: channel :: < i32 > ( ) ;
583+ let keep = Arc :: new ( std:: sync:: atomic:: AtomicBool :: new ( true ) ) ;
584+
585+ let handles: Vec < _ > = ( 0 ..2 )
586+ . map ( |_| {
587+ let tx = tx. clone ( ) ;
588+ let keep = Arc :: clone ( & keep) ;
589+ std:: thread:: spawn ( move || {
590+ tx. send ( current_tid ( ) ) . unwrap ( ) ;
591+ while keep. load ( Ordering :: Relaxed ) {
592+ std:: thread:: sleep ( Duration :: from_millis ( 20 ) ) ;
593+ }
594+ } )
595+ } )
596+ . collect ( ) ;
597+
598+ // Receive exactly one TID per worker
599+ drop ( tx) ;
600+ let worker_tids: Vec < i32 > = ( 0 ..2 ) . map ( |_| rx. recv ( ) . unwrap ( ) ) . collect ( ) ;
601+ std:: thread:: sleep ( Duration :: from_millis ( 50 ) ) ;
602+
603+ let crashing_tid = unsafe { libc:: syscall ( libc:: SYS_gettid ) as libc:: pid_t } ;
604+ collect_thread_contexts ( crashing_tid, 16 , 500 ) ;
605+
606+ let buf = get_buffer ( ) . unwrap ( ) ;
607+
608+ // Both workers must have responded and their contexts must be in the buffer.
609+ for & wtid in & worker_tids {
610+ let found = buf
611+ . slots
612+ . iter ( )
613+ . any ( |s| s. tid . load ( Ordering :: Acquire ) == wtid && s. ready . load ( Ordering :: Acquire ) ) ;
614+ assert ! (
615+ found,
616+ "worker TID {wtid} should have its context captured in the buffer"
617+ ) ;
618+ }
619+
620+ // The crashing TID must still be absent.
621+ assert ! (
622+ buf. slots
623+ . iter( )
624+ . all( |s| s. tid. load( Ordering :: Acquire ) != crashing_tid as i32 ) ,
625+ "crashing TID ({crashing_tid}) must not be in the buffer"
626+ ) ;
627+
628+ keep. store ( false , Ordering :: Relaxed ) ;
629+ for h in handles {
630+ let _ = h. join ( ) ;
631+ }
632+ restore_handler ( & old_sa) ;
633+ reset_thread_context_buffer ( ) ;
634+ }
422635}
0 commit comments