88//! Since `rustc` doesn't currently support the TLSDESC dialect, we use a C shim to set and get the
99//! thread-local storage used for the context.
1010//!
11- //! As for [process context][crate::otel_process_ctx], we use atomics as an inter-process
12- //! synchronization method; see the discussion there about assumptions and pitfalls.
11+ //! ## Synchronization
12+ //!
13+ //! Readers are constrained to the same thread as the writer and operate like async-signal
14+ //! handlers: the writer thread is always stopped while a reader runs. There is thus no
15+ //! cross-thread synchronization concerns. The only hazard is compiler reordering or not committing
16+ //! writes to memory, which is handled using volatile writes.
1317
1418#[ cfg( target_os = "linux" ) ]
1519pub mod linux {
16- use std:: sync:: atomic:: { AtomicPtr , AtomicU8 , Ordering } ;
1720 use std:: {
1821 ffi:: c_void,
1922 mem,
2023 ops:: { Deref , DerefMut } ,
2124 ptr:: { self , NonNull } ,
22- sync:: atomic:: fence,
2325 } ;
2426
2527 extern "C" {
2628 /// Return the address of the current thread's `custom_labels_current_set_v2` local.
2729 fn libdd_get_custom_labels_current_set_v2 ( ) -> * mut * mut c_void ;
2830 }
2931
30- /// Return a pointer to the TLS slot holding the context. The address calculation requires to
32+ /// Return a pointer to the TLS slot holding the context. The address calculation requires a
3133 /// call to a C shim in order to use the TLSDESC dialect from Rust. Note that the returned
32- /// address is stable (per thread), so the result can be reused several time in a sequence.
34+ /// address is stable (per thread), so the result can be reused several times in a sequence.
35+ ///
36+ /// Use `write_tls_slot` to make sure writes are visible to the reader. Since we're the only
37+ /// writer, reads can be done directly.
3338 #[ allow( clippy:: missing_safety_doc) ]
34- unsafe fn get_tls_ptr ( ) -> * mut * mut c_void {
35- libdd_get_custom_labels_current_set_v2 ( )
36- }
37-
38- /// Call [get_tls_ptr], convert it to an atomic pointer, and provide proceed with the closure
39- /// `f`.
40- fn with_atomic_tls_ptr < T > ( f : impl FnOnce ( & AtomicPtr < ThreadContextRecord > ) -> T ) -> T {
41- const {
42- assert ! (
43- mem:: align_of:: <AtomicPtr <ThreadContextRecord >>( )
44- == mem:: align_of:: <* mut ThreadContextRecord >( )
45- )
46- }
47-
48- // Safety: the const assertion above ensures the alignment is correct. The TLS slot is
49- // valid for writes during the lifetime of the program, and the scopped closure pattern
50- // makes sure the atomic doesn't escape, such that we don't mix accesses.
51- unsafe {
52- f ( AtomicPtr :: from_ptr (
53- get_tls_ptr ( ) as * mut * mut ThreadContextRecord
54- ) )
55- }
39+ fn get_tls_ptr ( ) -> * mut * mut ThreadContextRecord {
40+ // Safety: this is just an FFI call, but there's no particular pre-condition to uphold: the
41+ // TLS slot should always be accessible.
42+ unsafe { libdd_get_custom_labels_current_set_v2 ( ) . cast ( ) }
5643 }
5744
5845 /// Maximum size in bytes of the `attrs_data` field.
@@ -66,38 +53,19 @@ pub mod linux {
6653 ///
6754 /// # Synchronization
6855 ///
69- /// `valid` is the inter-process synchronization point between the writer and the reader, is
70- /// modified atomically and protected by appropriate fences.
56+ /// Readers are async-signal handlers on the same thread; the writer is always stopped while a
57+ /// reader runs. `valid` is written with `ptr::write_volatile` so the compiler cannot reorder
58+ /// its store past the surrounding field writes:
7159 ///
72- /// - The writer sets `valid = 1` *after* all other fields are populated.
73- /// - The writer sets `valid = 0` *before* modifying fields during a reattach .
60+ /// - The writer sets `valid = 1` *after* all other fields are populated (publish) .
61+ /// - The writer sets `valid = 0` *before* modifying fields in-place (modify) .
7462 #[ repr( C ) ]
7563 pub struct ThreadContextRecord {
7664 /// 128-bit trace identifier; all-zeroes means "no trace".
7765 pub trace_id : [ u8 ; 16 ] ,
7866 /// 64-bit span identifier.
7967 pub span_id : [ u8 ; 8 ] ,
80- /// Whether the record is ready/consistent.
81- ///
82- /// # Memory ordering
83- ///
84- /// We ensure all accesses to `valid` use `Ordering::SeqCst`. Indeed, the pattern is the
85- /// following: we set `valid` from 1 to 0, start modifying the record, and set it back to
86- /// 1. We want to avoid having a reader observing the modifications while still seeing
87- /// valid being set to 1. My current understanding is that doing with acquire-release
88- /// ordering is not possible, because **there's no such thing as an acquire-store**. So if
89- /// a reader sees `valid` set to 1, it doesn't establish any synchronization with the
90- /// `valid := 0` assignment, and thus there's no ordering guarantees of respective memory
91- /// operations.
92- ///
93- /// Using sequential consistency forces all stores and loads to `valid` to be totally
94- /// ordered, which makes the re-ordering impossible: if a reader sees `1`, then the read
95- /// happens-before the `valid := 0`, which happens-before the modifications.
96- ///
97- /// Note that there's still an ABA problem: while a reader has seen `0` and started reading
98- /// the record, a writer could sneak in, and modify it before the reader finishes. However
99- /// this is currently a problem of the specification, not of this particular
100- /// implementation.
68+ /// Whether the record is ready/consistent. Written with `ptr::write_volatile`.
10169 pub valid : u8 ,
10270 pub _reserved : u8 ,
10371 /// Number of populated bytes in `attrs_data`.
@@ -131,13 +99,12 @@ pub mod linux {
13199 record
132100 }
133101
134- /// Set the `valid` field to 1 atomically.
135- pub fn set_valid_atomically ( & mut self , value : u8 , ordering : Ordering ) {
136- // Safety: `AtomicU8` doesn't have alignment constraint (it's 1), and we hold a unique
137- // borrow to `self`, so `self.valid` is valid for both reads and writes. Finally, the
138- // atomic is materialized for just one store, so there's no interleaving with
139- // non-atomic accesses.
140- unsafe { AtomicU8 :: from_ptr ( & mut self . valid as * mut u8 ) . store ( value, ordering) }
102+ /// Write `value` to the `valid` field using a volatile store, preventing the compiler from
103+ /// reordering this write past adjacent field writes.
104+ fn write_valid ( & mut self , value : u8 ) {
105+ // Safety: we have an exclusive borrow of `self`, so `valid` is live and valid for
106+ // writes.
107+ unsafe { ptr:: write_volatile ( & mut self . valid as * mut u8 , value) }
141108 }
142109
143110 /// Encode `attributes` into `record.attrs_data` as packed key-value records. Existing data
@@ -213,7 +180,7 @@ pub mod linux {
213180 }
214181
215182 /// Reconstruct a [ThreadContextRecord] from a raw pointer to `ThreadContextRecord` that is
216- /// either `null` or comes from [`Self::into_raw`]. Return `None` if `ptr` is null.
183+ /// either `null` or comes from [`Self::into_raw`]. Return `None` if `ptr` is null.
217184 ///
218185 /// # Safety
219186 ///
@@ -241,72 +208,59 @@ pub mod linux {
241208 }
242209
243210 impl ThreadContext {
244- /// Atomically swap a new (or previously detached), already built thread context record.
245- /// The `valid` flag is set to `1`. Return the previously attached context, if any.
211+ /// Publish a new (or previously detached) thread context record by writing its pointer
212+ /// into the TLS slot. Sets `valid = 1` before publishing. Returns the previously attached
213+ /// context, if any.
246214 pub fn attach ( mut self ) -> Option < ThreadContext > {
247- with_atomic_tls_ptr ( |tls_ptr| {
248- unsafe {
249- // Safety: `self` has ownership of the underlying record, which is alive and
250- // valid for writes.
251- //
252- // We don't need any synchronization here, since the swap's ordering ensures
253- // that a reader seeing the new pointer will see the record fully initialized
254- // and `valid` set to 0 (and the current record can't be `self`, since `attach`
255- // consumes self). Nobody could read the `valid = 0` before, since the record
256- // isn't even shared yet.
257- self . 0 . as_mut ( ) . valid = 1 ;
258- // Safety: if there is a non-null pointer in here, it came from a previous call
259- // to `ThreadContext::into_raw`
260- ThreadContext :: from_raw ( tls_ptr. swap ( self . into_raw ( ) , Ordering :: Release ) )
261- }
262- } )
215+ let slot = get_tls_ptr ( ) ;
216+ // Set `valid = 1` written before the TLS pointer is updated, so any reader that
217+ // observes the new pointer also observes `valid = 1`.
218+ self . write_valid ( 1 ) ;
219+ Self :: swap ( slot, self . 0 . as_ptr ( ) )
220+ }
221+
222+ /// Swap the current context with a pointer value. Return the previously attached context,
223+ /// if any.
224+ fn swap (
225+ slot : * mut * mut ThreadContextRecord ,
226+ tgt : * mut ThreadContextRecord ,
227+ ) -> Option < ThreadContext > {
228+ unsafe {
229+ // Safety: TLS slot is always live and valid for reads and writes.
230+ let prev = ThreadContext :: from_raw ( * slot) ;
231+ ptr:: write_volatile ( slot, tgt) ;
232+ // Safety: a non-null value in the slot came from a prior `into_raw` call.
233+ prev
234+ }
263235 }
264236
265- /// Take a closure representing an update of the thread context record, and modify the
266- /// currently attached record in-place. Take care of setting `valid` to the proper values
267- /// and related synchronization .
237+ /// Modify the currently attached record in-place. Sets `valid = 0` before the update and
238+ /// `valid = 1` after, so a reader that fires between the two writes sees an inconsistent
239+ /// record and skips it .
268240 ///
269241 /// # Error
270242 ///
271- /// Returns an error if there's currently no thread context record attached.
272- pub fn modify ( f : impl FnOnce ( & mut ThreadContextRecord ) ) -> anyhow:: Result < ( ) > {
273- with_atomic_tls_ptr ( |tls_ptr| {
274- let current_ptr = tls_ptr. swap ( ptr:: null_mut ( ) , Ordering :: Relaxed ) ;
275-
276- // Safety: if the TLS slot is set to a non-null value, this value is a thread
277- // context record owned by the current thread.
278- if let Some ( current) = unsafe { current_ptr. as_mut ( ) } {
279- current. set_valid_atomically ( 0 , Ordering :: Relaxed ) ;
280- fence ( Ordering :: SeqCst ) ;
281- f ( current) ;
282- fence ( Ordering :: SeqCst ) ;
283- current. set_valid_atomically ( 1 , Ordering :: Relaxed ) ;
284- } else {
285- return Err ( anyhow:: anyhow!( "no context currently attached" ) ) ;
286- } ;
287-
288- tls_ptr. swap ( current_ptr, Ordering :: Relaxed ) ;
289-
243+ /// Returns an error if there is no thread context record currently attached.
244+ pub fn modify ( ) -> anyhow:: Result < ( ) > {
245+ let slot = get_tls_ptr ( ) ;
246+
247+ // Safety: the tls slot is always valid for reads and writes.
248+ if let Some ( current) = unsafe { ( * slot) . as_mut ( ) } {
249+ current. write_valid ( 0 ) ;
250+ todo ! ( ) ;
251+ current. write_valid ( 1 ) ;
290252 Ok ( ( ) )
291- } )
253+ } else {
254+ Err ( anyhow:: anyhow!( "no context currently attached" ) )
255+ }
292256 }
293257
294- /// Detach the current record from the TLS slot.
295- ///
296- /// Sets the TLS pointer to `null`. If there was a record previously attached, sets `valid`
297- /// to `0` and return it.
258+ /// Detach the current record from the TLS slot. Writes null to the slot, sets `valid = 0`
259+ /// on the detached record, and returns it.
298260 pub fn detach ( ) -> Option < ThreadContext > {
299- with_atomic_tls_ptr ( |tls_ptr| {
300- let prev = tls_ptr. swap ( ptr:: null_mut ( ) , Ordering :: Relaxed ) ;
301-
302- // Safety: if the TLS slot is not null, it is pointing to a live
303- // `ThreadContextRecord` valid for writes.
304- unsafe {
305- ThreadContext :: from_raw ( prev) . map ( |mut ctx| {
306- ctx. 0 . as_mut ( ) . set_valid_atomically ( 0 , Ordering :: Relaxed ) ;
307- ctx
308- } )
309- }
261+ Self :: swap ( get_tls_ptr ( ) , ptr:: null_mut ( ) ) . map ( |mut ctx| {
262+ ctx. write_valid ( 0 ) ;
263+ ctx
310264 } )
311265 }
312266 }
0 commit comments