Skip to content

Commit 43f6204

Browse files
pront and claude authored
fix(observability): fix panic in allocation tracing dealloc path (#25136)
* fix(observability): fix panic in allocation tracing when deallocating pre-tracking memory

When `--allocation-tracing` is enabled at runtime, the custom allocator wraps every allocation with an extra byte to store the allocation group ID. Previously, allocations made before tracking was enabled used the original (unwrapped) layout. When those were later freed, `dealloc` read an out-of-bounds byte as the group ID, hitting `NonZeroU8::new_unchecked(0)` -- undefined behavior that recent Rust toolchains (>= ~1.78) turn into an abort in debug builds.

Additionally, reentrant allocations (wrapped layout but tracing closure skipped) left the group ID header uninitialized, causing misattributed deallocations and skewed per-group memory accounting.

Fix: always allocate with the wrapped layout, regardless of whether tracking is currently enabled. The group ID header byte is set to:

- UNTRACKED (0): tracking was off at allocation time
- UNTRACED (u8::MAX): tracking was on but the tracing closure was skipped due to reentrancy
- A real group ID (1..=254): normal traced allocation

On deallocation, all paths free with the wrapped layout (which is always correct now). UNTRACKED and UNTRACED skip `trace_deallocation` to keep per-group accounting balanced.

This eliminates:

- The original panic/UB from `NonZero::new_unchecked(0)`
- Layout mismatches for pre-tracking allocations (including realloc)
- Accounting skew from uninitialized headers on reentrant allocations

This bug has been latent since #15221 (Nov 2022) which introduced the runtime toggle.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* add authors

* fix(observability): strengthen allocation tracing tests

Harden test coverage based on code review feedback:

- Assert exact ROOT group ID and trace counters in tracked alloc test
- Add end-to-end reentrant allocation test that exercises the real reentrancy path via thread-local borrow
- Remove redundant manual-sentinel test now covered by reentrant test
- Rename sentinel collision test to reflect what it actually checks

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 3112033 commit 43f6204

2 files changed

Lines changed: 192 additions & 13 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fixed a panic (abort) when running with `--allocation-tracing` in debug builds, caused by deallocating memory that was allocated before tracking was enabled. Also fixed per-group memory accounting skew for reentrant allocations whose tracing closure was skipped, which left the group ID header uninitialized and caused deallocations to be attributed to wrong groups.
2+
3+
authors: pront

src/internal_telemetry/allocations/allocator/tracing_allocator.rs

Lines changed: 189 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,24 @@ use super::{
99
};
1010
use crate::internal_telemetry::allocations::TRACK_ALLOCATIONS;
1111

12+
/// Header value for allocations made while tracking is disabled.
13+
/// On deallocation we free with the wrapped layout but skip tracing.
14+
const UNTRACKED: u8 = 0;
15+
16+
/// Header value for allocations whose tracing closure was skipped due to
17+
/// reentrancy. `register()` never hands out `u8::MAX`, so this cannot
18+
/// collide with a real group ID. On deallocation we free with the wrapped
19+
/// layout but skip `trace_deallocation` to keep accounting balanced.
20+
const UNTRACED: u8 = u8::MAX;
21+
1222
/// A tracing allocator that groups allocation events by groups.
1323
///
24+
/// Every allocation made through this allocator uses the "wrapped" layout:
25+
/// the requested layout extended by one byte to store an allocation group
26+
/// ID. This byte is always present regardless of whether tracking is
27+
/// enabled, which guarantees that `dealloc` can always read a valid header
28+
/// and free with the correct (wrapped) layout.
29+
///
1430
/// This allocator can only be used when specified via `#[global_allocator]`.
1531
pub struct GroupedTraceableAllocator<A, T> {
1632
allocator: A,
@@ -29,11 +45,6 @@ unsafe impl<A: GlobalAlloc, T: Tracer> GlobalAlloc for GroupedTraceableAllocator
2945
#[inline]
3046
unsafe fn alloc(&self, object_layout: Layout) -> *mut u8 {
3147
unsafe {
32-
if !TRACK_ALLOCATIONS.load(Ordering::Relaxed) {
33-
return self.allocator.alloc(object_layout);
34-
}
35-
36-
// Allocate our wrapped layout and make sure the allocation succeeded.
3748
let (actual_layout, offset_to_group_id) = get_wrapped_layout(object_layout);
3849
let actual_ptr = self.allocator.alloc(actual_layout);
3950
if actual_ptr.is_null() {
@@ -42,6 +53,16 @@ unsafe impl<A: GlobalAlloc, T: Tracer> GlobalAlloc for GroupedTraceableAllocator
4253

4354
let group_id_ptr = actual_ptr.add(offset_to_group_id).cast::<u8>();
4455

56+
if !TRACK_ALLOCATIONS.load(Ordering::Relaxed) {
57+
group_id_ptr.write(UNTRACKED);
58+
return actual_ptr;
59+
}
60+
61+
// Write the untraced sentinel so that `dealloc` always finds a
62+
// valid header, even when the tracing closure below is skipped
63+
// due to reentrancy.
64+
group_id_ptr.write(UNTRACED);
65+
4566
let object_size = object_layout.size();
4667

4768
try_with_suspended_allocation_group(
@@ -58,19 +79,19 @@ unsafe impl<A: GlobalAlloc, T: Tracer> GlobalAlloc for GroupedTraceableAllocator
5879
#[inline]
5980
unsafe fn dealloc(&self, object_ptr: *mut u8, object_layout: Layout) {
6081
unsafe {
61-
if !TRACK_ALLOCATIONS.load(Ordering::Relaxed) {
62-
self.allocator.dealloc(object_ptr, object_layout);
63-
return;
64-
}
65-
// Regenerate the wrapped layout so we know where we have to look, as the pointer we've given relates to the
66-
// requested layout, not the wrapped layout that was actually allocated.
6782
let (wrapped_layout, offset_to_group_id) = get_wrapped_layout(object_layout);
68-
6983
let raw_group_id = object_ptr.add(offset_to_group_id).cast::<u8>().read();
7084

71-
// Deallocate before tracking, just to make sure we're reclaiming memory as soon as possible.
85+
// Always free with the wrapped layout since all allocations
86+
// (tracked or not) use it.
7287
self.allocator.dealloc(object_ptr, wrapped_layout);
7388

89+
// Skip tracing for untracked (tracking was off) and untraced
90+
// (reentrant, closure skipped) allocations.
91+
if raw_group_id == UNTRACKED || raw_group_id == UNTRACED {
92+
return;
93+
}
94+
7495
let object_size = object_layout.size();
7596
let source_group_id = AllocationGroupId::from_raw(raw_group_id);
7697

@@ -96,3 +117,158 @@ fn get_wrapped_layout(object_layout: Layout) -> (Layout, usize) {
96117

97118
(actual_layout.pad_to_align(), offset_to_group_id)
98119
}
120+
121+
#[cfg(test)]
mod tests {
    use std::{
        alloc::{GlobalAlloc, Layout, System},
        sync::atomic::{AtomicUsize, Ordering},
    };

    use serial_test::serial;

    use super::*;
    use crate::internal_telemetry::allocations::allocator::{
        token::AllocationGroupId, tracer::Tracer,
    };

    /// Scope guard that flips `TRACK_ALLOCATIONS` on when constructed and
    /// guarantees it is switched back off on drop, so a panicking test
    /// cannot leak the enabled state into the next `#[serial]` test.
    struct TrackingGuard;

    impl TrackingGuard {
        fn enable() -> Self {
            TRACK_ALLOCATIONS.store(true, Ordering::Release);
            Self
        }
    }

    impl Drop for TrackingGuard {
        fn drop(&mut self) {
            TRACK_ALLOCATIONS.store(false, Ordering::Release);
        }
    }

    /// Tracer that only counts how many allocation/deallocation events
    /// reached it, letting tests assert whether tracing fired at all.
    struct CountingTracer {
        allocs: AtomicUsize,
        deallocs: AtomicUsize,
    }

    impl CountingTracer {
        const fn new() -> Self {
            Self {
                allocs: AtomicUsize::new(0),
                deallocs: AtomicUsize::new(0),
            }
        }
    }

    impl Tracer for CountingTracer {
        fn trace_allocation(&self, _size: usize, _group_id: AllocationGroupId) {
            self.allocs.fetch_add(1, Ordering::Relaxed);
        }

        fn trace_deallocation(&self, _size: usize, _source_group_id: AllocationGroupId) {
            self.deallocs.fetch_add(1, Ordering::Relaxed);
        }
    }

    /// Reads the group ID header byte stored just past the object bytes of
    /// an allocation made with the wrapped layout for `object_layout`.
    fn read_header(object_ptr: *mut u8, object_layout: Layout) -> u8 {
        let (_, offset_to_group_id) = get_wrapped_layout(object_layout);
        unsafe { object_ptr.add(offset_to_group_id).cast::<u8>().read() }
    }

    #[test]
    fn sentinels_do_not_collide_with_root_id() {
        assert_eq!(UNTRACED, u8::MAX);
        assert_ne!(AllocationGroupId::ROOT.as_raw(), UNTRACED);
        assert_ne!(AllocationGroupId::ROOT.as_raw(), UNTRACKED);
    }

    /// Allocations made while tracking is disabled get UNTRACKED (0) in the
    /// header. Deallocating them (whether tracking is on or off) must use
    /// the wrapped layout and skip tracing.
    #[test]
    #[serial]
    fn untracked_alloc_dealloc_skips_tracing() {
        let allocator = GroupedTraceableAllocator::new(System, CountingTracer::new());
        let layout = Layout::from_size_align(64, 8).unwrap();

        // Tracking defaults to off, so this allocation must be tagged UNTRACKED.
        let object_ptr = unsafe { allocator.alloc(layout) };
        assert!(!object_ptr.is_null());
        assert_eq!(read_header(object_ptr, layout), UNTRACKED);

        // Turn tracking on before freeing: the free must still use the
        // wrapped layout, must not panic, and must emit no trace events.
        let _guard = TrackingGuard::enable();
        unsafe { allocator.dealloc(object_ptr, layout) };

        assert_eq!(allocator.tracer.allocs.load(Ordering::Relaxed), 0);
        assert_eq!(allocator.tracer.deallocs.load(Ordering::Relaxed), 0);
    }

    /// Tracked allocation: header is a valid non-zero, non-sentinel group
    /// ID, tracing fires for both alloc and dealloc, and dealloc completes
    /// without panic.
    #[test]
    #[serial]
    fn tracked_alloc_dealloc_does_not_panic() {
        let allocator = GroupedTraceableAllocator::new(System, CountingTracer::new());
        let layout = Layout::from_size_align(64, 8).unwrap();

        let _guard = TrackingGuard::enable();

        let object_ptr = unsafe { allocator.alloc(layout) };
        assert!(!object_ptr.is_null());
        assert_eq!(
            read_header(object_ptr, layout),
            AllocationGroupId::ROOT.as_raw(),
            "header must be the ROOT group ID"
        );
        assert_eq!(allocator.tracer.allocs.load(Ordering::Relaxed), 1);

        unsafe { allocator.dealloc(object_ptr, layout) };

        assert_eq!(allocator.tracer.deallocs.load(Ordering::Relaxed), 1);
    }

    /// End-to-end reentrant allocation: hold a mutable borrow on the
    /// thread-local group stack so `try_with_suspended_allocation_group`
    /// skips the tracing closure. The header must be UNTRACED and both
    /// trace counters must stay at zero through alloc + dealloc.
    #[test]
    #[serial]
    fn reentrant_alloc_writes_untraced_and_skips_tracing() {
        use crate::internal_telemetry::allocations::allocator::token::LOCAL_ALLOCATION_GROUP_STACK;

        let allocator = GroupedTraceableAllocator::new(System, CountingTracer::new());
        let layout = Layout::from_size_align(64, 8).unwrap();

        let _guard = TrackingGuard::enable();

        // Keep a live mutable borrow so that any nested `try_borrow_mut`
        // inside `try_with_suspended_allocation_group` fails, which forces
        // the tracing closure to be skipped -- the real reentrancy path.
        LOCAL_ALLOCATION_GROUP_STACK.with(|group_stack| {
            let _borrow = group_stack.borrow_mut();

            let object_ptr = unsafe { allocator.alloc(layout) };
            assert!(!object_ptr.is_null());
            assert_eq!(
                read_header(object_ptr, layout),
                UNTRACED,
                "reentrant alloc must write UNTRACED sentinel"
            );
            assert_eq!(allocator.tracer.allocs.load(Ordering::Relaxed), 0);

            unsafe { allocator.dealloc(object_ptr, layout) };

            assert_eq!(allocator.tracer.deallocs.load(Ordering::Relaxed), 0);
        });
    }
}

0 commit comments

Comments
 (0)