Skip to content

Commit ab5c4e6

Browse files
perf(prof): speedup hot path in allocator (#3505)
Co-authored-by: Levi Morrison <levi.morrison@datadoghq.com>
1 parent e2834c8 commit ab5c4e6

10 files changed

Lines changed: 208 additions & 86 deletions

File tree

profiling/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ harness = false
6262

6363
[features]
6464
default = ["io_profiling"]
65+
debug_stats = []
6566
io_profiling = []
6667
stack_walking_tests = []
6768
test = []

profiling/src/allocation/allocation_ge84.rs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
use crate::allocation::{
2-
collect_allocation, ALLOCATION_PROFILING_COUNT, ALLOCATION_PROFILING_SIZE,
3-
ALLOCATION_PROFILING_STATS,
2+
allocation_profiling_stats_mut, allocation_profiling_stats_should_collect, collect_allocation,
43
};
54
use crate::bindings::{self as zend};
65
use crate::{RefCellExt, PROFILER_NAME};
76
use core::{cell::Cell, ptr};
87
use lazy_static::lazy_static;
98
use libc::{c_char, c_void, size_t};
109
use log::{debug, error, trace, warn};
11-
use std::sync::atomic::Ordering::{Relaxed, SeqCst};
10+
use std::sync::atomic::Ordering::Relaxed;
11+
12+
#[cfg(feature = "debug_stats")]
13+
use crate::allocation::{ALLOCATION_PROFILING_COUNT, ALLOCATION_PROFILING_SIZE};
1214

1315
#[derive(Copy, Clone)]
1416
struct ZendMMState {
@@ -353,8 +355,10 @@ pub fn alloc_prof_rshutdown() {
353355
}
354356

355357
unsafe extern "C" fn alloc_prof_malloc(len: size_t) -> *mut c_void {
356-
ALLOCATION_PROFILING_COUNT.fetch_add(1, SeqCst);
357-
ALLOCATION_PROFILING_SIZE.fetch_add(len as u64, SeqCst);
358+
#[cfg(feature = "debug_stats")]
359+
ALLOCATION_PROFILING_COUNT.fetch_add(1, Relaxed);
360+
#[cfg(feature = "debug_stats")]
361+
ALLOCATION_PROFILING_SIZE.fetch_add(len as u64, Relaxed);
358362

359363
let ptr = tls_zend_mm_state_get!(alloc)(len);
360364

@@ -364,9 +368,7 @@ unsafe extern "C" fn alloc_prof_malloc(len: size_t) -> *mut c_void {
364368
return ptr;
365369
}
366370

367-
if ALLOCATION_PROFILING_STATS
368-
.borrow_mut_or_false(|allocations| allocations.should_collect_allocation(len))
369-
{
371+
if allocation_profiling_stats_should_collect(len) {
370372
collect_allocation(len);
371373
}
372374

@@ -422,8 +424,10 @@ unsafe fn alloc_prof_orig_free(ptr: *mut c_void) {
422424
}
423425

424426
unsafe extern "C" fn alloc_prof_realloc(prev_ptr: *mut c_void, len: size_t) -> *mut c_void {
425-
ALLOCATION_PROFILING_COUNT.fetch_add(1, SeqCst);
426-
ALLOCATION_PROFILING_SIZE.fetch_add(len as u64, SeqCst);
427+
#[cfg(feature = "debug_stats")]
428+
ALLOCATION_PROFILING_COUNT.fetch_add(1, Relaxed);
429+
#[cfg(feature = "debug_stats")]
430+
ALLOCATION_PROFILING_SIZE.fetch_add(len as u64, Relaxed);
427431

428432
let ptr = tls_zend_mm_state_get!(realloc)(prev_ptr, len);
429433

@@ -433,9 +437,7 @@ unsafe extern "C" fn alloc_prof_realloc(prev_ptr: *mut c_void, len: size_t) -> *
433437
return ptr;
434438
}
435439

436-
if ALLOCATION_PROFILING_STATS
437-
.borrow_mut_or_false(|allocations| allocations.should_collect_allocation(len))
438-
{
440+
if allocation_profiling_stats_should_collect(len) {
439441
collect_allocation(len);
440442
}
441443

profiling/src/allocation/allocation_le83.rs

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
use crate::allocation::{
2-
collect_allocation, ALLOCATION_PROFILING_COUNT, ALLOCATION_PROFILING_SIZE,
3-
ALLOCATION_PROFILING_STATS,
4-
};
1+
use crate::allocation::{allocation_profiling_stats_should_collect, collect_allocation};
52
use crate::bindings::{
63
self as zend, datadog_php_install_handler, datadog_php_zif_handler,
74
ddog_php_prof_copy_long_into_zval,
@@ -11,7 +8,10 @@ use core::{cell::Cell, ptr};
118
use lazy_static::lazy_static;
129
use libc::{c_char, c_int, c_void, size_t};
1310
use log::{debug, error, trace, warn};
14-
use std::sync::atomic::Ordering::{Relaxed, SeqCst};
11+
use std::sync::atomic::Ordering::Relaxed;
12+
13+
#[cfg(feature = "debug_stats")]
14+
use crate::allocation::{ALLOCATION_PROFILING_COUNT, ALLOCATION_PROFILING_SIZE};
1515

1616
static mut GC_MEM_CACHES_HANDLER: zend::InternalFunctionHandler = None;
1717

@@ -344,8 +344,10 @@ unsafe extern "C" fn alloc_prof_gc_mem_caches(
344344
}
345345

346346
unsafe extern "C" fn alloc_prof_malloc(len: size_t) -> *mut c_void {
347-
ALLOCATION_PROFILING_COUNT.fetch_add(1, SeqCst);
348-
ALLOCATION_PROFILING_SIZE.fetch_add(len as u64, SeqCst);
347+
#[cfg(feature = "debug_stats")]
348+
ALLOCATION_PROFILING_COUNT.fetch_add(1, Relaxed);
349+
#[cfg(feature = "debug_stats")]
350+
ALLOCATION_PROFILING_SIZE.fetch_add(len as u64, Relaxed);
349351

350352
let ptr = tls_zend_mm_state_get!(alloc)(len);
351353

@@ -355,9 +357,7 @@ unsafe extern "C" fn alloc_prof_malloc(len: size_t) -> *mut c_void {
355357
return ptr;
356358
}
357359

358-
if ALLOCATION_PROFILING_STATS
359-
.borrow_mut_or_false(|allocations| allocations.should_collect_allocation(len))
360-
{
360+
if allocation_profiling_stats_should_collect(len) {
361361
collect_allocation(len);
362362
}
363363

@@ -403,8 +403,10 @@ unsafe fn alloc_prof_orig_free(ptr: *mut c_void) {
403403
}
404404

405405
unsafe extern "C" fn alloc_prof_realloc(prev_ptr: *mut c_void, len: size_t) -> *mut c_void {
406-
ALLOCATION_PROFILING_COUNT.fetch_add(1, SeqCst);
407-
ALLOCATION_PROFILING_SIZE.fetch_add(len as u64, SeqCst);
406+
#[cfg(feature = "debug_stats")]
407+
ALLOCATION_PROFILING_COUNT.fetch_add(1, Relaxed);
408+
#[cfg(feature = "debug_stats")]
409+
ALLOCATION_PROFILING_SIZE.fetch_add(len as u64, Relaxed);
408410

409411
let ptr = tls_zend_mm_state_get!(realloc)(prev_ptr, len);
410412

@@ -414,9 +416,7 @@ unsafe extern "C" fn alloc_prof_realloc(prev_ptr: *mut c_void, len: size_t) -> *
414416
return ptr;
415417
}
416418

417-
if ALLOCATION_PROFILING_STATS
418-
.borrow_mut_or_false(|allocations| allocations.should_collect_allocation(len))
419-
{
419+
if allocation_profiling_stats_should_collect(len) {
420420
collect_allocation(len);
421421
}
422422

profiling/src/allocation/mod.rs

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,22 @@
1+
mod tls_allocation_profiling_stats;
2+
3+
#[cfg(php_zend_mm_set_custom_handlers_ex)]
4+
pub mod allocation_ge84;
5+
#[cfg(not(php_zend_mm_set_custom_handlers_ex))]
6+
pub mod allocation_le83;
7+
8+
pub use tls_allocation_profiling_stats::*;
9+
110
use crate::bindings::{self as zend};
211
use crate::profiling::Profiler;
312
use crate::{RefCellExt, REQUEST_LOCALS};
413
use libc::size_t;
514
use log::{debug, error, trace};
615
use rand::rngs::ThreadRng;
716
use rand_distr::{Distribution, Poisson};
8-
use std::cell::RefCell;
917
use std::ffi::c_void;
1018
use std::sync::atomic::{AtomicU64, Ordering};
1119

12-
#[cfg(php_zend_mm_set_custom_handlers_ex)]
13-
pub mod allocation_ge84;
14-
#[cfg(not(php_zend_mm_set_custom_handlers_ex))]
15-
pub mod allocation_le83;
16-
1720
/// Default sampling interval in bytes (4MB)
1821
pub const DEFAULT_ALLOCATION_SAMPLING_INTERVAL: u64 = 1024 * 4096;
1922

@@ -24,11 +27,13 @@ pub static ALLOCATION_PROFILING_INTERVAL: AtomicU64 =
2427
/// This will store the count of allocations (including reallocations) during
2528
/// a profiling period. This will overflow when doing more than u64::MAX
2629
/// allocations, which seems big enough to ignore.
30+
#[cfg(feature = "debug_stats")]
2731
pub static ALLOCATION_PROFILING_COUNT: AtomicU64 = AtomicU64::new(0);
2832

2933
/// This will store the accumulated size of all allocations in bytes during the
3034
/// profiling period. This will overflow when allocating more than 18 exabytes
3135
/// of memory (u64::MAX) which might not happen, so we can ignore this.
36+
#[cfg(feature = "debug_stats")]
3237
pub static ALLOCATION_PROFILING_SIZE: AtomicU64 = AtomicU64::new(0);
3338

3439
pub struct AllocationProfilingStats {
@@ -42,7 +47,7 @@ impl AllocationProfilingStats {
4247
fn new() -> AllocationProfilingStats {
4348
// Safety: this will only error if lambda <= 0
4449
let poisson =
45-
Poisson::new(ALLOCATION_PROFILING_INTERVAL.load(Ordering::SeqCst) as f64).unwrap();
50+
Poisson::new(ALLOCATION_PROFILING_INTERVAL.load(Ordering::Relaxed) as f64).unwrap();
4651
let mut stats = AllocationProfilingStats {
4752
next_sample: 0,
4853
poisson,
@@ -83,23 +88,6 @@ pub fn collect_allocation(len: size_t) {
8388
}
8489
}
8590

86-
thread_local! {
87-
static ALLOCATION_PROFILING_STATS: RefCell<AllocationProfilingStats> =
88-
RefCell::new(AllocationProfilingStats::new());
89-
}
90-
91-
pub fn alloc_prof_ginit() {
92-
#[cfg(not(php_zend_mm_set_custom_handlers_ex))]
93-
allocation_le83::alloc_prof_ginit();
94-
#[cfg(php_zend_mm_set_custom_handlers_ex)]
95-
allocation_ge84::alloc_prof_ginit();
96-
}
97-
98-
pub fn alloc_prof_gshutdown() {
99-
#[cfg(php_zend_mm_set_custom_handlers_ex)]
100-
allocation_ge84::alloc_prof_gshutdown();
101-
}
102-
10391
#[cfg(not(php_zend_mm_set_custom_handlers_ex))]
10492
pub fn alloc_prof_startup() {
10593
allocation_le83::alloc_prof_startup();
@@ -120,11 +108,11 @@ pub fn alloc_prof_first_rinit() {
120108
return;
121109
}
122110

123-
ALLOCATION_PROFILING_INTERVAL.store(sampling_distance as u64, Ordering::SeqCst);
111+
ALLOCATION_PROFILING_INTERVAL.store(sampling_distance as u64, Ordering::Relaxed);
124112

125113
trace!(
126114
"Memory allocation profiling initialized with a sampling distance of {} bytes.",
127-
ALLOCATION_PROFILING_INTERVAL.load(Ordering::SeqCst)
115+
ALLOCATION_PROFILING_INTERVAL.load(Ordering::Relaxed)
128116
);
129117
}
130118

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
//! The thread-local allocation profiling stats are held in this module.
2+
//! The stats are used on the hot-path of allocation, so this code is
3+
//! performance sensitive. It is encapsulated so that some unsafe techniques
4+
//! can be used but expose a relatively safe API.
5+
6+
use super::AllocationProfilingStats;
7+
use libc::size_t;
8+
use std::cell::UnsafeCell;
9+
use std::mem::MaybeUninit;
10+
11+
#[cfg(php_zend_mm_set_custom_handlers_ex)]
12+
use super::allocation_ge84;
13+
#[cfg(not(php_zend_mm_set_custom_handlers_ex))]
14+
use super::allocation_le83;
15+
16+
thread_local! {
17+
/// This is initialized in ginit, before any memory allocator hooks are
18+
/// installed. During a request, every access will see an initialized value.
19+
///
20+
/// This is not pub so that unsafe code can be contained to this module.
21+
static ALLOCATION_PROFILING_STATS: UnsafeCell<MaybeUninit<AllocationProfilingStats>> =
22+
const { UnsafeCell::new(MaybeUninit::uninit()) };
23+
}
24+
25+
/// Accesses the thread-local [`AllocationProfilingStats`], passing a mutable
26+
/// reference to the contained `MaybeUninit` to `F`.
27+
///
28+
/// # Safety
29+
///
30+
/// 1. There should not be any active borrows to the thread-local variable
31+
/// [`ALLOCATION_PROFILING_STATS`] when this function is called.
32+
/// 2. Function `F` should not do anything which causes a new borrow on
33+
/// [`ALLOCATION_PROFILING_STATS`].
34+
/// 3. Do not call this function in ALLOCATION_PROFILING_STATS's destructor,
35+
/// as it assumes that [`std::thread::LocalKey::try_with`] cannot fail.
36+
///
37+
/// This is not pub to limit caller's ability to violate these conditions.
38+
unsafe fn allocation_profiling_stats_mut<F, R>(f: F) -> R
39+
where
40+
F: FnOnce(&mut MaybeUninit<AllocationProfilingStats>) -> R,
41+
{
42+
let result = ALLOCATION_PROFILING_STATS.try_with(|cell| {
43+
let ptr: *mut MaybeUninit<AllocationProfilingStats> = cell.get();
44+
// SAFETY: the cell is statically initialized to [`MaybeUninit::uninit`] so the
45+
// _cell_ is valid and initialized memory. As required by this
46+
// function's own safety requirements, there should not be any active borrows
47+
// to [`ALLOCATION_PROFILING_STATS`], so this mutable dereference is sound.
48+
let uninit = unsafe { &mut *ptr };
49+
f(uninit)
50+
});
51+
unsafe {
52+
// SAFETY: this function is not called in a destructor, therefore it
53+
// cannot return an AccessError:
54+
// > If the key has been destroyed (which may happen if this is called
55+
// > in a destructor), this function will return an AccessError.
56+
result.unwrap_unchecked()
57+
}
58+
}
59+
60+
/// Given the provided allocation length `len`, return whether the allocation
61+
/// should be collected. This is a mutable operation, as the thread-local
62+
/// variable will be modified to reduce the distance until the next sample.
63+
pub fn allocation_profiling_stats_should_collect(len: size_t) -> bool {
64+
let f = |maybe_uninit: &mut MaybeUninit<AllocationProfilingStats>| {
65+
// SAFETY: ALLOCATION_PROFILING_STATS was initialized in GINIT.
66+
let stats = unsafe { maybe_uninit.assume_init_mut() };
67+
stats.should_collect_allocation(len)
68+
};
69+
70+
// SAFETY:
71+
// 1. This function doesn't expose any way for the caller to keep a
72+
// borrow alive, nor do the other public functions, so there cannot be
73+
// any existing borrows alive.
74+
// 2. This closure will not cause any new borrows.
75+
// 3. This function isn't called during ALLOCATION_PROFILING_STATS's dtor,
76+
// as MaybeUninit's destructor does nothing, you have to specifically drop
77+
// it. Even if the destructor were called, AllocationProfilingStats's dtor
78+
// doesn't access the TLS variable (it can't, it doesn't have access).
79+
unsafe { allocation_profiling_stats_mut(f) }
80+
}
81+
82+
/// Initializes the allocation profiler's globals.
83+
///
84+
/// # Safety
85+
///
86+
/// Must be called once per PHP thread ginit.
87+
pub unsafe fn ginit() {
88+
// SAFETY:
89+
// 1. During ginit, there will not be any other borrows.
90+
// 2. This closure will not make new borrows.
91+
// 3. This is not during the thread-local destructor.
92+
unsafe {
93+
allocation_profiling_stats_mut(|uninit| {
94+
uninit.write(AllocationProfilingStats::new());
95+
})
96+
};
97+
98+
#[cfg(not(php_zend_mm_set_custom_handlers_ex))]
99+
allocation_le83::alloc_prof_ginit();
100+
#[cfg(php_zend_mm_set_custom_handlers_ex)]
101+
allocation_ge84::alloc_prof_ginit();
102+
}
103+
104+
/// Shuts down the allocation profiler's globals.
105+
///
106+
/// # Safety
107+
///
108+
/// Must be called once per PHP thread gshutdown.
109+
pub unsafe fn gshutdown() {
110+
#[cfg(php_zend_mm_set_custom_handlers_ex)]
111+
allocation_ge84::alloc_prof_gshutdown();
112+
113+
// SAFETY:
114+
// 1. During gshutdown, there will not be any other borrows.
115+
// 2. This closure will not make new borrows.
116+
// 3. This is not during the thread-local destructor.
117+
unsafe { allocation_profiling_stats_mut(|maybe_uninit| maybe_uninit.assume_init_drop()) }
118+
}

profiling/src/exception.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ pub static EXCEPTION_PROFILING_INTERVAL: AtomicU32 =
2626
/// This will store the number of exceptions thrown during a profiling period. It will overflow
2727
/// when throwing more than 4_294_967_295 exceptions during this period which we currently
2828
/// believe will bring down your application anyway, so accurate numbers are not a problem.
29+
#[cfg(feature = "debug_stats")]
2930
pub static EXCEPTION_PROFILING_EXCEPTION_COUNT: AtomicU32 = AtomicU32::new(0);
3031

3132
pub struct ExceptionProfilingStats {
@@ -39,7 +40,7 @@ impl ExceptionProfilingStats {
3940
fn new() -> ExceptionProfilingStats {
4041
// Safety: this will only error if lambda <= 0
4142
let poisson =
42-
Poisson::new(EXCEPTION_PROFILING_INTERVAL.load(Ordering::SeqCst) as f64).unwrap();
43+
Poisson::new(EXCEPTION_PROFILING_INTERVAL.load(Ordering::Relaxed) as f64).unwrap();
4344
let mut stats = ExceptionProfilingStats {
4445
next_sample: 0,
4546
poisson,
@@ -168,7 +169,7 @@ pub fn exception_profiling_first_rinit() {
168169
return;
169170
}
170171

171-
EXCEPTION_PROFILING_INTERVAL.store(sampling_distance, Ordering::SeqCst);
172+
EXCEPTION_PROFILING_INTERVAL.store(sampling_distance, Ordering::Relaxed);
172173

173174
info!("Exception profiling initialized with sampling distance: {sampling_distance}");
174175
}
@@ -184,7 +185,8 @@ unsafe extern "C" fn exception_profiling_throw_exception_hook(
184185
#[cfg(php7)] exception: *mut zend::zval,
185186
#[cfg(php8)] exception: *mut zend::zend_object,
186187
) {
187-
EXCEPTION_PROFILING_EXCEPTION_COUNT.fetch_add(1, Ordering::SeqCst);
188+
#[cfg(feature = "debug_stats")]
189+
EXCEPTION_PROFILING_EXCEPTION_COUNT.fetch_add(1, Ordering::Relaxed);
188190

189191
let exception_enabled = REQUEST_LOCALS
190192
.borrow_or_false(|locals| locals.system_settings().profiling_exception_enabled);

0 commit comments

Comments
 (0)