Skip to content

Commit d29b8d2

Browse files
authored
chore(benchmarks): add allocation size tracking allocator (#1905)
# What does this PR do? Adds an allocation size tracking allocator that can be used to benchmark memory used by functions. # Motivation Measure all the things... Co-authored-by: bjorn.antonsson <bjorn.antonsson@datadoghq.com>
1 parent 974d690 commit d29b8d2

9 files changed

Lines changed: 380 additions & 11 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

LICENSE-3rdparty.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ core-foundation-sys,https://github.com/servo/core-foundation-rs,MIT OR Apache-2.
9595
cpp_demangle,https://github.com/gimli-rs/cpp_demangle,MIT OR Apache-2.0,"Nick Fitzgerald <fitzgen@gmail.com>, Jim Blandy <jimb@red-bean.com>, Kyle Huey <khuey@kylehuey.com>"
9696
cpufeatures,https://github.com/RustCrypto/utils,MIT OR Apache-2.0,RustCrypto Developers
9797
crc32fast,https://github.com/srijs/rust-crc32fast,MIT OR Apache-2.0,"Sam Rijs <srijs@airpost.net>, Alex Crichton <alex@alexcrichton.com>"
98+
criterion,https://github.com/bheisler/criterion.rs,Apache-2.0 OR MIT,"Jorge Aparicio <japaricious@gmail.com>, Brook Heisler <brookheisler@gmail.com>"
9899
criterion-plot,https://github.com/bheisler/criterion.rs,MIT OR Apache-2.0,"Jorge Aparicio <japaricious@gmail.com>, Brook Heisler <brookheisler@gmail.com>"
99100
critical-section,https://github.com/rust-embedded/critical-section,MIT OR Apache-2.0,The critical-section Authors
100101
crossbeam-channel,https://github.com/crossbeam-rs/crossbeam,MIT OR Apache-2.0,The crossbeam-channel Authors

datadog-ipc/benches/ipc.rs

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use tokio::runtime;
1515
fn criterion_benchmark(c: &mut Criterion) {
1616
let (conn_server, conn_client) = datadog_ipc::SeqpacketConn::socketpair().unwrap();
1717

18-
let worker = thread::spawn(move || {
18+
let _worker = thread::spawn(move || {
1919
let rt = runtime::Builder::new_current_thread()
2020
.enable_all()
2121
.build()
@@ -36,15 +36,6 @@ fn criterion_benchmark(c: &mut Criterion) {
3636
c.bench_function("two way interface", |b| {
3737
b.iter(|| channel.call_req_cnt().unwrap())
3838
});
39-
40-
#[cfg(not(target_arch = "aarch64"))]
41-
println!(
42-
"Total requests handled: {}",
43-
channel.call_req_cnt().unwrap()
44-
);
45-
46-
drop(channel);
47-
worker.join().unwrap();
4839
}
4940

5041
#[cfg(unix)]

libdd-common/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ regex = "1.5"
3535
# Use rustls-no-provider instead of rustls to avoid reqwest forcing aws-lc-rs as the crypto
3636
# backend. We install the ring provider explicitly in connector/mod.rs instead.
3737
reqwest = { version = "0.13.2", features = ["rustls-no-provider", "hickory-dns"], default-features = false, optional = true }
38+
criterion = { version = "0.5.1", optional = true }
3839
# Pinned to <0.8.3: version 0.8.3+ pulls in openssl-probe@0.2 which probes multiple
3940
# certificate directories and parses individual cert files instead of loading a single
4041
# bundle, adding unnecessary I/O overhead in latency-sensitive environments.
@@ -103,6 +104,8 @@ fips = ["tls-core", "hyper-rustls/fips"]
103104
reqwest = ["dep:reqwest", "test-utils"]
104105
# Enable test utilities for use in other crates
105106
test-utils = ["dep:httparse", "dep:rand", "dep:mime", "dep:multer"]
107+
# Enable benchmark utilities (ReportingAllocator, Criterion allocation measurement)
108+
bench-utils = ["dep:criterion"]
106109

107110
[lints.rust]
108111
# We run coverage checks in our github actions. These checks are run with

libdd-common/src/bench_utils.rs

Lines changed: 333 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,333 @@
1+
// Copyright 2021-Present Datadog, Inc. https://www.datadoghq.com/
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//! Scaffolding for memory usage benchmarks.
5+
//!
6+
//! See the `ReportingAllocator` type and `memory_allocated_measurement` for usage.
7+
8+
#![allow(missing_docs)]
9+
10+
use std::{
11+
alloc::{GlobalAlloc, System},
12+
cell::Cell,
13+
time::Duration,
14+
};
15+
16+
use criterion::{Criterion, Throughput};
17+
18+
/// Supplies a static, human-readable label for a Criterion measurement type,
/// so benchmark harnesses can tag output by *what* was measured
/// (e.g. wall time vs. allocated bytes).
pub trait MeasurementName {
    /// Returns the measurement's identifier, e.g. "wall_time" or "allocated_bytes".
    fn name() -> &'static str;
}
21+
22+
/// Label for Criterion's default time-based measurement.
impl MeasurementName for criterion::measurement::WallTime {
    fn name() -> &'static str {
        "wall_time"
    }
}
27+
28+
pub fn memory_allocated_measurement(
29+
global_alloc: &'static ReportingAllocator<System>,
30+
) -> Criterion<AllocatedBytesMeasurement<System>> {
31+
Criterion::default()
32+
.with_measurement(AllocatedBytesMeasurement(Cell::new(false), global_alloc))
33+
.measurement_time(Duration::from_millis(1))
34+
.warm_up_time(Duration::from_millis(1))
35+
.without_plots()
36+
.plotting_backend(criterion::PlottingBackend::None)
37+
.sample_size(10)
38+
}
39+
40+
/// Snapshot of the counters maintained by [`ReportingAllocator`].
#[derive(Debug)]
struct AllocStats {
    /// Total bytes requested by successful `alloc` calls since construction.
    /// Never decremented — `dealloc` is intentionally untracked, so this is
    /// cumulative allocation volume, not live memory.
    allocated_bytes: usize,
    /// Number of successful `alloc` calls since construction.
    #[allow(dead_code)]
    allocations: usize,
}

/// A `GlobalAlloc` wrapper that counts the allocations flowing through it.
///
/// Install a `static` instance as `#[global_allocator]` in a benchmark binary
/// and pair it with `memory_allocated_measurement` to report the bytes
/// allocated by the code under benchmark.
pub struct ReportingAllocator<T: GlobalAlloc> {
    alloc: T,
    allocated_bytes: std::sync::atomic::AtomicUsize,
    allocations: std::sync::atomic::AtomicUsize,
}

impl<T: GlobalAlloc> ReportingAllocator<T> {
    /// Wraps `alloc` with zeroed counters. `const` so it can initialize the
    /// `static` required by `#[global_allocator]`.
    pub const fn new(alloc: T) -> Self {
        Self {
            alloc,
            allocated_bytes: std::sync::atomic::AtomicUsize::new(0),
            allocations: std::sync::atomic::AtomicUsize::new(0),
        }
    }

    /// Reads the current counter values. Relaxed ordering is sufficient: the
    /// counters are independent monotonic totals used only for reporting.
    fn stats(&self) -> AllocStats {
        AllocStats {
            allocated_bytes: self
                .allocated_bytes
                .load(std::sync::atomic::Ordering::Relaxed),
            allocations: self.allocations.load(std::sync::atomic::Ordering::Relaxed),
        }
    }
}

unsafe impl<T: GlobalAlloc> GlobalAlloc for ReportingAllocator<T> {
    unsafe fn alloc(&self, layout: std::alloc::Layout) -> *mut u8 {
        let ptr = self.alloc.alloc(layout);
        // Count only allocations that actually succeeded; a null return means
        // nothing was allocated, and recording it would inflate the stats.
        if !ptr.is_null() {
            self.allocated_bytes
                .fetch_add(layout.size(), std::sync::atomic::Ordering::Relaxed);
            self.allocations
                .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
        }
        ptr
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: std::alloc::Layout) {
        // Deliberately untracked: the measurement reports cumulative bytes
        // allocated, not peak or live usage.
        self.alloc.dealloc(ptr, layout);
    }
}
85+
86+
/// Criterion `Measurement` that reports bytes allocated during an iteration.
///
/// Field 0 is a toggle used by `to_f64` to alternate a +/- epsilon on reported
/// values (Criterion mishandles sample sets where every value is identical).
/// Field 1 is the global allocator whose counters are sampled.
pub struct AllocatedBytesMeasurement<T: GlobalAlloc + 'static>(
    Cell<bool>,
    &'static ReportingAllocator<T>,
);
90+
91+
/// Label for the allocation-based measurement.
impl<T: GlobalAlloc> MeasurementName for AllocatedBytesMeasurement<T> {
    fn name() -> &'static str {
        "allocated_bytes"
    }
}
96+
97+
impl<T: GlobalAlloc> criterion::measurement::Measurement for AllocatedBytesMeasurement<T> {
98+
type Intermediate = usize;
99+
100+
type Value = usize;
101+
102+
fn start(&self) -> Self::Intermediate {
103+
self.1.stats().allocated_bytes
104+
}
105+
106+
fn end(&self, i: Self::Intermediate) -> Self::Value {
107+
self.1.stats().allocated_bytes - i
108+
}
109+
110+
fn add(&self, v1: &Self::Value, v2: &Self::Value) -> Self::Value {
111+
*v1 + *v2
112+
}
113+
114+
fn zero(&self) -> Self::Value {
115+
0
116+
}
117+
118+
fn to_f64(&self, value: &Self::Value) -> f64 {
119+
let b = self.0.get();
120+
self.0.set(!b);
121+
// Criterion does not handle all-identical measurement values well, and since
122+
// allocation is deterministic that tends to happen a lot. Add a small +/- epsilon
123+
// so each pair of measurements differs slightly without skewing the distribution.
124+
*value as f64 + if b { 0.01 } else { -0.01 }
125+
}
126+
127+
fn formatter(&self) -> &dyn criterion::measurement::ValueFormatter {
128+
&AllocationFormatter
129+
}
130+
}
131+
132+
struct AllocationFormatter;
133+
134+
impl criterion::measurement::ValueFormatter for AllocationFormatter {
135+
fn scale_values(&self, typical_value: f64, values: &mut [f64]) -> &'static str {
136+
let log_scale: f64 = typical_value.log10().round();
137+
if log_scale.is_infinite() || log_scale.is_nan() || log_scale < 0.0 {
138+
return "b";
139+
}
140+
let scale = (log_scale as i32 / 3).min(4);
141+
values.iter_mut().for_each(|v| *v /= 10_f64.powi(scale * 3));
142+
match scale {
143+
0 => "b",
144+
1 => "Kb",
145+
2 => "Mb",
146+
3 => "Gb",
147+
_ => "Tb",
148+
}
149+
}
150+
151+
fn scale_throughputs(
152+
&self,
153+
_typical_value: f64,
154+
throughput: &criterion::Throughput,
155+
_values: &mut [f64],
156+
) -> &'static str {
157+
match throughput {
158+
Throughput::Bytes(_) => "B/s",
159+
Throughput::BytesDecimal(_) => "B/s",
160+
Throughput::Elements(_) => "elements/s",
161+
}
162+
}
163+
164+
fn scale_for_machines(&self, _values: &mut [f64]) -> &'static str {
165+
"b"
166+
}
167+
}
168+
169+
/// Unit tests for the allocator, the measurement, and the formatter.
#[cfg(test)]
mod tests {
    use super::*;
    use criterion::measurement::{Measurement, ValueFormatter};
    use std::alloc::{GlobalAlloc, Layout, System};

    // Shared allocator for tests that need a `&'static ReportingAllocator`.
    // Tests run in parallel, so assertions against it must tolerate
    // interleaved allocations from other tests.
    static SHARED: ReportingAllocator<System> = ReportingAllocator::new(System);

    // --- ReportingAllocator ---

    #[test]
    fn new_starts_at_zero() {
        let a = ReportingAllocator::new(System);
        let s = a.stats();
        assert_eq!(s.allocated_bytes, 0);
        assert_eq!(s.allocations, 0);
    }

    #[test]
    fn alloc_increments_both_counters() {
        let a = ReportingAllocator::new(System);
        let layout = Layout::from_size_align(64, 8).unwrap();
        let ptr = unsafe { a.alloc(layout) };
        assert!(!ptr.is_null());
        assert_eq!(a.stats().allocated_bytes, 64);
        assert_eq!(a.stats().allocations, 1);
        unsafe { a.dealloc(ptr, layout) };
    }

    #[test]
    fn dealloc_does_not_change_counters() {
        // The allocator tracks cumulative allocation volume only;
        // frees are intentionally not subtracted.
        let a = ReportingAllocator::new(System);
        let layout = Layout::from_size_align(32, 8).unwrap();
        let ptr = unsafe { a.alloc(layout) };
        let bytes_after_alloc = a.stats().allocated_bytes;
        unsafe { a.dealloc(ptr, layout) };
        assert_eq!(a.stats().allocated_bytes, bytes_after_alloc);
        assert_eq!(a.stats().allocations, 1);
    }

    #[test]
    fn multiple_allocs_accumulate() {
        let a = ReportingAllocator::new(System);
        let l1 = Layout::from_size_align(16, 8).unwrap();
        let l2 = Layout::from_size_align(32, 8).unwrap();
        let p1 = unsafe { a.alloc(l1) };
        let p2 = unsafe { a.alloc(l2) };
        assert_eq!(a.stats().allocated_bytes, 48);
        assert_eq!(a.stats().allocations, 2);
        unsafe {
            a.dealloc(p1, l1);
            a.dealloc(p2, l2);
        }
    }

    // --- AllocatedBytesMeasurement ---

    #[test]
    fn measurement_zero_and_add() {
        let m = AllocatedBytesMeasurement(Cell::new(false), &SHARED);
        assert_eq!(m.zero(), 0);
        assert_eq!(m.add(&100, &200), 300);
    }

    #[test]
    fn measurement_start_end_tracks_delta() {
        let m = AllocatedBytesMeasurement(Cell::new(false), &SHARED);
        let start = m.start();
        let layout = Layout::from_size_align(256, 8).unwrap();
        let ptr = unsafe { SHARED.alloc(layout) };
        // Other tests may also allocate via SHARED concurrently, so allow >= 256.
        assert!(m.end(start) >= 256);
        unsafe { SHARED.dealloc(ptr, layout) };
    }

    #[test]
    fn measurement_to_f64_alternates_epsilon() {
        let m = AllocatedBytesMeasurement(Cell::new(false), &SHARED);
        // Initial state: Cell = false → first result is value - 0.01
        assert!((m.to_f64(&1000) - 999.99).abs() < 1e-9);
        // After first call: Cell = true → result is value + 0.01
        assert!((m.to_f64(&1000) - 1000.01).abs() < 1e-9);
        // Alternates back
        assert!((m.to_f64(&1000) - 999.99).abs() < 1e-9);
    }

    #[test]
    fn measurement_name() {
        assert_eq!(
            AllocatedBytesMeasurement::<System>::name(),
            "allocated_bytes"
        );
    }

    // --- AllocationFormatter::scale_values ---

    #[test]
    fn scale_values_zero_returns_bytes() {
        // log10(0) is -inf → non-finite → falls back to "b".
        let f = AllocationFormatter;
        let mut v = [42.0_f64];
        assert_eq!(f.scale_values(0.0, &mut v), "b");
    }

    #[test]
    fn scale_values_sub_byte_returns_bytes() {
        let f = AllocationFormatter;
        let mut v = [0.5_f64];
        // log10(0.1) = -1 → negative → "b"
        assert_eq!(f.scale_values(0.1, &mut v), "b");
    }

    #[test]
    fn scale_values_bytes() {
        let f = AllocationFormatter;
        let mut v = [1.0_f64];
        assert_eq!(f.scale_values(1.0, &mut v), "b");
        assert!((v[0] - 1.0).abs() < 1e-9);
    }

    #[test]
    fn scale_values_kilobytes() {
        let f = AllocationFormatter;
        let mut v = [2000.0_f64];
        assert_eq!(f.scale_values(1000.0, &mut v), "Kb");
        assert!((v[0] - 2.0).abs() < 1e-9);
    }

    #[test]
    fn scale_values_megabytes() {
        let f = AllocationFormatter;
        let mut v = [3_000_000.0_f64];
        assert_eq!(f.scale_values(1_000_000.0, &mut v), "Mb");
        assert!((v[0] - 3.0).abs() < 1e-9);
    }

    #[test]
    fn scale_values_gigabytes() {
        let f = AllocationFormatter;
        let mut v = [4_000_000_000.0_f64];
        assert_eq!(f.scale_values(1_000_000_000.0, &mut v), "Gb");
        assert!((v[0] - 4.0).abs() < 1e-9);
    }

    #[test]
    fn scale_values_terabytes() {
        let f = AllocationFormatter;
        let mut v = [5_000_000_000_000.0_f64];
        assert_eq!(f.scale_values(1_000_000_000_000.0, &mut v), "Tb");
        assert!((v[0] - 5.0).abs() < 1e-9);
    }

    #[test]
    fn scale_values_very_large_clamps_to_terabytes() {
        // Scale index is capped at 4, so anything beyond 10^12 stays "Tb".
        let f = AllocationFormatter;
        let mut v = [1e18_f64];
        assert_eq!(f.scale_values(1e18, &mut v), "Tb");
    }

    #[test]
    fn scale_for_machines_returns_bytes_unit() {
        let f = AllocationFormatter;
        let mut v = [1000.0_f64];
        assert_eq!(f.scale_for_machines(&mut v), "b");
    }
}

0 commit comments

Comments
 (0)