Skip to content

Commit 75730ae

Browse files
committed
perf(sampling): optimize matching and bound cache memory
1 parent 1fd6940 commit 75730ae

3 files changed

Lines changed: 498 additions & 97 deletions

File tree

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//! LRU cache wrapper with dual limits: maximum entry count AND maximum tracked byte size.
5+
//!
6+
//! `lru::LruCache` only supports an entry-count capacity, which is unsafe for caches keyed on
7+
//! arbitrary user strings: a few very large keys can balloon memory. `BoundedByteCache` adds
8+
//! a byte budget on top, evicting least-recently-used entries until both limits are satisfied.
9+
10+
use lru::LruCache;
11+
use std::borrow::Borrow;
12+
use std::hash::Hash;
13+
use std::mem::size_of;
14+
use std::num::NonZeroUsize;
15+
16+
/// Default cap on the number of cached entries.
pub const DEFAULT_MAX_ENTRIES: usize = 256;

/// Default cap on the total tracked byte size: 256 KiB (262,144 bytes).
pub const DEFAULT_MAX_BYTES: usize = 256 << 10;
21+
22+
/// LRU cache bounded by both entry count and total tracked byte size.
23+
///
24+
/// Byte accounting covers `key.as_ref().len() + size_of::<V>()`. Heap-allocated value contents
25+
/// are not tracked; this wrapper assumes small inline values (e.g. `bool`).
26+
pub struct BoundedByteCache<K, V>
27+
where
28+
K: Hash + Eq + AsRef<[u8]>,
29+
{
30+
inner: LruCache<K, V>,
31+
current_bytes: usize,
32+
max_bytes: usize,
33+
}
34+
35+
impl<K, V> BoundedByteCache<K, V>
36+
where
37+
K: Hash + Eq + AsRef<[u8]>,
38+
{
39+
/// `max_entries` of zero is treated as 1 (a cache with no slots is nonsensical).
40+
pub fn new(max_entries: usize, max_bytes: usize) -> Self {
41+
let entry_cap = NonZeroUsize::new(max_entries).unwrap_or(NonZeroUsize::MIN);
42+
Self {
43+
inner: LruCache::new(entry_cap),
44+
current_bytes: 0,
45+
max_bytes,
46+
}
47+
}
48+
49+
#[inline]
50+
pub fn get<Q>(&mut self, key: &Q) -> Option<&V>
51+
where
52+
K: Borrow<Q>,
53+
Q: Hash + Eq + ?Sized,
54+
{
55+
self.inner.get(key)
56+
}
57+
58+
/// Insert `key -> value`. Entries larger than `max_bytes` are dropped silently. Otherwise
59+
/// LRU entries are evicted until the new entry fits.
60+
#[inline]
61+
pub fn put(&mut self, key: K, value: V) {
62+
let entry_bytes = Self::entry_size(&key);
63+
64+
if entry_bytes > self.max_bytes {
65+
return;
66+
}
67+
68+
// Replacing an existing key: deduct its bytes first.
69+
if self.inner.pop(&key).is_some() {
70+
self.current_bytes = self.current_bytes.saturating_sub(entry_bytes);
71+
}
72+
73+
while self.current_bytes + entry_bytes > self.max_bytes {
74+
match self.inner.pop_lru() {
75+
Some((evicted_key, _)) => {
76+
self.current_bytes = self
77+
.current_bytes
78+
.saturating_sub(Self::entry_size(&evicted_key));
79+
}
80+
None => break,
81+
}
82+
}
83+
84+
// `push` may evict an LRU entry to honor the entry-count cap; deduct its bytes.
85+
if let Some((replaced_key, _)) = self.inner.push(key, value) {
86+
self.current_bytes = self
87+
.current_bytes
88+
.saturating_sub(Self::entry_size(&replaced_key));
89+
}
90+
self.current_bytes += entry_bytes;
91+
}
92+
93+
#[cfg(test)]
94+
pub fn current_bytes(&self) -> usize {
95+
self.current_bytes
96+
}
97+
98+
pub fn len(&self) -> usize {
99+
self.inner.len()
100+
}
101+
102+
fn entry_size(key: &K) -> usize {
103+
Self::PER_ENTRY_OVERHEAD + key.as_ref().len() + size_of::<V>()
104+
}
105+
106+
/// Approximate per-entry fixed heap overhead, in bytes. Covers:
107+
/// - `Vec<u8>` header on the key (24 B on 64-bit targets)
108+
/// - `lru` doubly-linked-list node (prev/next pointers, ~24 B)
109+
/// - `HashMap` bucket amortized (~16 B)
110+
///
111+
/// Rounded up so `max_bytes` is a pessimistic upper bound on actual heap usage. Recheck
112+
/// if the `lru` crate is upgraded across a major version — the linked-list node layout
113+
/// is the volatile piece.
114+
const PER_ENTRY_OVERHEAD: usize = 64;
115+
}
116+
117+
#[cfg(test)]
mod tests {
    use super::*;

    /// Cache shape shared by every test: byte-vector keys with inline `bool` values.
    type TestCache = BoundedByteCache<Vec<u8>, bool>;

    #[test]
    fn test_basic_put_and_get() {
        let mut cache = TestCache::new(256, 1024);
        cache.put(b"hello".to_vec(), true);
        assert_eq!(cache.get(&b"hello"[..]).copied(), Some(true));
        assert_eq!(cache.len(), 1);
    }

    #[test]
    fn test_evicts_lru_when_over_byte_budget() {
        // One entry is charged 64 (PER_ENTRY_OVERHEAD) + 4 (key) + 1 (bool) = 69 bytes, so a
        // 150-byte budget holds two entries (138 B) and a third forces an eviction.
        let budget = 150;
        let mut cache = TestCache::new(256, budget);
        cache.put(b"aaaa".to_vec(), true);
        cache.put(b"bbbb".to_vec(), false);
        assert_eq!(cache.len(), 2);

        cache.put(b"cccc".to_vec(), true);
        assert_eq!(cache.len(), 2);
        assert_eq!(cache.get(&b"aaaa"[..]).copied(), None);
        assert_eq!(cache.get(&b"bbbb"[..]).copied(), Some(false));
        assert_eq!(cache.get(&b"cccc"[..]).copied(), Some(true));
        assert!(cache.current_bytes() <= budget);
    }

    #[test]
    fn test_evicts_lru_when_over_entry_count() {
        // The byte budget is generous here; only the two-entry cap can trigger eviction.
        let mut cache = TestCache::new(2, 1024);
        cache.put(b"a".to_vec(), true);
        cache.put(b"b".to_vec(), false);
        cache.put(b"c".to_vec(), true);
        assert_eq!(cache.len(), 2);
        assert_eq!(cache.get(&b"a"[..]).copied(), None);
        assert_eq!(cache.get(&b"b"[..]).copied(), Some(false));
        assert_eq!(cache.get(&b"c"[..]).copied(), Some(true));
    }

    #[test]
    fn test_oversize_entry_is_rejected() {
        // Every entry is charged at least PER_ENTRY_OVERHEAD bytes, which already exceeds a
        // 32-byte budget, so no insertion can succeed.
        let mut cache = TestCache::new(256, 32);
        cache.put(b"small".to_vec(), true);
        assert_eq!(cache.len(), 0);
        assert_eq!(cache.current_bytes(), 0);
    }

    #[test]
    fn test_replacing_key_does_not_double_count() {
        let mut cache = TestCache::new(256, 1024);
        cache.put(b"k".to_vec(), true);
        let bytes_after_first = cache.current_bytes();

        cache.put(b"k".to_vec(), false);
        assert_eq!(cache.current_bytes(), bytes_after_first);
        assert_eq!(cache.get(&b"k"[..]).copied(), Some(false));
        assert_eq!(cache.len(), 1);
    }

    #[test]
    fn test_get_bumps_recency() {
        // Room for exactly two entries with 4-byte keys (2 * 69 B = 138 B <= 150 B).
        let mut cache = TestCache::new(256, 150);
        cache.put(b"aaaa".to_vec(), true);
        cache.put(b"bbbb".to_vec(), true);

        // Touch "aaaa" so that "bbbb" becomes the least recently used entry.
        let _ = cache.get(&b"aaaa"[..]);

        cache.put(b"cccc".to_vec(), true);
        assert_eq!(cache.get(&b"aaaa"[..]).copied(), Some(true));
        assert_eq!(cache.get(&b"bbbb"[..]).copied(), None);
    }

    #[test]
    fn test_many_inserts_stay_within_both_limits() {
        let max_entries = 8;
        // 8 entries * (64 overhead + 8-byte key + 1-byte bool) = 584 bytes, rounded up.
        let max_bytes = 600;
        let mut cache = TestCache::new(max_entries, max_bytes);

        for i in 0u16..1000 {
            let key = format!("key-{:04}", i).into_bytes();
            cache.put(key, i % 2 == 0);
            assert!(cache.current_bytes() <= max_bytes);
            assert!(cache.len() <= max_entries);
        }
    }

    #[test]
    fn test_zero_entries_clamps_to_one() {
        let mut cache = TestCache::new(0, 1024);
        cache.put(b"a".to_vec(), true);
        cache.put(b"b".to_vec(), false);
        assert_eq!(cache.len(), 1);
        assert_eq!(cache.get(&b"a"[..]).copied(), None);
        assert_eq!(cache.get(&b"b"[..]).copied(), Some(false));
    }
}

0 commit comments

Comments
 (0)