Skip to content

Commit 6f98b4c

Browse files
committed
perf(sampling): optimize matching and bound cache memory
1 parent 1fd6940 commit 6f98b4c

3 files changed

Lines changed: 443 additions & 97 deletions

File tree

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//! LRU cache wrapper with dual limits: maximum entry count AND maximum tracked byte size.
5+
//!
6+
//! `lru::LruCache` only supports an entry-count capacity, which is risky for caches keyed on
7+
//! arbitrary user strings: a few very large keys can balloon memory. `BoundedByteCache` adds
8+
//! a byte budget on top, evicting least-recently-used entries until both limits are satisfied.
9+
10+
use lru::LruCache;
11+
use std::borrow::Borrow;
12+
use std::hash::Hash;
13+
use std::mem::size_of;
14+
use std::num::NonZeroUsize;
15+
16+
/// Entry-count cap used when the caller has no specific limit in mind.
pub const DEFAULT_MAX_ENTRIES: usize = 256;

/// Tracked-byte budget used when the caller has no specific limit in mind:
/// 256 KiB, i.e. 262,144 bytes.
pub const DEFAULT_MAX_BYTES: usize = 256 * 1024;
21+
22+
/// LRU cache bounded by both entry count and total tracked byte size.
23+
///
24+
/// Byte accounting covers `key.as_ref().len() + size_of::<V>()`. Heap-allocated value contents
25+
/// are not tracked; this wrapper assumes small inline values (e.g. `bool`).
26+
pub struct BoundedByteCache<K, V>
27+
where
28+
K: Hash + Eq + AsRef<[u8]>,
29+
{
30+
inner: LruCache<K, V>,
31+
current_bytes: usize,
32+
max_bytes: usize,
33+
}
34+
35+
impl<K, V> BoundedByteCache<K, V>
36+
where
37+
K: Hash + Eq + AsRef<[u8]>,
38+
{
39+
/// `max_entries` of zero is treated as 1 (a cache with no slots is nonsensical).
40+
pub fn new(max_entries: usize, max_bytes: usize) -> Self {
41+
let entry_cap = NonZeroUsize::new(max_entries).unwrap_or(NonZeroUsize::MIN);
42+
Self {
43+
inner: LruCache::new(entry_cap),
44+
current_bytes: 0,
45+
max_bytes,
46+
}
47+
}
48+
49+
#[inline]
50+
pub fn get<Q>(&mut self, key: &Q) -> Option<&V>
51+
where
52+
K: Borrow<Q>,
53+
Q: Hash + Eq + ?Sized,
54+
{
55+
self.inner.get(key)
56+
}
57+
58+
/// Insert `key -> value`. Entries larger than `max_bytes` are dropped silently. Otherwise
59+
/// LRU entries are evicted until the new entry fits.
60+
#[inline]
61+
pub fn put(&mut self, key: K, value: V) {
62+
let entry_bytes = Self::entry_size(&key);
63+
64+
if entry_bytes > self.max_bytes {
65+
return;
66+
}
67+
68+
// Replacing an existing key: deduct its bytes first.
69+
if self.inner.pop(&key).is_some() {
70+
self.current_bytes = self.current_bytes.saturating_sub(entry_bytes);
71+
}
72+
73+
while self.current_bytes + entry_bytes > self.max_bytes {
74+
match self.inner.pop_lru() {
75+
Some((evicted_key, _)) => {
76+
self.current_bytes = self
77+
.current_bytes
78+
.saturating_sub(Self::entry_size(&evicted_key));
79+
}
80+
None => break,
81+
}
82+
}
83+
84+
// `push` may evict an LRU entry to honor the entry-count cap; deduct its bytes.
85+
if let Some((replaced_key, _)) = self.inner.push(key, value) {
86+
self.current_bytes = self
87+
.current_bytes
88+
.saturating_sub(Self::entry_size(&replaced_key));
89+
}
90+
self.current_bytes += entry_bytes;
91+
}
92+
93+
#[allow(dead_code)]
94+
pub fn current_bytes(&self) -> usize {
95+
self.current_bytes
96+
}
97+
98+
pub fn len(&self) -> usize {
99+
self.inner.len()
100+
}
101+
102+
fn entry_size(key: &K) -> usize {
103+
key.as_ref().len() + size_of::<V>()
104+
}
105+
}
106+
107+
#[cfg(test)]
mod tests {
    use super::*;

    /// Concrete cache type exercised by every test in this module.
    type Cache = BoundedByteCache<Vec<u8>, bool>;

    /// Stores `value` under the bytes of `key`.
    fn insert(cache: &mut Cache, key: &str, value: bool) {
        cache.put(key.as_bytes().to_vec(), value);
    }

    /// Fetches the value stored under the bytes of `key`, copying it out of the cache.
    fn lookup(cache: &mut Cache, key: &str) -> Option<bool> {
        cache.get(key.as_bytes()).copied()
    }

    #[test]
    fn test_basic_put_and_get() {
        let mut cache = Cache::new(256, 1024);
        insert(&mut cache, "hello", true);
        assert_eq!(lookup(&mut cache, "hello"), Some(true));
        assert_eq!(cache.len(), 1);
    }

    #[test]
    fn test_evicts_lru_when_over_byte_budget() {
        // Each entry is charged 4 key bytes + 1 byte for the bool, so 10 bytes hold two.
        let mut cache = Cache::new(256, 10);
        insert(&mut cache, "aaaa", true);
        insert(&mut cache, "bbbb", false);
        assert_eq!(cache.len(), 2);

        // A third entry pushes out the oldest one.
        insert(&mut cache, "cccc", true);
        assert_eq!(cache.len(), 2);
        assert_eq!(lookup(&mut cache, "aaaa"), None);
        assert_eq!(lookup(&mut cache, "bbbb"), Some(false));
        assert_eq!(lookup(&mut cache, "cccc"), Some(true));
        assert!(cache.current_bytes() <= 10);
    }

    #[test]
    fn test_evicts_lru_when_over_entry_count() {
        // The byte budget is ample here; only the two-entry cap can force an eviction.
        let mut cache = Cache::new(2, 1024);
        for (key, value) in [("a", true), ("b", false), ("c", true)] {
            insert(&mut cache, key, value);
        }
        assert_eq!(cache.len(), 2);
        assert_eq!(lookup(&mut cache, "a"), None);
        assert_eq!(lookup(&mut cache, "b"), Some(false));
        assert_eq!(lookup(&mut cache, "c"), Some(true));
    }

    #[test]
    fn test_oversize_entry_is_rejected() {
        // "small" alone is 5 bytes, already above the 4-byte budget.
        let mut cache = Cache::new(256, 4);
        insert(&mut cache, "small", true);
        assert_eq!(cache.len(), 0);
        assert_eq!(cache.current_bytes(), 0);
    }

    #[test]
    fn test_replacing_key_does_not_double_count() {
        let mut cache = Cache::new(256, 1024);
        insert(&mut cache, "k", true);
        let bytes_after_first = cache.current_bytes();

        insert(&mut cache, "k", false);
        assert_eq!(cache.current_bytes(), bytes_after_first);
        assert_eq!(lookup(&mut cache, "k"), Some(false));
        assert_eq!(cache.len(), 1);
    }

    #[test]
    fn test_get_bumps_recency() {
        let mut cache = Cache::new(256, 10);
        insert(&mut cache, "aaaa", true);
        insert(&mut cache, "bbbb", true);

        // Reading "aaaa" leaves "bbbb" as the least recently used entry.
        let _ = lookup(&mut cache, "aaaa");
        insert(&mut cache, "cccc", true);

        assert_eq!(lookup(&mut cache, "aaaa"), Some(true));
        assert_eq!(lookup(&mut cache, "bbbb"), None);
    }

    #[test]
    fn test_many_inserts_stay_within_both_limits() {
        let max_entries = 8;
        let max_bytes = 100;
        let mut cache = Cache::new(max_entries, max_bytes);

        for i in 0u16..1000 {
            let key = format!("key-{:04}", i);
            insert(&mut cache, &key, i % 2 == 0);
            assert!(cache.current_bytes() <= max_bytes);
            assert!(cache.len() <= max_entries);
        }
    }

    #[test]
    fn test_zero_entries_clamps_to_one() {
        let mut cache = Cache::new(0, 1024);
        insert(&mut cache, "a", true);
        insert(&mut cache, "b", false);
        assert_eq!(cache.len(), 1);
        assert_eq!(lookup(&mut cache, "a"), None);
        assert_eq!(lookup(&mut cache, "b"), Some(false));
    }
}

0 commit comments

Comments
 (0)