|
// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0
| 3 | + |
//! LRU cache wrapper with dual limits: maximum entry count AND maximum tracked byte size.
//!
//! `lru::LruCache` only supports an entry-count capacity, which is unsafe for caches keyed on
//! arbitrary user strings: a few very large keys can balloon memory. `BoundedByteCache` adds
//! a byte budget on top, evicting least-recently-used entries until both limits are satisfied.
| 9 | +
|
| 10 | +use lru::LruCache; |
| 11 | +use std::borrow::Borrow; |
| 12 | +use std::hash::Hash; |
| 13 | +use std::mem::size_of; |
| 14 | +use std::num::NonZeroUsize; |
| 15 | + |
/// Default maximum entry count.
pub const DEFAULT_MAX_ENTRIES: usize = 256;

/// Default maximum tracked byte size (256 KiB = 262 144 bytes).
pub const DEFAULT_MAX_BYTES: usize = 256 * 1024;
| 21 | + |
| 22 | +/// LRU cache bounded by both entry count and total tracked byte size. |
| 23 | +/// |
| 24 | +/// Byte accounting covers `key.as_ref().len() + size_of::<V>()`. Heap-allocated value contents |
| 25 | +/// are not tracked; this wrapper assumes small inline values (e.g. `bool`). |
| 26 | +pub struct BoundedByteCache<K, V> |
| 27 | +where |
| 28 | + K: Hash + Eq + AsRef<[u8]>, |
| 29 | +{ |
| 30 | + inner: LruCache<K, V>, |
| 31 | + current_bytes: usize, |
| 32 | + max_bytes: usize, |
| 33 | +} |
| 34 | + |
| 35 | +impl<K, V> BoundedByteCache<K, V> |
| 36 | +where |
| 37 | + K: Hash + Eq + AsRef<[u8]>, |
| 38 | +{ |
| 39 | + /// `max_entries` of zero is treated as 1 (a cache with no slots is nonsensical). |
| 40 | + pub fn new(max_entries: usize, max_bytes: usize) -> Self { |
| 41 | + let entry_cap = NonZeroUsize::new(max_entries).unwrap_or(NonZeroUsize::MIN); |
| 42 | + Self { |
| 43 | + inner: LruCache::new(entry_cap), |
| 44 | + current_bytes: 0, |
| 45 | + max_bytes, |
| 46 | + } |
| 47 | + } |
| 48 | + |
| 49 | + #[inline] |
| 50 | + pub fn get<Q>(&mut self, key: &Q) -> Option<&V> |
| 51 | + where |
| 52 | + K: Borrow<Q>, |
| 53 | + Q: Hash + Eq + ?Sized, |
| 54 | + { |
| 55 | + self.inner.get(key) |
| 56 | + } |
| 57 | + |
| 58 | + /// Insert `key -> value`. Entries larger than `max_bytes` are dropped silently. Otherwise |
| 59 | + /// LRU entries are evicted until the new entry fits. |
| 60 | + #[inline] |
| 61 | + pub fn put(&mut self, key: K, value: V) { |
| 62 | + let entry_bytes = Self::entry_size(&key); |
| 63 | + |
| 64 | + if entry_bytes > self.max_bytes { |
| 65 | + return; |
| 66 | + } |
| 67 | + |
| 68 | + // Replacing an existing key: deduct its bytes first. |
| 69 | + if self.inner.pop(&key).is_some() { |
| 70 | + self.current_bytes = self.current_bytes.saturating_sub(entry_bytes); |
| 71 | + } |
| 72 | + |
| 73 | + while self.current_bytes + entry_bytes > self.max_bytes { |
| 74 | + match self.inner.pop_lru() { |
| 75 | + Some((evicted_key, _)) => { |
| 76 | + self.current_bytes = self |
| 77 | + .current_bytes |
| 78 | + .saturating_sub(Self::entry_size(&evicted_key)); |
| 79 | + } |
| 80 | + None => break, |
| 81 | + } |
| 82 | + } |
| 83 | + |
| 84 | + // `push` may evict an LRU entry to honor the entry-count cap; deduct its bytes. |
| 85 | + if let Some((replaced_key, _)) = self.inner.push(key, value) { |
| 86 | + self.current_bytes = self |
| 87 | + .current_bytes |
| 88 | + .saturating_sub(Self::entry_size(&replaced_key)); |
| 89 | + } |
| 90 | + self.current_bytes += entry_bytes; |
| 91 | + } |
| 92 | + |
| 93 | + #[allow(dead_code)] |
| 94 | + pub fn current_bytes(&self) -> usize { |
| 95 | + self.current_bytes |
| 96 | + } |
| 97 | + |
| 98 | + pub fn len(&self) -> usize { |
| 99 | + self.inner.len() |
| 100 | + } |
| 101 | + |
| 102 | + fn entry_size(key: &K) -> usize { |
| 103 | + key.as_ref().len() + size_of::<V>() |
| 104 | + } |
| 105 | +} |
| 106 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// A stored entry can be read back and is counted once.
    #[test]
    fn test_basic_put_and_get() {
        let mut cache: BoundedByteCache<Vec<u8>, bool> = BoundedByteCache::new(256, 1024);
        cache.put(b"hello".to_vec(), true);
        assert_eq!(cache.get(b"hello".as_ref()), Some(&true));
        assert_eq!(cache.len(), 1);
    }

    /// Exceeding the byte budget evicts the least-recently-used entry.
    #[test]
    fn test_evicts_lru_when_over_byte_budget() {
        // 10-byte budget; each entry costs 4 (key) + 1 (bool) = 5 bytes.
        let mut cache: BoundedByteCache<Vec<u8>, bool> = BoundedByteCache::new(256, 10);
        cache.put(b"aaaa".to_vec(), true);
        cache.put(b"bbbb".to_vec(), false);
        assert_eq!(cache.len(), 2);
        cache.put(b"cccc".to_vec(), true);
        assert_eq!(cache.len(), 2);
        assert_eq!(cache.get(b"aaaa".as_ref()), None);
        assert_eq!(cache.get(b"bbbb".as_ref()), Some(&false));
        assert_eq!(cache.get(b"cccc".as_ref()), Some(&true));
        assert!(cache.current_bytes() <= 10);
    }

    /// Exceeding the entry-count cap evicts the LRU entry and releases its bytes.
    #[test]
    fn test_evicts_lru_when_over_entry_count() {
        // Generous byte budget; entry-count cap of 2 drives eviction.
        let mut cache: BoundedByteCache<Vec<u8>, bool> = BoundedByteCache::new(2, 1024);
        cache.put(b"a".to_vec(), true);
        cache.put(b"b".to_vec(), false);
        cache.put(b"c".to_vec(), true);
        assert_eq!(cache.len(), 2);
        // Two surviving entries at 1 (key) + 1 (bool) bytes each.
        assert_eq!(cache.current_bytes(), 4);
        assert_eq!(cache.get(b"a".as_ref()), None);
        assert_eq!(cache.get(b"b".as_ref()), Some(&false));
        assert_eq!(cache.get(b"c".as_ref()), Some(&true));
    }

    /// An entry larger than the whole byte budget is never stored.
    #[test]
    fn test_oversize_entry_is_rejected() {
        let mut cache: BoundedByteCache<Vec<u8>, bool> = BoundedByteCache::new(256, 4);
        cache.put(b"small".to_vec(), true);
        assert_eq!(cache.len(), 0);
        assert_eq!(cache.current_bytes(), 0);
    }

    /// Overwriting a key updates the value without charging its bytes twice.
    #[test]
    fn test_replacing_key_does_not_double_count() {
        let mut cache: BoundedByteCache<Vec<u8>, bool> = BoundedByteCache::new(256, 1024);
        cache.put(b"k".to_vec(), true);
        let bytes_after_first = cache.current_bytes();
        cache.put(b"k".to_vec(), false);
        assert_eq!(cache.current_bytes(), bytes_after_first);
        assert_eq!(cache.get(b"k".as_ref()), Some(&false));
        assert_eq!(cache.len(), 1);
    }

    /// A successful `get` protects the entry from the next eviction.
    #[test]
    fn test_get_bumps_recency() {
        let mut cache: BoundedByteCache<Vec<u8>, bool> = BoundedByteCache::new(256, 10);
        cache.put(b"aaaa".to_vec(), true);
        cache.put(b"bbbb".to_vec(), true);
        let _ = cache.get(b"aaaa".as_ref());
        cache.put(b"cccc".to_vec(), true);
        assert_eq!(cache.get(b"aaaa".as_ref()), Some(&true));
        assert_eq!(cache.get(b"bbbb".as_ref()), None);
    }

    /// Both limits hold after every single insert of a long run.
    #[test]
    fn test_many_inserts_stay_within_both_limits() {
        let max_entries = 8;
        let max_bytes = 100;
        let mut cache: BoundedByteCache<Vec<u8>, bool> =
            BoundedByteCache::new(max_entries, max_bytes);
        for i in 0u16..1000 {
            cache.put(format!("key-{i:04}").into_bytes(), i % 2 == 0);
            assert!(cache.current_bytes() <= max_bytes);
            assert!(cache.len() <= max_entries);
        }
    }

    /// A requested capacity of zero behaves like a capacity of one.
    #[test]
    fn test_zero_entries_clamps_to_one() {
        let mut cache: BoundedByteCache<Vec<u8>, bool> = BoundedByteCache::new(0, 1024);
        cache.put(b"a".to_vec(), true);
        cache.put(b"b".to_vec(), false);
        assert_eq!(cache.len(), 1);
        // Only "b" remains: 1 (key) + 1 (bool) bytes.
        assert_eq!(cache.current_bytes(), 2);
        assert_eq!(cache.get(b"a".as_ref()), None);
        assert_eq!(cache.get(b"b".as_ref()), Some(&false));
    }
}
0 commit comments