Skip to content

Commit 6b7771a

Browse files
authored
feat: add cache stats for new cache (#4024)
This can be used to compute the hit and miss rate of the caches.
1 parent 97ed5ec commit 6b7771a

2 files changed

Lines changed: 194 additions & 6 deletions

File tree

rust/lance-core/src/cache.rs

Lines changed: 190 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@
44
//! Cache implementation
55
66
use std::any::{Any, TypeId};
7-
use std::sync::Arc;
7+
use std::sync::{
8+
atomic::{AtomicU64, Ordering},
9+
Arc,
10+
};
811

912
use futures::Future;
1013
use moka::sync::Cache;
@@ -45,6 +48,8 @@ impl SizedRecord {
4548
// Cache handle made of a shared underlying cache, a key prefix, and shared
// hit/miss counters.
pub struct LanceCache {
4649
// Underlying cache, keyed by (key string, `TypeId`) and held behind an `Arc`.
cache: Arc<Cache<(String, TypeId), SizedRecord>>,
4750
// Key prefix for this handle; the constructors shown here start with an empty string.
// presumably applied to lookup keys by `get_key` (not shown here) — confirm.
prefix: String,
51+
// Count of lookups that found an entry. Held behind an `Arc` so that a handle
// built by cloning this `Arc` updates and reads the same counter.
hits: Arc<AtomicU64>,
52+
// Count of lookups that did not find an entry; shared the same way as `hits`.
misses: Arc<AtomicU64>,
4853
}
4954

5055
impl std::fmt::Debug for LanceCache {
@@ -76,13 +81,17 @@ impl LanceCache {
7681
Self {
7782
cache: Arc::new(cache),
7883
prefix: String::new(),
84+
hits: Arc::new(AtomicU64::new(0)),
85+
misses: Arc::new(AtomicU64::new(0)),
7986
}
8087
}
8188

8289
/// Creates a `LanceCache` whose underlying cache is built with `Cache::new(0)`,
/// with an empty key prefix and hit/miss counters starting at zero.
pub fn no_cache() -> Self {
8390
Self {
8491
// NOTE(review): presumably a capacity of 0 means nothing is retained — confirm against the moka docs.
cache: Arc::new(Cache::new(0)),
8592
prefix: String::new(),
93+
// Fresh counters: not shared with any other `LanceCache` at construction time.
hits: Arc::new(AtomicU64::new(0)),
94+
misses: Arc::new(AtomicU64::new(0)),
8695
}
8796
}
8897

@@ -97,6 +106,8 @@ impl LanceCache {
97106
Self {
98107
cache: self.cache.clone(),
99108
prefix: format!("{}{}/", self.prefix, prefix),
109+
hits: self.hits.clone(),
110+
misses: self.misses.clone(),
100111
}
101112
}
102113

@@ -154,9 +165,13 @@ impl LanceCache {
154165

155166
/// Looks up `key` for type `T` and records the outcome in the hit/miss counters.
///
/// The lookup key is the pair of `self.get_key(key)` and `TypeId::of::<T>()`.
/// Returns `Some(Arc<T>)` and increments `hits` when an entry is found;
/// otherwise increments `misses` and returns `None`.
pub fn get<T: DeepSizeOf + Send + Sync + 'static>(&self, key: &str) -> Option<Arc<T>> {
156167
let key = self.get_key(key);
157-
self.cache
158-
.get(&(key, TypeId::of::<T>()))
159-
.map(|metadata| metadata.record.clone().downcast::<T>().unwrap())
168+
if let Some(metadata) = self.cache.get(&(key, TypeId::of::<T>())) {
169+
// Counter updates use `Ordering::Relaxed`.
self.hits.fetch_add(1, Ordering::Relaxed);
170+
// The entry was found under `TypeId::of::<T>()`; `unwrap` panics if the downcast fails.
// presumably it cannot fail as long as inserts store `T` under the same `TypeId` (insert not shown) — confirm.
Some(metadata.record.clone().downcast::<T>().unwrap())
171+
} else {
172+
self.misses.fetch_add(1, Ordering::Relaxed);
173+
None
174+
}
160175
}
161176

162177
pub fn get_unsized<T: DeepSizeOf + Send + Sync + 'static + ?Sized>(
@@ -181,14 +196,32 @@ impl LanceCache {
181196
F: FnOnce(&str) -> Fut,
182197
Fut: Future<Output = Result<T>>,
183198
{
184-
if let Some(metadata) = self.get::<T>(&key) {
185-
return Ok(metadata);
199+
let full_key = self.get_key(&key);
200+
if let Some(metadata) = self.cache.get(&(full_key, TypeId::of::<T>())) {
201+
self.hits.fetch_add(1, Ordering::Relaxed);
202+
return Ok(metadata.record.clone().downcast::<T>().unwrap());
186203
}
187204

205+
self.misses.fetch_add(1, Ordering::Relaxed);
188206
let metadata = Arc::new(loader(&key).await?);
189207
self.insert(&key, metadata.clone());
190208
Ok(metadata)
191209
}
210+
211+
/// Returns the current values of the hit and miss counters as a `CacheStats`.
///
/// The two counters are read with two separate `Relaxed` loads, so the pair is
/// not read as a single atomic unit.
pub fn stats(&self) -> CacheStats {
212+
CacheStats {
213+
hits: self.hits.load(Ordering::Relaxed),
214+
misses: self.misses.load(Ordering::Relaxed),
215+
}
216+
}
217+
}
218+
219+
/// Hit and miss counts reported by a cache.
///
/// This is a plain value type: it holds the two counts as they were when it was
/// built and does not change afterwards. It derives `PartialEq`/`Eq` so two
/// values can be compared directly (for example in assertions), and `Default`
/// for an all-zero value.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CacheStats {
    /// Number of times `get`, `get_unsized`, or `get_or_insert` found an item in the cache.
    pub hits: u64,
    /// Number of times `get`, `get_unsized`, or `get_or_insert` did not find an item in the cache.
    pub misses: u64,
}
193226

194227
#[cfg(test)]
@@ -247,4 +280,155 @@ mod tests {
247280
let retrieved = retrieved.as_any().downcast_ref::<MyType>().unwrap();
248281
assert_eq!(retrieved.0, 42);
249282
}
283+
284+
// Checks that `get` updates the counters reported by `stats()`: each lookup of a
// missing key adds one miss, each lookup of an inserted key adds one hit, and
// `insert` itself changes neither counter.
#[test]
285+
fn test_cache_stats_basic() {
286+
let cache = LanceCache::with_capacity(1000);
287+
288+
// Initially no hits or misses
289+
let stats = cache.stats();
290+
assert_eq!(stats.hits, 0);
291+
assert_eq!(stats.misses, 0);
292+
293+
// Miss on first get
294+
let result = cache.get::<Vec<i32>>("nonexistent");
295+
assert!(result.is_none());
296+
let stats = cache.stats();
297+
assert_eq!(stats.hits, 0);
298+
assert_eq!(stats.misses, 1);
299+
300+
// Insert and then hit
301+
cache.insert("key1", Arc::new(vec![1, 2, 3]));
302+
let result = cache.get::<Vec<i32>>("key1");
303+
assert!(result.is_some());
304+
let stats = cache.stats();
305+
assert_eq!(stats.hits, 1);
306+
assert_eq!(stats.misses, 1);
307+
308+
// Another hit
309+
let result = cache.get::<Vec<i32>>("key1");
310+
assert!(result.is_some());
311+
let stats = cache.stats();
312+
assert_eq!(stats.hits, 2);
313+
assert_eq!(stats.misses, 1);
314+
315+
// Another miss
316+
let result = cache.get::<Vec<i32>>("nonexistent2");
317+
assert!(result.is_none());
318+
let stats = cache.stats();
319+
assert_eq!(stats.hits, 2);
320+
assert_eq!(stats.misses, 2);
321+
}
322+
323+
// Checks that a cache obtained through `with_key_prefix` shares its hit/miss
// counters with the cache it was derived from: lookups made through the
// prefixed handle are visible in `stats()` on both handles.
#[test]
324+
fn test_cache_stats_with_prefixes() {
325+
let base_cache = LanceCache::with_capacity(1000);
326+
let prefixed_cache = base_cache.with_key_prefix("test");
327+
328+
// Stats should be shared between base and prefixed cache
329+
let stats = base_cache.stats();
330+
assert_eq!(stats.hits, 0);
331+
assert_eq!(stats.misses, 0);
332+
333+
let stats = prefixed_cache.stats();
334+
assert_eq!(stats.hits, 0);
335+
assert_eq!(stats.misses, 0);
336+
337+
// Miss on prefixed cache
338+
let result = prefixed_cache.get::<Vec<i32>>("key1");
339+
assert!(result.is_none());
340+
341+
// Both should show the miss
342+
let stats = base_cache.stats();
343+
assert_eq!(stats.hits, 0);
344+
assert_eq!(stats.misses, 1);
345+
346+
let stats = prefixed_cache.stats();
347+
assert_eq!(stats.hits, 0);
348+
assert_eq!(stats.misses, 1);
349+
350+
// Insert through prefixed cache and hit
351+
prefixed_cache.insert("key1", Arc::new(vec![1, 2, 3]));
352+
let result = prefixed_cache.get::<Vec<i32>>("key1");
353+
assert!(result.is_some());
354+
355+
// Both should show the hit
356+
let stats = base_cache.stats();
357+
assert_eq!(stats.hits, 1);
358+
assert_eq!(stats.misses, 1);
359+
360+
let stats = prefixed_cache.stats();
361+
assert_eq!(stats.hits, 1);
362+
assert_eq!(stats.misses, 1);
363+
}
364+
365+
// Checks that lookups through `get_unsized` are counted: one miss before the
// trait object is inserted, one hit after.
// NOTE(review): `get_unsized` itself is not changed in the visible hunks; this
// test presumably relies on it going through a counted lookup path — confirm.
#[test]
366+
fn test_cache_stats_unsized() {
367+
#[derive(Debug, DeepSizeOf)]
368+
struct MyType(i32);
369+
370+
trait MyTrait: DeepSizeOf + Send + Sync + Any {}
371+
372+
impl MyTrait for MyType {}
373+
374+
let cache = LanceCache::with_capacity(1000);
375+
376+
// Miss on unsized get
377+
let result = cache.get_unsized::<dyn MyTrait>("test");
378+
assert!(result.is_none());
379+
let stats = cache.stats();
380+
assert_eq!(stats.hits, 0);
381+
assert_eq!(stats.misses, 1);
382+
383+
// Insert and hit on unsized
384+
let item = Arc::new(MyType(42));
385+
let item_dyn: Arc<dyn MyTrait> = item;
386+
cache.insert_unsized("test", item_dyn);
387+
388+
let result = cache.get_unsized::<dyn MyTrait>("test");
389+
assert!(result.is_some());
390+
let stats = cache.stats();
391+
assert_eq!(stats.hits, 1);
392+
assert_eq!(stats.misses, 1);
393+
}
394+
395+
// Checks the counters for `get_or_insert`: a call that has to run the loader
// counts as exactly one miss, and a call for an already-loaded key counts as
// one hit without running the loader.
#[tokio::test]
396+
async fn test_cache_stats_get_or_insert() {
397+
let cache = LanceCache::with_capacity(1000);
398+
399+
// First call should be a miss and load the value
400+
let result: Arc<Vec<i32>> = cache
401+
.get_or_insert("key1".to_string(), |_key| async { Ok(vec![1, 2, 3]) })
402+
.await
403+
.unwrap();
404+
assert_eq!(*result, vec![1, 2, 3]);
405+
406+
let stats = cache.stats();
407+
assert_eq!(stats.hits, 0);
408+
assert_eq!(stats.misses, 1);
409+
410+
// Second call should be a hit
411+
let result: Arc<Vec<i32>> = cache
412+
.get_or_insert("key1".to_string(), |_key| async {
413+
// The loader panics, so the test fails if a cached key triggers a reload.
panic!("Should not be called")
414+
})
415+
.await
416+
.unwrap();
417+
assert_eq!(*result, vec![1, 2, 3]);
418+
419+
let stats = cache.stats();
420+
assert_eq!(stats.hits, 1);
421+
assert_eq!(stats.misses, 1);
422+
423+
// Different key should be another miss
424+
let result: Arc<Vec<i32>> = cache
425+
.get_or_insert("key2".to_string(), |_key| async { Ok(vec![4, 5, 6]) })
426+
.await
427+
.unwrap();
428+
assert_eq!(*result, vec![4, 5, 6]);
429+
430+
let stats = cache.stats();
431+
assert_eq!(stats.hits, 1);
432+
assert_eq!(stats.misses, 2);
433+
}
250434
}

rust/lance/src/session.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,10 @@ impl Session {
160160
/// Returns a clone of the session's `Arc<ObjectStoreRegistry>` handle.
pub fn store_registry(&self) -> Arc<ObjectStoreRegistry> {
161161
self.store_registry.clone()
162162
}
163+
164+
/// Returns the result of calling `stats()` on this session's `metadata_cache`,
/// i.e. its hit and miss counts as a `lance_core::cache::CacheStats`.
pub fn metadata_cache_stats(&self) -> lance_core::cache::CacheStats {
165+
self.metadata_cache.stats()
166+
}
163167
}
164168

165169
impl Default for Session {

0 commit comments

Comments
 (0)