Skip to content

Commit a5f2620

Browse files
committed
sstable: push filter check inside cache
Previously, every bloom filter check in `SeekPrefixGE` required:

1. Getting the filter block from cache (atomic refcount increment)
2. Calling `MayContain` on the filter data (a very fast operation)
3. Releasing the block (atomic refcount decrement)
4. Updating the hit counter (atomic increment)

For a simple bloom filter check (a few hash operations), the atomic refcount operations represent significant overhead (especially when there are a small number of very hot filters in the upper LSM levels).

Add a new `cache.Handle.TableFilterMayContain` method that performs the filter check while holding the cache's read lock, without incrementing/decrementing the refcount. The value is safe to access while holding the read lock since eviction requires the write lock.

When the filter block is not in cache, the code falls back to the existing `readFilterBlock` path which handles reading from disk and populating the cache.

We no longer record these accesses as cache hits for statistics purposes. We still record misses, and since we keep separate statistics by block type, we'll still know what percentage of overall misses are due to filter blocks.
1 parent 2f83567 commit a5f2620

5 files changed

Lines changed: 75 additions & 0 deletions

File tree

internal/cache/cache.go

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -271,6 +271,28 @@ func (c *Handle) Get(
271271
return c.cache.getShard(k).get(k, level, category, false /* peekOnly */)
272272
}
273273

274+
// TableFilterMayContain looks up a cached filter block and checks if it may contain the
275+
// given key, while holding the cache's read lock. This avoids the refcount
276+
// overhead of Get/Release for the common case of bloom filter checks.
277+
//
278+
// TableFilterMayContain does not update hit counters.
279+
//
280+
// The dataOffset specifies how many bytes to skip at the start of the cached
281+
// buffer (typically block.MetadataSize to skip block metadata).
282+
//
283+
// Returns (true, mayContain) if found; (false, false) if not in cache.
284+
// When not found, the caller should fall back to reading from disk.
285+
func (c *Handle) TableFilterMayContain(
286+
fileNum base.DiskFileNum,
287+
offset uint64,
288+
dataOffset int,
289+
filter base.TableFilterDecoder,
290+
filterKey []byte,
291+
) (found bool, mayContain bool) {
292+
k := makeKey(c.id, fileNum, offset)
293+
return c.cache.getShard(k).tableFilterMayContain(k, dataOffset, filter, filterKey)
294+
}
295+
274296
// GetWithReadHandle retrieves the cache value for the specified handleID, fileNum
275297
// and offset. If found, a valid Handle is returned (with cacheHit set to
276298
// true), else a valid ReadHandle is returned.

internal/cache/clockpro.go

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -184,6 +184,34 @@ func (c *shard) getWithReadEntry(k key, level base.Level, category Category) (*V
184184
return nil, re
185185
}
186186

187+
// tableFilterMayContain looks up a cache entry and checks if the filter may
188+
// contain the given key, while holding the read lock. This avoids refcount
189+
// overhead for bloom filter checks.
190+
//
191+
// The dataOffset specifies how many bytes to skip at the start of the cached
192+
// buffer (to skip block metadata).
193+
//
194+
// tableFilterMayContain does not update hit counters.
195+
//
196+
// Returns (true, mayContain) if found; (false, false) otherwise.
197+
func (c *shard) tableFilterMayContain(
198+
k key, dataOffset int, filter base.TableFilterDecoder, filterKey []byte,
199+
) (found bool, mayContain bool) {
200+
c.mu.RLock()
201+
defer c.mu.RUnlock()
202+
203+
if e, _ := c.blocks.Get(k); e != nil {
204+
if v := e.val; v != nil && len(v.buf) > dataOffset {
205+
// Update referenced flag for CLOCK-Pro (same as regular get).
206+
if !e.referenced.Load() {
207+
e.referenced.Store(true)
208+
}
209+
return true, filter.MayContain(v.buf[dataOffset:], filterKey)
210+
}
211+
}
212+
return false, false
213+
}
214+
187215
func (c *shard) set(k key, value *Value, markAccessed bool) {
188216
c.mu.Lock()
189217
defer c.mu.Unlock()

sstable/block/block.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -375,6 +375,11 @@ func (r *Reader) ChecksumType() ChecksumType {
375375
return r.checksumType
376376
}
377377

378+
// CacheHandle returns the cache Handle, or nil if caching is disabled.
379+
func (r *Reader) CacheHandle() *cache.Handle {
380+
return r.opts.CacheOpts.CacheHandle
381+
}
382+
378383
var kindToCacheCategory = [blockkind.NumKinds]cache.Category{
379384
blockkind.Unknown: cache.CategoryBackground,
380385
blockkind.SSTableData: cache.CategorySSTableData,

sstable/filter.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,3 +65,8 @@ func (f *tableFilterReader) mayContain(data, key []byte) bool {
6565
}
6666
return mayContain
6767
}
68+
69+
// Decoder returns the underlying filter decoder.
70+
func (f *tableFilterReader) Decoder() base.TableFilterDecoder {
71+
return f.decoder
72+
}

sstable/reader_iter_single_lvl.go

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1027,6 +1027,21 @@ func (i *singleLevelIterator[I, PI, D, PD]) bloomFilterMayContain(prefix []byte)
10271027
}
10281028
}
10291029

1030+
// Fast path: check directly in cache without refcount overhead.
1031+
if cacheHandle := i.reader.blockReader.CacheHandle(); cacheHandle != nil {
1032+
found, mayContain := cacheHandle.TableFilterMayContain(
1033+
i.reader.blockReader.FileNum(),
1034+
i.reader.filterBH.Offset,
1035+
block.MetadataSize,
1036+
i.reader.tableFilter.Decoder(),
1037+
prefixToCheck,
1038+
)
1039+
if found {
1040+
return mayContain, nil
1041+
}
1042+
}
1043+
1044+
// Slow path: read filter block from disk.
10301045
dataH, err := i.reader.readFilterBlock(i.ctx, i.readEnv.Block, i.indexFilterRH, i.reader.filterBH)
10311046
if err != nil {
10321047
return false, err

0 commit comments

Comments
 (0)