Skip to content

Commit 05b0c37

Browse files
committed
Document Retain and add test
Expand the doc comment on Retain to explain: in-place semantics vs NewBSIRetainSet, why skipping bA when dropped==0 is correct (BSI consistency invariant), and when to prefer Retain over the allocating alternative. The in-place form is used in caterwaul's CleanBitmaps, which iterates every term bitmap and filters it down to currently-valid doc IDs before writing it back to storage. No new BSI allocation is needed since the result is immediately serialized. Also add TestRetain, which covers both the path where column IDs are dropped and the dropped==0 path.
1 parent 72267ff commit 05b0c37

File tree

2 files changed

+29
-1
lines changed

2 files changed

+29
-1
lines changed

roaring64/bsi64.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -990,7 +990,18 @@ func (b *BSI) ClearValues(foundSet *Bitmap) {
990990
wg.Wait()
991991
}
992992

993-
// Retains only values found in retain. Returns how many values were not retained.
993+
// Retain removes from the BSI all values whose column IDs are not in retain,
994+
// modifying the BSI in place. It returns the number of column IDs dropped.
995+
//
996+
// This is the in-place equivalent of NewBSIRetainSet. Prefer it when no copy
997+
// is needed, such as when the BSI will be immediately re-serialized — it
998+
// avoids the allocation of a new BSI and all its bit planes.
999+
//
1000+
// The bit planes (bA) are only updated when the existence bitmap actually
1001+
// shrinks. This is safe because BSI consistency guarantees that bA contains no
1002+
// set bits for column IDs absent from eBM; if eBM is unchanged after the
1003+
// intersection then retain covers all existing column IDs and bA needs no
1004+
// update.
9941005
func (b *BSI) Retain(retain *Bitmap) (dropped uint64) {
9951006
preCard := b.eBM.GetCardinality()
9961007
b.eBM.And(retain)

roaring64/bsi64_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -806,6 +806,23 @@ func TestRangeNilBig(t *testing.T) {
806806
assert.Equal(t, tmpAll.GetCardinality(), setAll.GetCardinality())
807807
}
808808

809+
// TestRetain exercises the in-place BSI.Retain method in two steps. First it
// retains a single column ID and checks the reported drop count, the resulting
// cardinality, and that the surviving column still yields its value. Then it
// calls Retain again with a set that covers everything still present and
// checks that nothing further is dropped.
func TestRetain(t *testing.T) {
810+
bsi := setup() // values 0..100 inclusive = 101 entries
811+
retain := BitmapOf(50)
812+
dropped := bsi.Retain(retain)
813+
assert.Equal(t, uint64(100), dropped)
814+
assert.Equal(t, uint64(1), bsi.GetCardinality())
815+
val, ok := bsi.GetValue(50)
816+
assert.True(t, ok)
817+
assert.Equal(t, int64(50), val)
818+
819+
// Second call: retain covers every surviving column ID (only 50 remains; 99 is
820+
// already gone), so this exercises the dropped==0 path and nothing may change.
821+
dropped = bsi.Retain(BitmapOf(50, 99))
822+
assert.Equal(t, uint64(0), dropped)
823+
assert.Equal(t, uint64(1), bsi.GetCardinality())
824+
}
825+
809826
func BenchmarkClearValues(b *testing.B) {
810827
bsi := setupLargeBSI(b)
811828
if bsi == nil {

0 commit comments

Comments
 (0)