Skip to content

Commit 3a8d6b5

Browse files
authored
Merge pull request #517 from anacrolix/bsi-retain
BSI: add Retain for in-place filtering
2 parents 584d599 + 05b0c37 commit 3a8d6b5

File tree

2 files changed

+42
-0
lines changed

2 files changed

+42
-0
lines changed

roaring64/bsi64.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,6 +1009,31 @@ func (b *BSI) ClearValues(foundSet *Bitmap) {
10091009
}
10101010
}
10111011

1012+
// Retain removes from the BSI all values whose column IDs are not in retain,
1013+
// modifying the BSI in place. It returns the number of column IDs dropped.
1014+
//
1015+
// This is the in-place equivalent of NewBSIRetainSet. Prefer it when no copy
1016+
// is needed, such as when the BSI will be immediately re-serialized — it
1017+
// avoids the allocation of a new BSI and all its bit planes.
1018+
//
1019+
// The bit planes (bA) are only updated when the existence bitmap actually
1020+
// shrinks. This is safe because BSI consistency guarantees that bA contains no
1021+
// set bits for column IDs absent from eBM; if eBM is unchanged after the
1022+
// intersection then retain covers all existing column IDs and bA needs no
1023+
// update.
1024+
func (b *BSI) Retain(retain *Bitmap) (dropped uint64) {
1025+
preCard := b.eBM.GetCardinality()
1026+
b.eBM.And(retain)
1027+
dropped = preCard - b.eBM.GetCardinality()
1028+
if dropped == 0 {
1029+
return
1030+
}
1031+
for i := range b.bA {
1032+
b.bA[i].And(retain)
1033+
}
1034+
return
1035+
}
1036+
10121037
// NewBSIRetainSet - Construct a new BSI from a clone of existing BSI, retain only values contained in foundSet
10131038
func (b *BSI) NewBSIRetainSet(foundSet *Bitmap) *BSI {
10141039

roaring64/bsi64_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -836,6 +836,23 @@ func TestRangeNilBig(t *testing.T) {
836836
assert.Equal(t, tmpAll.GetCardinality(), setAll.GetCardinality())
837837
}
838838

839+
func TestRetain(t *testing.T) {
840+
bsi := setup() // values 0..100 inclusive = 101 entries
841+
retain := BitmapOf(50)
842+
dropped := bsi.Retain(retain)
843+
assert.Equal(t, uint64(100), dropped)
844+
assert.Equal(t, uint64(1), bsi.GetCardinality())
845+
val, ok := bsi.GetValue(50)
846+
assert.True(t, ok)
847+
assert.Equal(t, int64(50), val)
848+
849+
// When retain covers all existing column IDs, nothing is dropped and bA is
850+
// not touched (the dropped==0 early-return path).
851+
dropped = bsi.Retain(BitmapOf(50, 99))
852+
assert.Equal(t, uint64(0), dropped)
853+
assert.Equal(t, uint64(1), bsi.GetCardinality())
854+
}
855+
839856
func BenchmarkClearValues(b *testing.B) {
840857
bsi := setupLargeBSI(b)
841858
if bsi == nil {

0 commit comments

Comments
 (0)