Skip to content

Commit acc264b

Browse files
committed
perf: optimize AddRange in RoaringPositionBitmap
Replace the per-position loop in AddRange with direct roaring::Roaring::addRange calls. For ranges within a single 32-bit key, a single addRange call suffices. For ranges spanning multiple keys, the range is split into the first partial key, full middle keys, and the last partial key.
1 parent 5666e67 commit acc264b

3 files changed

Lines changed: 89 additions & 2 deletions

File tree

src/iceberg/deletes/roaring_position_bitmap.cc

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,31 @@ void RoaringPositionBitmap::Add(int64_t pos) {
128128
}
129129

130130
void RoaringPositionBitmap::AddRange(int64_t pos_start, int64_t pos_end) {
131-
for (int64_t pos = pos_start; pos < pos_end; ++pos) {
132-
Add(pos);
131+
if (pos_start >= pos_end) {
132+
return;
133+
}
134+
if (pos_start < 0 || pos_end - 1 > kMaxPosition) {
135+
return;
136+
}
137+
138+
int32_t start_key = Key(pos_start);
139+
int32_t end_key = Key(pos_end - 1);
140+
impl_->AllocateBitmapsIfNeeded(end_key + 1);
141+
142+
if (start_key == end_key) {
143+
uint64_t low_start = Pos32Bits(pos_start);
144+
uint64_t low_end = static_cast<uint64_t>(Pos32Bits(pos_end - 1)) + 1;
145+
impl_->bitmaps[start_key].addRange(low_start, low_end);
146+
} else {
147+
uint64_t first_low_start = Pos32Bits(pos_start);
148+
impl_->bitmaps[start_key].addRange(first_low_start, uint64_t{1} << 32);
149+
150+
for (int32_t key = start_key + 1; key < end_key; ++key) {
151+
impl_->bitmaps[key].addRange(uint64_t{0}, uint64_t{1} << 32);
152+
}
153+
154+
uint64_t last_low_end = static_cast<uint64_t>(Pos32Bits(pos_end - 1)) + 1;
155+
impl_->bitmaps[end_key].addRange(uint64_t{0}, last_low_end);
133156
}
134157
}
135158

src/iceberg/deletes/roaring_position_bitmap.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ class ICEBERG_EXPORT RoaringPositionBitmap {
6565
void Add(int64_t pos);
6666

6767
/// \brief Sets a range of positions [pos_start, pos_end).
68+
/// If pos_start >= pos_end, this method does nothing.
6869
/// \note Invalid positions are silently ignored
6970
void AddRange(int64_t pos_start, int64_t pos_end);
7071

src/iceberg/test/roaring_position_bitmap_test.cc

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,72 @@ INSTANTIATE_TEST_SUITE_P(
128128
.start = (int64_t{1} << 32) - 5,
129129
.end = (int64_t{1} << 32) + 5,
130130
.absent_positions = {0, (int64_t{1} << 32) + 5},
131+
},
132+
AddRangeParams{
133+
.name = "single_position",
134+
.start = 42,
135+
.end = 43,
136+
.absent_positions = {41, 43},
131137
}),
132138
[](const ::testing::TestParamInfo<AddRangeParams>& info) { return info.param.name; });
133139

140+
TEST(RoaringPositionBitmapTest, TestAddRangeLargeContiguous) {
141+
RoaringPositionBitmap bitmap;
142+
bitmap.AddRange(500, 200500);
143+
144+
ASSERT_EQ(bitmap.Cardinality(), 200000u);
145+
ASSERT_TRUE(bitmap.Contains(500));
146+
ASSERT_TRUE(bitmap.Contains(200499));
147+
ASSERT_FALSE(bitmap.Contains(499));
148+
ASSERT_FALSE(bitmap.Contains(200500));
149+
}
150+
151+
TEST(RoaringPositionBitmapTest, TestAddRangeSpanningThreeKeys) {
152+
RoaringPositionBitmap bitmap;
153+
154+
int64_t start = (int64_t{0} << 32) | int64_t{0xFFFFFFF0};
155+
int64_t end = (int64_t{2} << 32) | int64_t{0x10};
156+
bitmap.AddRange(start, end);
157+
158+
ASSERT_EQ(bitmap.Cardinality(), static_cast<size_t>(end - start));
159+
ASSERT_TRUE(bitmap.Contains(start));
160+
ASSERT_TRUE(bitmap.Contains(end - 1));
161+
ASSERT_FALSE(bitmap.Contains(start - 1));
162+
ASSERT_FALSE(bitmap.Contains(end));
163+
ASSERT_TRUE(bitmap.Contains(int64_t{1} << 32));
164+
// Verify a sample near the end of the middle key is also present
165+
ASSERT_TRUE(bitmap.Contains((int64_t{1} << 32) | int64_t{0xFFFFFFF0}));
166+
}
167+
168+
TEST(RoaringPositionBitmapTest, TestAddRangeInvalidNegativeStartIsNoOp) {
169+
RoaringPositionBitmap bitmap;
170+
bitmap.AddRange(-1, 10);
171+
ASSERT_TRUE(bitmap.IsEmpty());
172+
ASSERT_EQ(bitmap.Cardinality(), 0u);
173+
}
174+
175+
TEST(RoaringPositionBitmapTest, TestAddRangeInvalidBeyondMaxPositionIsNoOp) {
176+
RoaringPositionBitmap bitmap;
177+
bitmap.AddRange(0, RoaringPositionBitmap::kMaxPosition + 2);
178+
ASSERT_TRUE(bitmap.IsEmpty());
179+
ASSERT_EQ(bitmap.Cardinality(), 0u);
180+
}
181+
182+
TEST(RoaringPositionBitmapTest, TestAddRangeReversedIsNoOp) {
183+
RoaringPositionBitmap bitmap;
184+
bitmap.AddRange(100, 50);
185+
ASSERT_TRUE(bitmap.IsEmpty());
186+
ASSERT_EQ(bitmap.Cardinality(), 0u);
187+
}
188+
189+
TEST(RoaringPositionBitmapTest, TestAddRangeEqualStartEndIsNoOp) {
190+
RoaringPositionBitmap bitmap;
191+
bitmap.AddRange(100, 100);
192+
ASSERT_TRUE(bitmap.IsEmpty());
193+
ASSERT_EQ(bitmap.Cardinality(), 0u);
194+
ASSERT_FALSE(bitmap.Contains(100));
195+
}
196+
134197
struct OrParams {
135198
const char* name;
136199
std::set<int64_t> lhs_input;

0 commit comments

Comments
 (0)