Skip to content

Commit 42b00d4

Browse files
authored
feat(perf): optimize AddRange in RoaringPositionBitmap (#608)
Replace the per-position loop in `RoaringPositionBitmap::AddRange` with direct calls to `roaring::Roaring::addRange`, matching the optimization in the Java implementation (apache/iceberg#15791). Added tests for single-position ranges, large contiguous ranges, three-key spanning ranges, and invalid inputs.
1 parent a1dc2f8 commit 42b00d4

File tree

3 files changed

+111
-4
lines changed

3 files changed

+111
-4
lines changed

src/iceberg/deletes/roaring_position_bitmap.cc

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,22 @@ void RoaringPositionBitmap::Add(int64_t pos) {
128128
}
129129

130130
void RoaringPositionBitmap::AddRange(int64_t pos_start, int64_t pos_end) {
131-
for (int64_t pos = pos_start; pos < pos_end; ++pos) {
132-
Add(pos);
131+
pos_start = std::max(pos_start, int64_t{0});
132+
pos_end = std::min(pos_end, kMaxPosition + 1);
133+
if (pos_start >= pos_end) {
134+
return;
135+
}
136+
137+
int64_t pos_last = pos_end - 1;
138+
int32_t start_key = Key(pos_start);
139+
int32_t end_key = Key(pos_last);
140+
impl_->AllocateBitmapsIfNeeded(end_key + 1);
141+
142+
for (int32_t key = start_key; key <= end_key; ++key) {
143+
uint64_t low_start = (key == start_key) ? Pos32Bits(pos_start) : uint64_t{0};
144+
uint64_t low_end = (key == end_key) ? static_cast<uint64_t>(Pos32Bits(pos_last)) + 1
145+
: (uint64_t{1} << 32);
146+
impl_->bitmaps[key].addRange(low_start, low_end);
133147
}
134148
}
135149

src/iceberg/deletes/roaring_position_bitmap.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,11 @@ class ICEBERG_EXPORT RoaringPositionBitmap {
6565
void Add(int64_t pos);
6666

6767
/// \brief Sets a range of positions [pos_start, pos_end).
68-
/// \note Invalid positions are silently ignored
68+
/// \param pos_start the start of the range (inclusive), clamped to 0
69+
/// \param pos_end the end of the range (exclusive), clamped to kMaxPosition + 1
70+
/// \note If pos_start > pos_end, the call is silently ignored.
71+
/// If pos_start == pos_end, this method does nothing.
72+
/// Positions outside [0, kMaxPosition] are silently ignored.
6973
void AddRange(int64_t pos_start, int64_t pos_end);
7074

7175
/// \brief Checks if a position is set in the bitmap.

src/iceberg/test/roaring_position_bitmap_test.cc

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,94 @@ INSTANTIATE_TEST_SUITE_P(
128128
.start = (int64_t{1} << 32) - 5,
129129
.end = (int64_t{1} << 32) + 5,
130130
.absent_positions = {0, (int64_t{1} << 32) + 5},
131+
},
132+
AddRangeParams{
133+
.name = "single_position",
134+
.start = 42,
135+
.end = 43,
136+
.absent_positions = {41, 43},
131137
}),
132138
[](const ::testing::TestParamInfo<AddRangeParams>& info) { return info.param.name; });
133139

140+
TEST(RoaringPositionBitmapTest, TestAddRangeLargeContiguous) {
141+
RoaringPositionBitmap bitmap;
142+
bitmap.AddRange(500, 200500);
143+
144+
ASSERT_EQ(bitmap.Cardinality(), 200000u);
145+
ASSERT_TRUE(bitmap.Contains(500));
146+
ASSERT_TRUE(bitmap.Contains(200499));
147+
ASSERT_FALSE(bitmap.Contains(499));
148+
ASSERT_FALSE(bitmap.Contains(200500));
149+
}
150+
151+
TEST(RoaringPositionBitmapTest, TestAddRangeSpanningThreeKeys) {
152+
RoaringPositionBitmap bitmap;
153+
154+
int64_t start = (int64_t{0} << 32) | int64_t{0xFFFFFFF0};
155+
int64_t end = (int64_t{2} << 32) | int64_t{0x10};
156+
bitmap.AddRange(start, end);
157+
158+
ASSERT_EQ(bitmap.Cardinality(), static_cast<size_t>(end - start));
159+
ASSERT_TRUE(bitmap.Contains(start));
160+
ASSERT_TRUE(bitmap.Contains(end - 1));
161+
ASSERT_FALSE(bitmap.Contains(start - 1));
162+
ASSERT_FALSE(bitmap.Contains(end));
163+
ASSERT_TRUE(bitmap.Contains(int64_t{1} << 32));
164+
// Verify a sample near the end of the middle key is also present
165+
ASSERT_TRUE(bitmap.Contains((int64_t{1} << 32) | int64_t{0xFFFFFFF0}));
166+
}
167+
168+
TEST(RoaringPositionBitmapTest, TestAddRangeClampNegativeStart) {
169+
RoaringPositionBitmap bitmap;
170+
bitmap.AddRange(-1, 10);
171+
ASSERT_EQ(bitmap.Cardinality(), 10u);
172+
ASSERT_TRUE(bitmap.Contains(0));
173+
ASSERT_TRUE(bitmap.Contains(9));
174+
ASSERT_FALSE(bitmap.Contains(-1));
175+
}
176+
177+
TEST(RoaringPositionBitmapTest, TestAddRangeClampBeyondMaxPosition) {
178+
RoaringPositionBitmap bitmap;
179+
// Range entirely beyond kMaxPosition: after clamping both endpoints the range
180+
// becomes empty, so no allocation or insertion happens.
181+
bitmap.AddRange(RoaringPositionBitmap::kMaxPosition + 1,
182+
RoaringPositionBitmap::kMaxPosition + 10);
183+
ASSERT_TRUE(bitmap.IsEmpty());
184+
}
185+
186+
struct AddRangeNoOpParams {
187+
const char* name;
188+
int64_t start;
189+
int64_t end;
190+
};
191+
192+
class RoaringPositionBitmapAddRangeNoOpTest
193+
: public ::testing::TestWithParam<AddRangeNoOpParams> {};
194+
195+
TEST_P(RoaringPositionBitmapAddRangeNoOpTest, IsNoOp) {
196+
const auto& param = GetParam();
197+
RoaringPositionBitmap bitmap;
198+
bitmap.AddRange(param.start, param.end);
199+
ASSERT_TRUE(bitmap.IsEmpty());
200+
ASSERT_EQ(bitmap.Cardinality(), 0u);
201+
}
202+
203+
INSTANTIATE_TEST_SUITE_P(
204+
AddRangeNoOpScenarios, RoaringPositionBitmapAddRangeNoOpTest,
205+
::testing::Values(
206+
AddRangeNoOpParams{.name = "equal", .start = 100, .end = 100},
207+
AddRangeNoOpParams{.name = "zero_length_at_zero", .start = 0, .end = 0},
208+
AddRangeNoOpParams{.name = "negative_both", .start = -10, .end = -5}),
209+
[](const ::testing::TestParamInfo<AddRangeNoOpParams>& info) {
210+
return info.param.name;
211+
});
212+
213+
TEST(RoaringPositionBitmapTest, TestAddRangeReversedIsNoOp) {
214+
RoaringPositionBitmap bitmap;
215+
bitmap.AddRange(100, 50);
216+
ASSERT_TRUE(bitmap.IsEmpty());
217+
}
218+
134219
struct OrParams {
135220
const char* name;
136221
std::set<int64_t> lhs_input;
@@ -398,7 +483,11 @@ TEST(RoaringPositionBitmapTest, TestIsEmpty) {
398483

399484
TEST(RoaringPositionBitmapTest, TestOptimize) {
400485
RoaringPositionBitmap bitmap;
401-
bitmap.AddRange(0, 10000);
486+
// Use Add() instead of AddRange() because addRange() creates run-length
487+
// encoded containers directly, leaving nothing for Optimize() to compress.
488+
for (int64_t i = 0; i < 10000; ++i) {
489+
bitmap.Add(i);
490+
}
402491
size_t size_before = bitmap.SerializedSizeInBytes();
403492

404493
bool changed = bitmap.Optimize();

0 commit comments

Comments
 (0)