Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions src/iceberg/deletes/roaring_position_bitmap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,31 @@ void RoaringPositionBitmap::Add(int64_t pos) {
}

void RoaringPositionBitmap::AddRange(int64_t pos_start, int64_t pos_end) {
for (int64_t pos = pos_start; pos < pos_end; ++pos) {
Add(pos);
if (pos_start >= pos_end) {
return;
}
if (pos_start < 0 || pos_end - 1 > kMaxPosition) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

its not clear to me this is correct, don't we need to fill in valid values?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

We need to ignore invalid positions but set values for valid ones.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, i follow the style of the normal operation, to ignore invalid positions, and clamp inputs to correct values.

However, this is inconsistent with the behaviour of core Java, where positions gets validated, and errors out if they are out of range.

return;
}
Comment thread
Baunsgaard marked this conversation as resolved.
Outdated

int32_t start_key = Key(pos_start);
int32_t end_key = Key(pos_end - 1);
impl_->AllocateBitmapsIfNeeded(end_key + 1);

if (start_key == end_key) {
Comment thread
Baunsgaard marked this conversation as resolved.
Outdated
uint64_t low_start = Pos32Bits(pos_start);
uint64_t low_end = static_cast<uint64_t>(Pos32Bits(pos_end - 1)) + 1;
impl_->bitmaps[start_key].addRange(low_start, low_end);
} else {
uint64_t first_low_start = Pos32Bits(pos_start);
impl_->bitmaps[start_key].addRange(first_low_start, uint64_t{1} << 32);

for (int32_t key = start_key + 1; key < end_key; ++key) {
impl_->bitmaps[key].addRange(uint64_t{0}, uint64_t{1} << 32);
}

uint64_t last_low_end = static_cast<uint64_t>(Pos32Bits(pos_end - 1)) + 1;
impl_->bitmaps[end_key].addRange(uint64_t{0}, last_low_end);
}
}

Expand Down
1 change: 1 addition & 0 deletions src/iceberg/deletes/roaring_position_bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class ICEBERG_EXPORT RoaringPositionBitmap {
void Add(int64_t pos);

/// \brief Sets a range of positions [pos_start, pos_end).
/// If pos_start >= pos_end, this method does nothing.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This contradicts the note sentence below.

/// \note Invalid positions are silently ignored
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rephrase may be state pos_start >= pos_end is considered invalid.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have made it invalid if pos_start > pos_end, technically I can imagine cases where pos_start == pos_end, therefore i keep that as a no-op.

Let me know what you think.

void AddRange(int64_t pos_start, int64_t pos_end);

Expand Down
69 changes: 68 additions & 1 deletion src/iceberg/test/roaring_position_bitmap_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,72 @@ INSTANTIATE_TEST_SUITE_P(
.start = (int64_t{1} << 32) - 5,
.end = (int64_t{1} << 32) + 5,
.absent_positions = {0, (int64_t{1} << 32) + 5},
},
AddRangeParams{
.name = "single_position",
.start = 42,
.end = 43,
.absent_positions = {41, 43},
}),
[](const ::testing::TestParamInfo<AddRangeParams>& info) { return info.param.name; });

TEST(RoaringPositionBitmapTest, TestAddRangeLargeContiguous) {
RoaringPositionBitmap bitmap;
bitmap.AddRange(500, 200500);

ASSERT_EQ(bitmap.Cardinality(), 200000u);
ASSERT_TRUE(bitmap.Contains(500));
ASSERT_TRUE(bitmap.Contains(200499));
ASSERT_FALSE(bitmap.Contains(499));
ASSERT_FALSE(bitmap.Contains(200500));
}

TEST(RoaringPositionBitmapTest, TestAddRangeSpanningThreeKeys) {
RoaringPositionBitmap bitmap;

int64_t start = (int64_t{0} << 32) | int64_t{0xFFFFFFF0};
int64_t end = (int64_t{2} << 32) | int64_t{0x10};
bitmap.AddRange(start, end);

ASSERT_EQ(bitmap.Cardinality(), static_cast<size_t>(end - start));
ASSERT_TRUE(bitmap.Contains(start));
ASSERT_TRUE(bitmap.Contains(end - 1));
ASSERT_FALSE(bitmap.Contains(start - 1));
ASSERT_FALSE(bitmap.Contains(end));
ASSERT_TRUE(bitmap.Contains(int64_t{1} << 32));
// Verify a sample near the end of the middle key is also present
ASSERT_TRUE(bitmap.Contains((int64_t{1} << 32) | int64_t{0xFFFFFFF0}));
}

TEST(RoaringPositionBitmapTest, TestAddRangeInvalidNegativeStartIsNoOp) {
RoaringPositionBitmap bitmap;
bitmap.AddRange(-1, 10);
ASSERT_TRUE(bitmap.IsEmpty());
ASSERT_EQ(bitmap.Cardinality(), 0u);
}

TEST(RoaringPositionBitmapTest, TestAddRangeInvalidBeyondMaxPositionIsNoOp) {
RoaringPositionBitmap bitmap;
bitmap.AddRange(0, RoaringPositionBitmap::kMaxPosition + 2);
ASSERT_TRUE(bitmap.IsEmpty());
ASSERT_EQ(bitmap.Cardinality(), 0u);
}

TEST(RoaringPositionBitmapTest, TestAddRangeReversedIsNoOp) {
RoaringPositionBitmap bitmap;
bitmap.AddRange(100, 50);
ASSERT_TRUE(bitmap.IsEmpty());
ASSERT_EQ(bitmap.Cardinality(), 0u);
}

TEST(RoaringPositionBitmapTest, TestAddRangeEqualStartEndIsNoOp) {
RoaringPositionBitmap bitmap;
bitmap.AddRange(100, 100);
ASSERT_TRUE(bitmap.IsEmpty());
ASSERT_EQ(bitmap.Cardinality(), 0u);
ASSERT_FALSE(bitmap.Contains(100));
}
Comment thread
Baunsgaard marked this conversation as resolved.
Outdated

struct OrParams {
const char* name;
std::set<int64_t> lhs_input;
Expand Down Expand Up @@ -398,7 +461,11 @@ TEST(RoaringPositionBitmapTest, TestIsEmpty) {

TEST(RoaringPositionBitmapTest, TestOptimize) {
RoaringPositionBitmap bitmap;
bitmap.AddRange(0, 10000);
// Use Add() instead of AddRange() because addRange() creates run-length
// encoded containers directly, leaving nothing for Optimize() to compress.
for (int64_t i = 0; i < 10000; ++i) {
bitmap.Add(i);
}
size_t size_before = bitmap.SerializedSizeInBytes();

bool changed = bitmap.Optimize();
Expand Down
Loading