Skip to content

Commit cd04a9d

Browse files
committed
feat: add position delete index
1 parent 133742d commit cd04a9d

File tree

9 files changed

+399
-83
lines changed

9 files changed

+399
-83
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ set(ICEBERG_SOURCES
2626
data/writer.cc
2727
delete_file_index.cc
2828
deletes/roaring_position_bitmap.cc
29+
deletes/position_delete_index.cc
2930
expression/aggregate.cc
3031
expression/binder.cc
3132
expression/evaluator.cc
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/deletes/position_delete_index.h"
21+
22+
namespace iceberg {
23+
24+
void PositionDeleteIndex::Delete(int64_t pos) { bitmap_.Add(pos); }
25+
26+
void PositionDeleteIndex::Delete(int64_t pos_start, int64_t pos_end) {
27+
bitmap_.AddRange(pos_start, pos_end);
28+
}
29+
30+
bool PositionDeleteIndex::IsDeleted(int64_t pos) const { return bitmap_.Contains(pos); }
31+
32+
bool PositionDeleteIndex::IsEmpty() const { return bitmap_.IsEmpty(); }
33+
34+
int64_t PositionDeleteIndex::Cardinality() const {
35+
return static_cast<int64_t>(bitmap_.Cardinality());
36+
}
37+
38+
void PositionDeleteIndex::Merge(const PositionDeleteIndex& other) {
39+
bitmap_.Or(other.bitmap_);
40+
}
41+
42+
} // namespace iceberg
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
/// \file iceberg/deletes/position_delete_index.h
23+
/// Index of deleted row positions for a data file.
24+
25+
#include <cstdint>
26+
#include <memory>
27+
28+
#include "iceberg/deletes/roaring_position_bitmap.h"
29+
#include "iceberg/iceberg_export.h"
30+
31+
namespace iceberg {
32+
33+
/// \brief Tracks deleted row positions using a bitmap.
34+
///
35+
/// This class provides a domain-specific API for position deletes
36+
/// in Iceberg MOR (merge-on-read) tables. Positions are 0-based
37+
/// row indices within a data file.
38+
class ICEBERG_EXPORT PositionDeleteIndex {
39+
public:
40+
PositionDeleteIndex() = default;
41+
~PositionDeleteIndex() = default;
42+
43+
/// \brief Mark a position as deleted.
44+
/// \param pos The 0-based row position to delete
45+
void Delete(int64_t pos);
46+
47+
/// \brief Mark a range of positions as deleted [pos_start, pos_end).
48+
/// \param pos_start Start position (inclusive)
49+
/// \param pos_end End position (exclusive)
50+
void Delete(int64_t pos_start, int64_t pos_end);
51+
52+
/// \brief Check if a position is deleted.
53+
/// \param pos The 0-based row position to check
54+
/// \return true if the position is deleted, false otherwise
55+
bool IsDeleted(int64_t pos) const;
56+
57+
/// \brief Check if the index is empty (no positions deleted).
58+
bool IsEmpty() const;
59+
60+
/// \brief Get the number of deleted positions.
61+
int64_t Cardinality() const;
62+
63+
/// \brief Merge another index into this one.
64+
/// \param other The index to merge (union operation)
65+
void Merge(const PositionDeleteIndex& other);
66+
67+
private:
68+
RoaringPositionBitmap bitmap_;
69+
};
70+
71+
} // namespace iceberg

src/iceberg/deletes/roaring_position_bitmap.cc

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -117,24 +117,26 @@ RoaringPositionBitmap& RoaringPositionBitmap::operator=(
117117
RoaringPositionBitmap::RoaringPositionBitmap(std::unique_ptr<Impl> impl)
118118
: impl_(std::move(impl)) {}
119119

120-
Status RoaringPositionBitmap::Add(int64_t pos) {
121-
ICEBERG_RETURN_UNEXPECTED(ValidatePosition(pos));
120+
void RoaringPositionBitmap::Add(int64_t pos) {
121+
if (pos < 0 || pos > kMaxPosition) {
122+
return; // Silently ignore invalid positions
123+
}
122124
int32_t key = Key(pos);
123125
uint32_t pos32 = Pos32Bits(pos);
124126
impl_->AllocateBitmapsIfNeeded(key + 1);
125127
impl_->bitmaps[key].add(pos32);
126-
return {};
127128
}
128129

129-
Status RoaringPositionBitmap::AddRange(int64_t pos_start, int64_t pos_end) {
130+
void RoaringPositionBitmap::AddRange(int64_t pos_start, int64_t pos_end) {
130131
for (int64_t pos = pos_start; pos < pos_end; ++pos) {
131-
ICEBERG_RETURN_UNEXPECTED(Add(pos));
132+
Add(pos);
132133
}
133-
return {};
134134
}
135135

136-
Result<bool> RoaringPositionBitmap::Contains(int64_t pos) const {
137-
ICEBERG_RETURN_UNEXPECTED(ValidatePosition(pos));
136+
bool RoaringPositionBitmap::Contains(int64_t pos) const {
137+
if (pos < 0 || pos > kMaxPosition) {
138+
return false; // Invalid positions are not contained
139+
}
138140
int32_t key = Key(pos);
139141
uint32_t pos32 = Pos32Bits(pos);
140142
return std::cmp_less(key, impl_->bitmaps.size()) && impl_->bitmaps[key].contains(pos32);

src/iceberg/deletes/roaring_position_bitmap.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,16 +61,17 @@ class ICEBERG_EXPORT RoaringPositionBitmap {
6161

6262
/// \brief Sets a position in the bitmap.
6363
/// \param pos the position (must be >= 0 and <= kMaxPosition)
64-
/// \return Status indicating success or InvalidArgument error
65-
[[nodiscard]] Status Add(int64_t pos);
64+
/// \note Invalid positions are silently ignored
65+
void Add(int64_t pos);
6666

6767
/// \brief Sets a range of positions [pos_start, pos_end).
68-
/// \return Status indicating success or InvalidArgument error
69-
[[nodiscard]] Status AddRange(int64_t pos_start, int64_t pos_end);
68+
/// \note Invalid positions are silently ignored
69+
void AddRange(int64_t pos_start, int64_t pos_end);
7070

7171
/// \brief Checks if a position is set in the bitmap.
72-
/// \return Result<bool> or InvalidArgument error
73-
[[nodiscard]] Result<bool> Contains(int64_t pos) const;
72+
/// \param pos the position to check
73+
/// \return true if the position is set, false otherwise (including invalid positions)
74+
bool Contains(int64_t pos) const;
7475

7576
/// \brief Returns true if the bitmap has no positions set.
7677
bool IsEmpty() const;

src/iceberg/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ add_iceberg_test(util_test
116116
formatter_test.cc
117117
location_util_test.cc
118118
roaring_position_bitmap_test.cc
119+
position_delete_index_test.cc
119120
string_util_test.cc
120121
transform_util_test.cc
121122
truncate_util_test.cc

src/iceberg/test/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ iceberg_tests = {
9090
'endian_test.cc',
9191
'formatter_test.cc',
9292
'location_util_test.cc',
93+
'position_delete_index_test.cc',
9394
'roaring_position_bitmap_test.cc',
9495
'string_util_test.cc',
9596
'transform_util_test.cc',

0 commit comments

Comments
 (0)