Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ set(ICEBERG_SOURCES
data/writer.cc
delete_file_index.cc
deletes/roaring_position_bitmap.cc
deletes/position_delete_index.cc
expression/aggregate.cc
expression/binder.cc
expression/evaluator.cc
Expand Down
42 changes: 42 additions & 0 deletions src/iceberg/deletes/position_delete_index.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/deletes/position_delete_index.h"

namespace iceberg {

/// Marks the single row position `pos` as deleted by setting it in the
/// underlying position bitmap.
void PositionDeleteIndex::Delete(int64_t pos) {
  bitmap_.Add(pos);
}

void PositionDeleteIndex::Delete(int64_t pos_start, int64_t pos_end) {
bitmap_.AddRange(pos_start, pos_end);
}

/// Reports whether row position `pos` has been marked as deleted, i.e. whether
/// it is set in the underlying position bitmap.
bool PositionDeleteIndex::IsDeleted(int64_t pos) const {
  const bool is_set = bitmap_.Contains(pos);
  return is_set;
}

/// Reports whether the underlying bitmap is empty, meaning no position has
/// been recorded as deleted.
bool PositionDeleteIndex::IsEmpty() const {
  const bool no_deletes = bitmap_.IsEmpty();
  return no_deletes;
}

/// Returns the number of positions currently marked as deleted.
int64_t PositionDeleteIndex::Cardinality() const {
  // The bitmap reports its own count type; the public API exposes int64_t.
  const auto deleted_count = bitmap_.Cardinality();
  return static_cast<int64_t>(deleted_count);
}

void PositionDeleteIndex::Merge(const PositionDeleteIndex& other) {
bitmap_.Or(other.bitmap_);
}

} // namespace iceberg
71 changes: 71 additions & 0 deletions src/iceberg/deletes/position_delete_index.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

/// \file iceberg/deletes/position_delete_index.h
/// Index of deleted row positions for a data file.

#include <cstdint>
#include <memory>

#include "iceberg/deletes/roaring_position_bitmap.h"
#include "iceberg/iceberg_export.h"

namespace iceberg {

/// \brief Tracks deleted row positions using a bitmap.
///
/// This class provides a domain-specific API for position deletes
/// in Iceberg MOR (merge-on-read) tables. Positions are 0-based
/// row indices within a data file.
///
/// The class follows the Rule of Zero: it declares no destructor and no
/// copy/move members, so all of them are generated from the bitmap member.
/// Declaring a destructor here (even as `= default`) would suppress the
/// implicit move constructor and move assignment and silently turn every
/// move of an index into a copy of the bitmap.
class ICEBERG_EXPORT PositionDeleteIndex {
 public:
  /// \brief Create an empty index (no positions deleted).
  PositionDeleteIndex() = default;

  /// \brief Mark a position as deleted.
  /// \param pos The 0-based row position to delete
  /// \note Positions rejected by the underlying bitmap (negative or above its
  /// maximum position) are silently ignored.
  void Delete(int64_t pos);

  /// \brief Mark a range of positions as deleted [pos_start, pos_end).
  /// \param pos_start Start position (inclusive)
  /// \param pos_end End position (exclusive)
  /// \note Positions rejected by the underlying bitmap are silently ignored.
  void Delete(int64_t pos_start, int64_t pos_end);

  /// \brief Check if a position is deleted.
  /// \param pos The 0-based row position to check
  /// \return true if the position is deleted, false otherwise (including
  /// positions the underlying bitmap considers invalid)
  bool IsDeleted(int64_t pos) const;

  /// \brief Check if the index is empty (no positions deleted).
  /// \return true if no position has been marked as deleted
  bool IsEmpty() const;

  /// \brief Get the number of deleted positions.
  /// \return The count of distinct positions marked as deleted
  int64_t Cardinality() const;

  /// \brief Merge another index into this one.
  /// \param other The index to merge (union operation); it is not modified
  void Merge(const PositionDeleteIndex& other);

 private:
  /// Set of deleted row positions; every public method delegates to it.
  RoaringPositionBitmap bitmap_;
};

} // namespace iceberg
18 changes: 10 additions & 8 deletions src/iceberg/deletes/roaring_position_bitmap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,24 +117,26 @@ RoaringPositionBitmap& RoaringPositionBitmap::operator=(
// Constructs a bitmap around an already-built implementation object, taking
// ownership of `impl` by moving it into `impl_`.
RoaringPositionBitmap::RoaringPositionBitmap(std::unique_ptr<Impl> impl)
: impl_(std::move(impl)) {}

Status RoaringPositionBitmap::Add(int64_t pos) {
ICEBERG_RETURN_UNEXPECTED(ValidatePosition(pos));
void RoaringPositionBitmap::Add(int64_t pos) {
if (pos < 0 || pos > kMaxPosition) {
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@emkornfield I've changed this to not fail on invalid inputs.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm OK with this; I think this case is a little bit less clear than the read side, though.

return; // Silently ignore invalid positions
}
int32_t key = Key(pos);
uint32_t pos32 = Pos32Bits(pos);
impl_->AllocateBitmapsIfNeeded(key + 1);
impl_->bitmaps[key].add(pos32);
return {};
}

Status RoaringPositionBitmap::AddRange(int64_t pos_start, int64_t pos_end) {
void RoaringPositionBitmap::AddRange(int64_t pos_start, int64_t pos_end) {
for (int64_t pos = pos_start; pos < pos_end; ++pos) {
ICEBERG_RETURN_UNEXPECTED(Add(pos));
Add(pos);
}
return {};
}

Result<bool> RoaringPositionBitmap::Contains(int64_t pos) const {
ICEBERG_RETURN_UNEXPECTED(ValidatePosition(pos));
bool RoaringPositionBitmap::Contains(int64_t pos) const {
if (pos < 0 || pos > kMaxPosition) {
return false; // Invalid positions are not contained
}
int32_t key = Key(pos);
uint32_t pos32 = Pos32Bits(pos);
return std::cmp_less(key, impl_->bitmaps.size()) && impl_->bitmaps[key].contains(pos32);
Expand Down
13 changes: 7 additions & 6 deletions src/iceberg/deletes/roaring_position_bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,16 +61,17 @@ class ICEBERG_EXPORT RoaringPositionBitmap {

/// \brief Sets a position in the bitmap.
/// \param pos the position (must be >= 0 and <= kMaxPosition)
/// \return Status indicating success or InvalidArgument error
[[nodiscard]] Status Add(int64_t pos);
/// \note Invalid positions are silently ignored
void Add(int64_t pos);

/// \brief Sets a range of positions [pos_start, pos_end).
/// \return Status indicating success or InvalidArgument error
[[nodiscard]] Status AddRange(int64_t pos_start, int64_t pos_end);
/// \note Invalid positions are silently ignored
void AddRange(int64_t pos_start, int64_t pos_end);

/// \brief Checks if a position is set in the bitmap.
/// \return Result<bool> or InvalidArgument error
[[nodiscard]] Result<bool> Contains(int64_t pos) const;
/// \param pos the position to check
/// \return true if the position is set, false otherwise (including invalid positions)
bool Contains(int64_t pos) const;

/// \brief Returns true if the bitmap has no positions set.
bool IsEmpty() const;
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ add_iceberg_test(util_test
formatter_test.cc
location_util_test.cc
roaring_position_bitmap_test.cc
position_delete_index_test.cc
string_util_test.cc
transform_util_test.cc
truncate_util_test.cc
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/test/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ iceberg_tests = {
'endian_test.cc',
'formatter_test.cc',
'location_util_test.cc',
'position_delete_index_test.cc',
'roaring_position_bitmap_test.cc',
'string_util_test.cc',
'transform_util_test.cc',
Expand Down
Loading
Loading