Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ set(ICEBERG_SOURCES
json_internal.cc
manifest/manifest_adapter.cc
manifest/manifest_entry.cc
manifest/manifest_group.cc
manifest/manifest_list.cc
manifest/manifest_reader.cc
manifest/manifest_writer.cc
Expand Down
12 changes: 12 additions & 0 deletions src/iceberg/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,24 @@

#pragma once

/// \file iceberg/constants.h
/// This file defines constants used commonly and shared across multiple
/// source files. It is mostly useful to add constants that are used as
/// default values in the class definitions in the header files without
/// including other headers just for the constant definitions.

#include <cstdint>
#include <string_view>

namespace iceberg {

constexpr std::string_view kParquetFieldIdKey = "PARQUET:field_id";
constexpr int64_t kInvalidSnapshotId = -1;
/// \brief Stand-in for the current sequence number that will be assigned when the commit
/// is successful. This is replaced when writing a manifest list by the ManifestFile
/// wrapper
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: wrapper -> adapter, which matches the code better.

constexpr int64_t kUnassignedSequenceNumber = -1;

// TODO(gangwu): move other commonly used constants here.

} // namespace iceberg
36 changes: 15 additions & 21 deletions src/iceberg/delete_file_index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -453,34 +453,32 @@ Result<std::shared_ptr<DataFile>> DeleteFileIndex::FindDV(
}

Result<DeleteFileIndex::Builder> DeleteFileIndex::BuilderFor(
std::shared_ptr<FileIO> io, std::vector<ManifestFile> delete_manifests) {
std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
std::vector<ManifestFile> delete_manifests) {
ICEBERG_PRECHECK(io != nullptr, "FileIO cannot be null");
return Builder(std::move(io), std::move(delete_manifests));
ICEBERG_PRECHECK(schema != nullptr, "Schema cannot be null");
ICEBERG_PRECHECK(!specs_by_id.empty(), "Partition specs cannot be empty");
return Builder(std::move(io), std::move(schema), std::move(specs_by_id),
std::move(delete_manifests));
}

// Builder implementation

DeleteFileIndex::Builder::Builder(std::shared_ptr<FileIO> io,
std::vector<ManifestFile> delete_manifests)
: io_(std::move(io)), delete_manifests_(std::move(delete_manifests)) {}
DeleteFileIndex::Builder::Builder(
std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
std::vector<ManifestFile> delete_manifests)
: io_(std::move(io)),
schema_(std::move(schema)),
specs_by_id_(std::move(specs_by_id)),
delete_manifests_(std::move(delete_manifests)) {}

DeleteFileIndex::Builder::~Builder() = default;
DeleteFileIndex::Builder::Builder(Builder&&) noexcept = default;
DeleteFileIndex::Builder& DeleteFileIndex::Builder::operator=(Builder&&) noexcept =
default;

DeleteFileIndex::Builder& DeleteFileIndex::Builder::SpecsById(
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id) {
specs_by_id_ = std::move(specs_by_id);
return *this;
}

DeleteFileIndex::Builder& DeleteFileIndex::Builder::WithSchema(
std::shared_ptr<Schema> schema) {
schema_ = std::move(schema);
return *this;
}

DeleteFileIndex::Builder& DeleteFileIndex::Builder::AfterSequenceNumber(int64_t seq) {
min_sequence_number_ = seq;
return *this;
Expand Down Expand Up @@ -721,10 +719,6 @@ Status DeleteFileIndex::Builder::AddEqualityDelete(

Result<std::unique_ptr<DeleteFileIndex>> DeleteFileIndex::Builder::Build() {
ICEBERG_RETURN_UNEXPECTED(CheckErrors());
ICEBERG_PRECHECK(io_ != nullptr, "FileIO is required to load delete files");
ICEBERG_PRECHECK(schema_ != nullptr, "Schema is required to load delete files");
ICEBERG_PRECHECK(!specs_by_id_.empty(),
"Partition specs are required to load delete files");

std::vector<ManifestEntry> entries;
if (!delete_manifests_.empty()) {
Expand Down
25 changes: 11 additions & 14 deletions src/iceberg/delete_file_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,10 +268,14 @@ class ICEBERG_EXPORT DeleteFileIndex {
/// \brief Create a builder for constructing a DeleteFileIndex from manifest files.
///
/// \param io The FileIO to use for reading manifests
/// \param schema Current table schema
/// \param specs_by_id Partition specs by their IDs
/// \param delete_manifests The delete manifests to index
/// \return A Builder instance
static Result<Builder> BuilderFor(std::shared_ptr<FileIO> io,
std::vector<ManifestFile> delete_manifests);
static Result<Builder> BuilderFor(
std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
std::vector<ManifestFile> delete_manifests);

private:
friend class Builder;
Expand Down Expand Up @@ -318,7 +322,9 @@ class ICEBERG_EXPORT DeleteFileIndex {
class ICEBERG_EXPORT DeleteFileIndex::Builder : public ErrorCollector {
public:
/// \brief Construct a builder from manifest files.
Builder(std::shared_ptr<FileIO> io, std::vector<ManifestFile> delete_manifests);
Builder(std::shared_ptr<FileIO> io, std::shared_ptr<Schema> schema,
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id,
std::vector<ManifestFile> delete_manifests);

~Builder() override;

Expand All @@ -327,15 +333,6 @@ class ICEBERG_EXPORT DeleteFileIndex::Builder : public ErrorCollector {
Builder(const Builder&) = delete;
Builder& operator=(const Builder&) = delete;

/// \brief Set the partition specs by ID.
Builder& SpecsById(
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id);

/// \brief Set the table schema.
///
/// Required for filtering and expression evaluation.
Builder& WithSchema(std::shared_ptr<Schema> schema);

/// \brief Set the minimum sequence number for delete files.
///
/// Only delete files with sequence number > min_sequence_number will be included.
Expand Down Expand Up @@ -384,10 +381,10 @@ class ICEBERG_EXPORT DeleteFileIndex::Builder : public ErrorCollector {
ManifestEntry&& entry);

std::shared_ptr<FileIO> io_;
std::shared_ptr<Schema> schema_;
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id_;
std::vector<ManifestFile> delete_manifests_;
int64_t min_sequence_number_ = 0;
std::unordered_map<int32_t, std::shared_ptr<PartitionSpec>> specs_by_id_;
std::shared_ptr<Schema> schema_;
std::shared_ptr<Expression> data_filter_;
std::shared_ptr<Expression> partition_filter_;
std::shared_ptr<PartitionSet> partition_set_;
Expand Down
Loading
Loading