Skip to content

Commit 5daadd5

Browse files
committed
fix some issues and use std::string_view wherever possible
1 parent 974cd40 commit 5daadd5

9 files changed

Lines changed: 160 additions & 137 deletions

File tree

src/iceberg/partition_spec.cc

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "iceberg/partition_spec.h"
2121

2222
#include <algorithm>
23+
#include <cstddef>
2324
#include <cstdint>
2425
#include <format>
2526
#include <memory>
@@ -95,9 +96,25 @@ Result<std::unique_ptr<StructType>> PartitionSpec::PartitionType(
9596
return std::make_unique<StructType>(std::move(partition_fields));
9697
}
9798

98-
bool PartitionSpec::SameSpec(const PartitionSpec& other) const {
99-
return fields_ == other.fields_ &&
100-
last_assigned_field_id_ == other.last_assigned_field_id_;
99+
bool PartitionSpec::CompatibleWith(const PartitionSpec& other) const {
100+
if (Equals(other)) {
101+
return true;
102+
}
103+
104+
if (fields_.size() != other.fields_.size()) {
105+
return false;
106+
}
107+
108+
for (const auto& [lhs, rhs] :
109+
std::ranges::zip_view<std::span<const PartitionField>,
110+
std::span<const PartitionField>>{fields_, other.fields_}) {
111+
if (lhs.source_id() != rhs.source_id() || *lhs.transform() != *rhs.transform() ||
112+
lhs.name() != rhs.name()) {
113+
return false;
114+
}
115+
}
116+
117+
return true;
101118
}
102119

103120
std::string PartitionSpec::ToString() const {
@@ -196,4 +213,13 @@ Result<std::unique_ptr<PartitionSpec>> PartitionSpec::Make(
196213
new PartitionSpec(spec_id, std::move(fields), last_assigned_field_id));
197214
}
198215

216+
bool PartitionSpec::HasSequentialFieldIds(const PartitionSpec& spec) {
217+
for (size_t i = 0; i < spec.fields().size(); i += 1) {
218+
if (spec.fields()[i].field_id() != PartitionSpec::kLegacyPartitionDataIdStart + i) {
219+
return false;
220+
}
221+
}
222+
return true;
223+
}
224+
199225
} // namespace iceberg

src/iceberg/partition_spec.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,10 @@ class ICEBERG_EXPORT PartitionSpec : public util::Formattable {
6464
/// \brief Get the partition type binding to the input schema.
6565
Result<std::unique_ptr<StructType>> PartitionType(const Schema& schema) const;
6666

67-
/// \brief Checks whether this partition spec is equivalent to another partition spec
68-
/// while ignoring the spec id.
69-
bool SameSpec(const PartitionSpec& other) const;
67+
/// \brief Returns true if this spec is equivalent to the other, with partition field
68+
/// ids ignored. That is, if both specs have the same number of fields, field order,
69+
/// field name, source columns, and transforms.
70+
bool CompatibleWith(const PartitionSpec& other) const;
7071

7172
std::string ToString() const override;
7273

@@ -115,6 +116,8 @@ class ICEBERG_EXPORT PartitionSpec : public util::Formattable {
115116
int32_t spec_id, std::vector<PartitionField> fields,
116117
std::optional<int32_t> last_assigned_field_id = std::nullopt);
117118

119+
static bool HasSequentialFieldIds(const PartitionSpec& spec);
120+
118121
private:
119122
/// \brief Create a new partition spec.
120123
///

src/iceberg/table_metadata.cc

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,15 @@
3737
#include "iceberg/exception.h"
3838
#include "iceberg/file_io.h"
3939
#include "iceberg/json_internal.h"
40+
#include "iceberg/partition_field.h"
4041
#include "iceberg/partition_spec.h"
4142
#include "iceberg/result.h"
4243
#include "iceberg/schema.h"
4344
#include "iceberg/snapshot.h"
4445
#include "iceberg/sort_order.h"
4546
#include "iceberg/table_properties.h"
4647
#include "iceberg/table_update.h"
48+
#include "iceberg/util/checked_cast.h"
4749
#include "iceberg/util/error_collector.h"
4850
#include "iceberg/util/gzip_internal.h"
4951
#include "iceberg/util/location_util.h"
@@ -547,9 +549,10 @@ Result<int32_t> TableMetadataBuilder::Impl::AddSortOrder(const SortOrder& order)
547549
bool is_new_order =
548550
last_added_order_id_.has_value() &&
549551
std::ranges::find_if(changes_, [new_order_id](const auto& change) {
550-
auto* add_sort_order = dynamic_cast<table::AddSortOrder*>(change.get());
551-
return add_sort_order &&
552-
add_sort_order->sort_order()->order_id() == new_order_id;
552+
return change->kind() == TableUpdate::Kind::kAddSortOrder &&
553+
internal::checked_cast<const table::AddSortOrder&>(*change)
554+
.sort_order()
555+
->order_id() == new_order_id;
553556
}) != changes_.cend();
554557
last_added_order_id_ = is_new_order ? std::make_optional(new_order_id) : std::nullopt;
555558
return new_order_id;
@@ -582,52 +585,63 @@ Result<int32_t> TableMetadataBuilder::Impl::AddSortOrder(const SortOrder& order)
582585
Status TableMetadataBuilder::Impl::SetDefaultPartitionSpec(int32_t spec_id) {
583586
if (spec_id == -1) {
584587
if (!last_added_spec_id_.has_value()) {
585-
return InvalidArgument(
588+
return ValidationFailed(
586589
"Cannot set last added partition spec: no partition spec has been added");
587590
}
588591
return SetDefaultPartitionSpec(last_added_spec_id_.value());
589592
}
590593

591594
if (spec_id == metadata_.default_spec_id) {
595+
// the new spec is already current and no change is needed
592596
return {};
593597
}
594598

595599
metadata_.default_spec_id = spec_id;
596-
597-
changes_.push_back(std::make_unique<table::SetDefaultPartitionSpec>(spec_id));
600+
if (last_added_spec_id_ == std::make_optional(spec_id)) {
601+
changes_.push_back(std::make_unique<table::SetDefaultPartitionSpec>(kLastAdded));
602+
} else {
603+
changes_.push_back(std::make_unique<table::SetDefaultPartitionSpec>(spec_id));
604+
}
598605
return {};
599606
}
600607

601608
Result<int32_t> TableMetadataBuilder::Impl::AddPartitionSpec(const PartitionSpec& spec) {
602609
int32_t new_spec_id = ReuseOrCreateNewPartitionSpecId(spec);
603610

604-
if (specs_by_id_.find(new_spec_id) != specs_by_id_.end()) {
611+
if (specs_by_id_.contains(new_spec_id)) {
605612
// update last_added_spec_id if the spec was added in this set of changes (since it
606613
// is now the last)
607-
bool is_new_spec = last_added_spec_id_.has_value() &&
608-
std::ranges::find_if(changes_, [new_spec_id](const auto& change) {
609-
auto* add_spec =
610-
dynamic_cast<table::AddPartitionSpec*>(change.get());
611-
return add_spec && add_spec->spec()->spec_id() == new_spec_id;
612-
}) != changes_.cend();
614+
bool is_new_spec =
615+
last_added_spec_id_.has_value() &&
616+
std::ranges::find_if(changes_, [new_spec_id](const auto& change) {
617+
return change->kind() == TableUpdate::Kind::kAddPartitionSpec &&
618+
internal::checked_cast<const table::AddPartitionSpec&>(*change)
619+
.spec()
620+
->spec_id() == new_spec_id;
621+
}) != changes_.cend();
613622
last_added_spec_id_ = is_new_spec ? std::make_optional(new_spec_id) : std::nullopt;
614623
return new_spec_id;
615624
}
616625

617626
// Get current schema and validate the partition spec against it
618627
ICEBERG_ASSIGN_OR_RAISE(auto schema, metadata_.Schema());
619628
ICEBERG_RETURN_UNEXPECTED(spec.Validate(*schema, /*allow_missing_fields=*/false));
629+
ICEBERG_CHECK(
630+
metadata_.format_version > 1 || PartitionSpec::HasSequentialFieldIds(spec),
631+
"Spec does not use sequential IDs that are required in v1: {}", spec.ToString());
620632

621-
std::shared_ptr<PartitionSpec> new_spec;
622633
ICEBERG_ASSIGN_OR_RAISE(
623-
new_spec,
634+
std::shared_ptr<PartitionSpec> new_spec,
624635
PartitionSpec::Make(new_spec_id, std::vector<PartitionField>(spec.fields().begin(),
625636
spec.fields().end())));
637+
metadata_.last_partition_id =
638+
std::max(metadata_.last_partition_id, new_spec->last_assigned_field_id());
626639
metadata_.partition_specs.push_back(new_spec);
627640
specs_by_id_.emplace(new_spec_id, new_spec);
628641

629642
changes_.push_back(std::make_unique<table::AddPartitionSpec>(new_spec));
630643
last_added_spec_id_ = new_spec_id;
644+
631645
return new_spec_id;
632646
}
633647

@@ -714,10 +728,10 @@ int32_t TableMetadataBuilder::Impl::ReuseOrCreateNewSortOrderId(
714728

715729
int32_t TableMetadataBuilder::Impl::ReuseOrCreateNewPartitionSpecId(
716730
const PartitionSpec& new_spec) {
717-
// determine the next spec id
731+
// if the spec already exists, use the same ID. otherwise, use the highest ID + 1.
718732
int32_t new_spec_id = PartitionSpec::kInitialSpecId;
719733
for (const auto& spec : metadata_.partition_specs) {
720-
if (spec->SameSpec(new_spec)) {
734+
if (new_spec.CompatibleWith(*spec)) {
721735
return spec->spec_id();
722736
} else if (new_spec_id <= spec->spec_id()) {
723737
new_spec_id = spec->spec_id() + 1;

src/iceberg/transaction.cc

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,10 @@ Status Transaction::Apply(PendingUpdate& update) {
8989
case PendingUpdate::Kind::kUpdatePartitionSpec: {
9090
auto& update_partition_spec = internal::checked_cast<UpdatePartitionSpec&>(update);
9191
ICEBERG_ASSIGN_OR_RAISE(auto result, update_partition_spec.Apply());
92-
93-
metadata_builder_->SetDefaultPartitionSpec(result.spec);
9492
if (result.set_as_default) {
95-
metadata_builder_->SetDefaultPartitionSpec(result.spec);
93+
metadata_builder_->SetDefaultPartitionSpec(std::move(result.spec));
9694
} else {
97-
metadata_builder_->AddPartitionSpec(result.spec);
95+
metadata_builder_->AddPartitionSpec(std::move(result.spec));
9896
}
9997
} break;
10098
default:

src/iceberg/type_fwd.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -123,15 +123,12 @@ struct TableMetadata;
123123

124124
/// \brief Expression.
125125
class BoundPredicate;
126+
class BoundReference;
127+
class BoundTransform;
126128
class Expression;
127129
class Literal;
130+
class Term;
128131
class UnboundPredicate;
129-
class BoundReference;
130-
class BoundTransform;
131-
template <typename B>
132-
class UnboundTerm;
133-
class NamedReference;
134-
class UnboundTransform;
135132

136133
/// \brief Scan.
137134
class DataTableScan;

0 commit comments

Comments
 (0)