Skip to content

Commit 012079a

Browse files
committed
resolve some comments
1 parent 0e5c5b2 commit 012079a

6 files changed

Lines changed: 48 additions & 45 deletions

File tree

src/iceberg/partition_spec.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,8 @@ Status PartitionSpec::ValidatePartitionName(const Schema& schema) const {
159159
std::unordered_set<std::string> partition_names;
160160
for (const auto& partition_field : fields_) {
161161
auto name = std::string(partition_field.name());
162-
if (name.empty()) {
163-
return InvalidArgument("Cannot use empty partition name: {}", name);
164-
}
162+
ICEBERG_PRECHECK(!name.empty(), "Cannot use empty partition name: {}", name);
163+
165164
if (partition_names.contains(name)) {
166165
return InvalidArgument("Cannot use partition name more than once: {}", name);
167166
}

src/iceberg/schema.cc

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@
2525
#include "iceberg/result.h"
2626
#include "iceberg/row/struct_like.h"
2727
#include "iceberg/schema_internal.h"
28+
#include "iceberg/table_metadata.h"
2829
#include "iceberg/type.h"
2930
#include "iceberg/util/formatter.h" // IWYU pragma: keep
3031
#include "iceberg/util/macros.h"
3132
#include "iceberg/util/type_util.h"
3233
#include "iceberg/util/visit_type.h"
33-
#include "table_metadata.h"
3434

3535
namespace iceberg {
3636

@@ -148,6 +148,20 @@ Result<std::unordered_map<int32_t, std::vector<size_t>>> Schema::InitIdToPositio
148148
return visitor.Finish();
149149
}
150150

151+
Result<int32_t> Schema::InitHighestFieldId(const Schema& self) {
152+
ICEBERG_ASSIGN_OR_RAISE(auto id_to_field, self.id_to_field_.Get(self));
153+
154+
if (id_to_field.get().empty()) {
155+
return kInitialColumnId;
156+
}
157+
158+
auto max_it = std::ranges::max_element(
159+
id_to_field.get(),
160+
[](const auto& lhs, const auto& rhs) { return lhs.first < rhs.first; });
161+
162+
return max_it->first;
163+
}
164+
151165
Result<std::unique_ptr<StructLikeAccessor>> Schema::GetAccessorById(
152166
int32_t field_id) const {
153167
ICEBERG_ASSIGN_OR_RAISE(auto id_to_position_path, id_to_position_path_.Get(*this));
@@ -229,22 +243,12 @@ Result<std::vector<std::string>> Schema::IdentifierFieldNames() const {
229243
return names;
230244
}
231245

232-
Result<int32_t> Schema::HighestFieldId() const {
233-
ICEBERG_ASSIGN_OR_RAISE(auto id_to_field, id_to_field_.Get(*this));
234-
235-
if (id_to_field.get().empty()) {
236-
return kInitialColumnId;
237-
}
246+
Result<int32_t> Schema::HighestFieldId() const { return highest_field_id_.Get(*this); }
238247

239-
auto max_it = std::ranges::max_element(
240-
id_to_field.get(),
241-
[](const auto& lhs, const auto& rhs) { return lhs.first < rhs.first; });
242-
243-
return max_it->first;
248+
bool Schema::SameSchema(const Schema& other) const {
249+
return fields_ == other.fields_ && identifier_field_ids_ == other.identifier_field_ids_;
244250
}
245251

246-
bool Schema::SameSchema(const Schema& other) const { return fields_ == other.fields_; }
247-
248252
Status Schema::Validate(int32_t format_version) const {
249253
// Get all fields including nested ones
250254
ICEBERG_ASSIGN_OR_RAISE(auto id_to_field, id_to_field_.Get(*this));

src/iceberg/schema.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ class ICEBERG_EXPORT Schema : public StructType {
173173
InitLowerCaseNameToIdMap(const Schema&);
174174
static Result<std::unordered_map<int32_t, std::vector<size_t>>> InitIdToPositionPath(
175175
const Schema&);
176+
static Result<int32_t> InitHighestFieldId(const Schema&);
176177

177178
const std::optional<int32_t> schema_id_;
178179
/// Field IDs that uniquely identify rows in the table.
@@ -185,6 +186,8 @@ class ICEBERG_EXPORT Schema : public StructType {
185186
Lazy<InitLowerCaseNameToIdMap> lowercase_name_to_id_;
186187
/// Mapping from field id to (nested) position path to access the field.
187188
Lazy<InitIdToPositionPath> id_to_position_path_;
189+
/// Highest field ID in the schema.
190+
Lazy<InitHighestFieldId> highest_field_id_;
188191
};
189192

190193
} // namespace iceberg

src/iceberg/table_metadata.cc

Lines changed: 21 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ class TableMetadataBuilder::Impl {
429429
Status SetProperties(const std::unordered_map<std::string, std::string>& updated);
430430
Status RemoveProperties(const std::unordered_set<std::string>& removed);
431431
Status SetCurrentSchema(int32_t schema_id);
432-
Status RemoveSchemas(const std::vector<int32_t>& schema_ids);
432+
Status RemoveSchemas(const std::unordered_set<int32_t>& schema_ids);
433433
Result<int32_t> AddSchema(const Schema& schema, int32_t new_last_column_id);
434434

435435
Result<std::unique_ptr<TableMetadata>> Build();
@@ -634,7 +634,7 @@ Status TableMetadataBuilder::Impl::RemoveProperties(
634634
Status TableMetadataBuilder::Impl::SetCurrentSchema(int32_t schema_id) {
635635
if (schema_id == kLastAdded) {
636636
if (!last_added_schema_id_.has_value()) {
637-
return InvalidArgument("Cannot set last added schema: no schema has been added");
637+
return ValidationFailed("Cannot set last added schema: no schema has been added");
638638
}
639639
return SetCurrentSchema(last_added_schema_id_.value());
640640
}
@@ -686,57 +686,55 @@ Status TableMetadataBuilder::Impl::SetCurrentSchema(int32_t schema_id) {
686686
return {};
687687
}
688688

689-
Status TableMetadataBuilder::Impl::RemoveSchemas(const std::vector<int32_t>& schema_ids) {
690-
std::unordered_set<int32_t> schema_ids_to_remove(schema_ids.begin(), schema_ids.end());
689+
Status TableMetadataBuilder::Impl::RemoveSchemas(
690+
const std::unordered_set<int32_t>& schema_ids) {
691691
auto current_schema_id = metadata_.current_schema_id.value_or(Schema::kInitialSchemaId);
692-
if (!schema_ids_to_remove.contains(current_schema_id)) {
693-
return InvalidArgument("Cannot remove current schema: {}", current_schema_id);
694-
}
692+
ICEBERG_PRECHECK(!schema_ids.contains(current_schema_id),
693+
"Cannot remove current schema: {}", current_schema_id);
695694

696-
if (!schema_ids_to_remove.empty()) {
695+
if (!schema_ids.empty()) {
697696
metadata_.schemas = metadata_.schemas | std::views::filter([&](const auto& schema) {
698-
return !schema_ids_to_remove.contains(
697+
return !schema_ids.contains(
699698
schema->schema_id().value_or(Schema::kInitialSchemaId));
700699
}) |
701700
std::ranges::to<std::vector<std::shared_ptr<Schema>>>();
702-
changes_.push_back(
703-
std::make_unique<table::RemoveSchemas>(std::move(schema_ids_to_remove)));
701+
changes_.push_back(std::make_unique<table::RemoveSchemas>(schema_ids));
704702
}
705703

706704
return {};
707705
}
708706

709707
Result<int32_t> TableMetadataBuilder::Impl::AddSchema(const Schema& schema,
710708
int32_t new_last_column_id) {
711-
if (new_last_column_id < metadata_.last_column_id) {
712-
return InvalidArgument("Invalid last column ID: {} < {} (previous last column ID)",
713-
new_last_column_id, metadata_.last_column_id);
714-
}
709+
ICEBERG_PRECHECK(new_last_column_id >= metadata_.last_column_id,
710+
"Invalid last column ID: {} < {} (previous last column ID)",
711+
new_last_column_id, metadata_.last_column_id);
715712

716713
ICEBERG_RETURN_UNEXPECTED(schema.Validate(metadata_.format_version));
717714

718715
auto new_schema_id = ReuseOrCreateNewSchemaId(schema);
719-
if (schemas_by_id_.find(new_schema_id) != schemas_by_id_.end()) {
716+
if (schemas_by_id_.find(new_schema_id) != schemas_by_id_.end() &&
717+
new_last_column_id == metadata_.last_column_id) {
720718
// update last_added_schema_id if the schema was added in this set of changes (since
721719
// it is now the last)
722720
bool is_new_schema =
723721
last_added_schema_id_.has_value() &&
724-
std::ranges::find_if(changes_, [new_schema_id](const auto& change) {
722+
std::ranges::any_of(changes_, [new_schema_id](const auto& change) {
725723
if (change->kind() != TableUpdate::Kind::kAddSchema) {
726724
return false;
727725
}
728726
auto* add_schema = dynamic_cast<table::AddSchema*>(change.get());
729727
return add_schema->schema()->schema_id().value_or(Schema::kInitialSchemaId) ==
730728
new_schema_id;
731-
}) != changes_.cend();
729+
});
732730
last_added_schema_id_ =
733731
is_new_schema ? std::make_optional(new_schema_id) : std::nullopt;
734732
return new_schema_id;
735733
}
736734

737-
auto new_schema = std::make_shared<Schema>(
738-
std::vector<SchemaField>(schema.fields().begin(), schema.fields().end()),
739-
new_schema_id);
735+
auto new_schema =
736+
std::make_shared<Schema>(schema.fields() | std::ranges::to<std::vector>(),
737+
new_schema_id, schema.IdentifierFieldIds());
740738

741739
metadata_.schemas.push_back(new_schema);
742740
schemas_by_id_.emplace(new_schema_id, new_schema);
@@ -934,8 +932,7 @@ TableMetadataBuilder& TableMetadataBuilder::AddSchema(
934932
std::shared_ptr<Schema> const& schema) {
935933
ICEBERG_BUILDER_ASSIGN_OR_RETURN(auto highest_field_id, schema->HighestFieldId());
936934
auto new_last_column_id = std::max(impl_->metadata().last_column_id, highest_field_id);
937-
ICEBERG_BUILDER_ASSIGN_OR_RETURN(auto schema_id,
938-
impl_->AddSchema(*schema, new_last_column_id));
935+
ICEBERG_BUILDER_RETURN_IF_ERROR(impl_->AddSchema(*schema, new_last_column_id));
939936
return *this;
940937
}
941938

@@ -959,7 +956,7 @@ TableMetadataBuilder& TableMetadataBuilder::RemovePartitionSpecs(
959956
}
960957

961958
TableMetadataBuilder& TableMetadataBuilder::RemoveSchemas(
962-
const std::vector<int32_t>& schema_ids) {
959+
const std::unordered_set<int32_t>& schema_ids) {
963960
ICEBERG_BUILDER_RETURN_IF_ERROR(impl_->RemoveSchemas(schema_ids));
964961
return *this;
965962
}

src/iceberg/table_metadata.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ class ICEBERG_EXPORT TableMetadataBuilder : public ErrorCollector {
292292
///
293293
/// \param schema_ids The IDs of schemas to remove
294294
/// \return Reference to this builder for method chaining
295-
TableMetadataBuilder& RemoveSchemas(const std::vector<int32_t>& schema_ids);
295+
TableMetadataBuilder& RemoveSchemas(const std::unordered_set<int32_t>& schema_ids);
296296

297297
/// \brief Set the default sort order for the table
298298
///

src/iceberg/table_update.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ void UpgradeFormatVersion::GenerateRequirements(TableUpdateContext& context) con
5252
// AddSchema
5353

5454
void AddSchema::ApplyTo(TableMetadataBuilder& builder) const {
55-
throw IcebergError(std::format("{} not implemented", __FUNCTION__));
55+
builder.AddSchema(schema_);
5656
}
5757

5858
void AddSchema::GenerateRequirements(TableUpdateContext& context) const {
@@ -62,7 +62,7 @@ void AddSchema::GenerateRequirements(TableUpdateContext& context) const {
6262
// SetCurrentSchema
6363

6464
void SetCurrentSchema::ApplyTo(TableMetadataBuilder& builder) const {
65-
throw IcebergError(std::format("{} not implemented", __FUNCTION__));
65+
builder.SetCurrentSchema(schema_id_);
6666
}
6767

6868
void SetCurrentSchema::GenerateRequirements(TableUpdateContext& context) const {
@@ -103,7 +103,7 @@ void RemovePartitionSpecs::GenerateRequirements(TableUpdateContext& context) con
103103
// RemoveSchemas
104104

105105
void RemoveSchemas::ApplyTo(TableMetadataBuilder& builder) const {
106-
throw IcebergError(std::format("{} not implemented", __FUNCTION__));
106+
builder.RemoveSchemas(schema_ids_);
107107
}
108108

109109
void RemoveSchemas::GenerateRequirements(TableUpdateContext& context) const {

0 commit comments

Comments
 (0)