Skip to content

Commit d026845

Browse files
committed
resolve some comments
1 parent e5b1be3 commit d026845

3 files changed

Lines changed: 62 additions & 66 deletions

File tree

src/iceberg/update/update_schema.cc

Lines changed: 49 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,15 @@ class ApplyChangesVisitor {
5757
/// \brief Apply changes to a type using schema visitor pattern
5858
Result<std::shared_ptr<Type>> ApplyChanges(const std::shared_ptr<Type>& type,
5959
int32_t parent_id) {
60-
return VisitSchemaInline(*type, this, type, parent_id);
60+
return VisitTypeCategory(*type, this, type, parent_id);
6161
}
6262

6363
/// \brief Apply changes to a struct type
6464
Result<std::shared_ptr<Type>> VisitStruct(const StructType& struct_type,
6565
const std::shared_ptr<Type>& base_type,
6666
int32_t parent_id) {
6767
std::vector<SchemaField> new_fields;
68+
bool has_changes = false;
6869

6970
// Process existing fields
7071
for (const auto& field : struct_type.fields()) {
@@ -77,13 +78,23 @@ class ApplyChangesVisitor {
7778
ProcessField(field, field_type_result));
7879

7980
if (processed_field.has_value()) {
80-
new_fields.push_back(std::move(processed_field.value()));
81+
const auto& new_field = processed_field.value();
82+
new_fields.push_back(new_field);
83+
84+
// Check if this field changed
85+
if (new_field != field) {
86+
has_changes = true;
87+
}
88+
} else {
89+
// Field was deleted
90+
has_changes = true;
8191
}
8292
}
8393

8494
// Add new fields for this struct
8595
auto adds_it = parent_to_added_ids_.find(parent_id);
86-
if (adds_it != parent_to_added_ids_.end()) {
96+
if (adds_it != parent_to_added_ids_.end() && !adds_it->second.empty()) {
97+
has_changes = true;
8798
for (int32_t added_id : adds_it->second) {
8899
auto added_field_it = updates_.find(added_id);
89100
if (added_field_it != updates_.end()) {
@@ -92,6 +103,11 @@ class ApplyChangesVisitor {
92103
}
93104
}
94105

106+
// Return original type if nothing changed
107+
if (!has_changes) {
108+
return base_type;
109+
}
110+
95111
return std::make_shared<StructType>(std::move(new_fields));
96112
}
97113

@@ -253,7 +269,7 @@ UpdateSchema::UpdateSchema(std::shared_ptr<Transaction> transaction)
253269
AddError(identifier_names_result.error());
254270
return;
255271
}
256-
identifier_field_names_ = identifier_names_result.value();
272+
identifier_field_names_ = std::move(identifier_names_result.value());
257273

258274
// Initialize id_to_parent map from the schema
259275
id_to_parent_ = IndexParents(*schema_);
@@ -349,11 +365,8 @@ UpdateSchema& UpdateSchema::DeleteColumn(std::string_view name) {
349365
const auto& field = field_opt->get();
350366
int32_t field_id = field.field_id();
351367

352-
// Check the field doesn't have additions
353368
ICEBERG_BUILDER_CHECK(!parent_to_added_ids_.contains(field_id),
354369
"Cannot delete a column that has additions: {}", name);
355-
356-
// Check the field doesn't have updates
357370
ICEBERG_BUILDER_CHECK(!updates_.contains(field_id),
358371
"Cannot delete a column that has updates: {}", name);
359372

@@ -405,7 +418,6 @@ Result<UpdateSchema::ApplyResult> UpdateSchema::Apply() {
405418
const auto& field = field_opt->get();
406419
auto field_id = field.field_id();
407420

408-
// Check the field itself is not deleted
409421
ICEBERG_CHECK(!deletes_.contains(field_id),
410422
"Cannot delete identifier field {}. To force deletion, also call "
411423
"SetIdentifierFields to update identifier fields.",
@@ -415,35 +427,24 @@ Result<UpdateSchema::ApplyResult> UpdateSchema::Apply() {
415427
auto parent_it = id_to_parent_.find(field_id);
416428
while (parent_it != id_to_parent_.end()) {
417429
int32_t parent_id = parent_it->second;
418-
ICEBERG_ASSIGN_OR_RAISE(auto parent_field_opt, schema_->FindFieldById(parent_id));
419430
ICEBERG_CHECK(
420431
!deletes_.contains(parent_id),
421-
"Cannot delete field {} as it will delete nested identifier field {}",
422-
parent_field_opt.has_value() ? std::string(parent_field_opt->get().name())
423-
: std::to_string(parent_id),
424-
name);
432+
"Cannot delete field with id {} as it will delete nested identifier field {}",
433+
parent_id, name);
425434
parent_it = id_to_parent_.find(parent_id);
426435
}
427436
}
428437
}
429438

430-
// Apply schema changes using visitor pattern
431-
// Create a temporary struct type from the schema to use with the visitor
432-
auto schema_struct_type = std::make_shared<StructType>(
433-
schema_->fields() | std::ranges::to<std::vector<SchemaField>>());
434-
435439
// Apply changes recursively using the visitor
436440
ApplyChangesVisitor visitor(deletes_, updates_, parent_to_added_ids_);
437-
ICEBERG_ASSIGN_OR_RAISE(auto new_type,
438-
visitor.ApplyChanges(schema_struct_type, kTableRootId));
441+
ICEBERG_ASSIGN_OR_RAISE(auto new_type, visitor.ApplyChanges(schema_, kTableRootId));
439442

440443
// Cast result back to StructType and extract fields
441444
auto new_struct_type = internal::checked_pointer_cast<StructType>(new_type);
442-
std::vector<SchemaField> new_fields(new_struct_type->fields() |
443-
std::ranges::to<std::vector<SchemaField>>());
444445

445446
// Convert identifier field names to IDs
446-
auto temp_schema = std::make_shared<Schema>(new_fields);
447+
auto temp_schema = new_struct_type->ToSchema();
447448
std::vector<int32_t> fresh_identifier_ids;
448449
for (const auto& name : identifier_field_names_) {
449450
ICEBERG_ASSIGN_OR_RAISE(auto field_opt,
@@ -456,6 +457,7 @@ Result<UpdateSchema::ApplyResult> UpdateSchema::Apply() {
456457
}
457458

458459
// Create the new schema
460+
auto new_fields = temp_schema->fields() | std::ranges::to<std::vector<SchemaField>>();
459461
ICEBERG_ASSIGN_OR_RAISE(
460462
auto new_schema,
461463
Schema::Make(std::move(new_fields), schema_->schema_id(), fresh_identifier_ids));
@@ -464,6 +466,7 @@ Result<UpdateSchema::ApplyResult> UpdateSchema::Apply() {
464466
.new_last_column_id = last_column_id_};
465467
}
466468

469+
// TODO(Guotao Yu): v3 default value is not yet supported
467470
UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string_view> parent,
468471
std::string_view name, bool is_optional,
469472
std::shared_ptr<Type> type,
@@ -472,7 +475,8 @@ UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string_view> pa
472475
std::string full_name;
473476

474477
// Handle parent field
475-
if (parent.has_value() && !parent->empty()) {
478+
if (parent.has_value()) {
479+
ICEBERG_BUILDER_CHECK(!parent->empty(), "Parent name cannot be empty");
476480
// Find parent field
477481
ICEBERG_BUILDER_ASSIGN_OR_RETURN(auto parent_field_opt, FindField(*parent));
478482
ICEBERG_BUILDER_CHECK(parent_field_opt.has_value(), "Cannot find parent struct: {}",
@@ -483,22 +487,19 @@ UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string_view> pa
483487

484488
// Get the actual field to add to (handle map/list)
485489
const SchemaField* target_field = &parent_field;
486-
if (parent_type->is_nested()) {
487-
const auto& nested = internal::checked_cast<const NestedType&>(*parent_type);
488-
if (nested.type_id() == TypeId::kMap) {
489-
// For maps, add to value field
490-
const auto& map_type = internal::checked_cast<const MapType&>(nested);
491-
target_field = &map_type.value();
492-
} else if (nested.type_id() == TypeId::kList) {
493-
// For lists, add to element field
494-
const auto& list_type = internal::checked_cast<const ListType&>(nested);
495-
target_field = &list_type.element();
496-
}
490+
491+
if (parent_type->type_id() == TypeId::kMap) {
492+
// For maps, add to value field
493+
const auto& map_type = internal::checked_cast<const MapType&>(*parent_type);
494+
target_field = &map_type.value();
495+
} else if (parent_type->type_id() == TypeId::kList) {
496+
// For lists, add to element field
497+
const auto& list_type = internal::checked_cast<const ListType&>(*parent_type);
498+
target_field = &list_type.element();
497499
}
498500

499501
// Validate target is a struct
500-
ICEBERG_BUILDER_CHECK(target_field->type()->is_nested() &&
501-
target_field->type()->type_id() == TypeId::kStruct,
502+
ICEBERG_BUILDER_CHECK(target_field->type()->type_id() == TypeId::kStruct,
502503
"Cannot add to non-struct column: {}: {}", *parent,
503504
target_field->type()->ToString());
504505

@@ -510,28 +511,25 @@ UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string_view> pa
510511

511512
// Check field doesn't already exist (unless it's being deleted)
512513
std::string nested_name = std::format("{}.{}", *parent, name);
513-
ICEBERG_BUILDER_ASSIGN_OR_RETURN(auto current_field_opt, FindField(nested_name));
514-
if (current_field_opt.has_value()) {
515-
const auto& current_field = current_field_opt->get();
516-
ICEBERG_BUILDER_CHECK(deletes_.contains(current_field.field_id()),
517-
"Cannot add column, name already exists: {}.{}", *parent,
518-
name);
519-
}
514+
ICEBERG_BUILDER_ASSIGN_OR_RETURN(auto current_field, FindField(nested_name));
515+
ICEBERG_BUILDER_CHECK(
516+
!current_field.has_value() || deletes_.contains(current_field->get().field_id()),
517+
"Cannot add column, name already exists: {}.{}", *parent, name);
520518

521519
// Build full name using canonical name of parent
522520
ICEBERG_BUILDER_ASSIGN_OR_RETURN(auto parent_name_opt,
523521
schema_->FindColumnNameById(parent_id));
524522
ICEBERG_BUILDER_CHECK(parent_name_opt.has_value(),
525523
"Cannot find column name for parent id: {}", parent_id);
524+
526525
full_name = std::format("{}.{}", *parent_name_opt, name);
527526
} else {
528527
// Top-level field
529-
ICEBERG_BUILDER_ASSIGN_OR_RETURN(auto current_field_opt, FindField(name));
530-
if (current_field_opt.has_value()) {
531-
const auto& current_field = current_field_opt->get();
532-
ICEBERG_BUILDER_CHECK(deletes_.contains(current_field.field_id()),
533-
"Cannot add column, name already exists: {}", name);
534-
}
528+
ICEBERG_BUILDER_ASSIGN_OR_RETURN(auto current_field, FindField(name));
529+
ICEBERG_BUILDER_CHECK(
530+
!current_field.has_value() || deletes_.contains(current_field->get().field_id()),
531+
"Cannot add column, name already exists: {}", name);
532+
535533
full_name = std::string(name);
536534
}
537535

@@ -567,11 +565,7 @@ UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string_view> pa
567565
return *this;
568566
}
569567

570-
int32_t UpdateSchema::AssignNewColumnId() {
571-
int32_t next = last_column_id_ + 1;
572-
last_column_id_ = next;
573-
return next;
574-
}
568+
int32_t UpdateSchema::AssignNewColumnId() { return ++last_column_id_; }
575569

576570
Result<std::optional<std::reference_wrapper<const SchemaField>>> UpdateSchema::FindField(
577571
std::string_view name) const {

src/iceberg/util/visit_type.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ inline Status VisitTypeIdInline(TypeId id, VISITOR* visitor, ARGS&&... args) {
124124

125125
#undef TYPE_ID_VISIT_INLINE
126126

127-
/// \brief Visit a type using a schema visitor pattern
127+
/// \brief Visit a type using a categorical visitor pattern
128128
///
129129
/// This function provides a simplified visitor interface that groups Iceberg types into
130130
/// four categories based on their structural properties:
@@ -149,13 +149,15 @@ inline Status VisitTypeIdInline(TypeId id, VISITOR* visitor, ARGS&&... args) {
149149
/// \param args Additional arguments forwarded to the Visit methods
150150
/// \return The return value from the invoked Visit method
151151
template <typename VISITOR, typename... ARGS>
152-
inline auto VisitSchemaInline(const Type& type, VISITOR* visitor, ARGS&&... args) {
152+
inline auto VisitTypeCategory(const Type& type, VISITOR* visitor, ARGS&&... args) {
153153
#define SCHEMA_VISIT_ACTION(TYPE_CLASS) \
154154
return visitor->Visit##TYPE_CLASS( \
155155
internal::checked_cast<const TYPE_CLASS##Type&>(type), \
156156
std::forward<ARGS>(args)...);
157157

158-
switch (type.type_id()) { ICEBERG_GENERATE_SCHEMA_VISITOR_CASES(SCHEMA_VISIT_ACTION) }
158+
switch (type.type_id()) {
159+
ICEBERG_TYPE_SWITCH_WITH_PRIMITIVE_DEFAULT(SCHEMA_VISIT_ACTION)
160+
}
159161

160162
#undef SCHEMA_VISIT_ACTION
161163
}

src/iceberg/util/visitor_generate.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,14 @@ namespace iceberg {
4747
/// - List types -> calls ACTION with List
4848
/// - Map types -> calls ACTION with Map
4949
/// - All primitive types (default) -> calls ACTION with Primitive
50-
#define ICEBERG_GENERATE_SCHEMA_VISITOR_CASES(ACTION) \
51-
case ::iceberg::TypeId::kStruct: \
52-
ACTION(Struct) \
53-
case ::iceberg::TypeId::kList: \
54-
ACTION(List) \
55-
case ::iceberg::TypeId::kMap: \
56-
ACTION(Map) \
57-
default: \
50+
#define ICEBERG_TYPE_SWITCH_WITH_PRIMITIVE_DEFAULT(ACTION) \
51+
case ::iceberg::TypeId::kStruct: \
52+
ACTION(Struct) \
53+
case ::iceberg::TypeId::kList: \
54+
ACTION(List) \
55+
case ::iceberg::TypeId::kMap: \
56+
ACTION(Map) \
57+
default: \
5858
ACTION(Primitive)
5959

6060
} // namespace iceberg

0 commit comments

Comments
 (0)