@@ -57,14 +57,15 @@ class ApplyChangesVisitor {
5757 // / \brief Apply changes to a type using schema visitor pattern
5858 Result<std::shared_ptr<Type>> ApplyChanges (const std::shared_ptr<Type>& type,
5959 int32_t parent_id) {
60- return VisitSchemaInline (*type, this , type, parent_id);
60+ return VisitTypeCategory (*type, this , type, parent_id);
6161 }
6262
6363 // / \brief Apply changes to a struct type
6464 Result<std::shared_ptr<Type>> VisitStruct (const StructType& struct_type,
6565 const std::shared_ptr<Type>& base_type,
6666 int32_t parent_id) {
6767 std::vector<SchemaField> new_fields;
68+ bool has_changes = false ;
6869
6970 // Process existing fields
7071 for (const auto & field : struct_type.fields ()) {
@@ -77,13 +78,23 @@ class ApplyChangesVisitor {
7778 ProcessField (field, field_type_result));
7879
7980 if (processed_field.has_value ()) {
80- new_fields.push_back (std::move (processed_field.value ()));
81+ const auto & new_field = processed_field.value ();
82+ new_fields.push_back (new_field);
83+
84+ // Check if this field changed
85+ if (new_field != field) {
86+ has_changes = true ;
87+ }
88+ } else {
89+ // Field was deleted
90+ has_changes = true ;
8191 }
8292 }
8393
8494 // Add new fields for this struct
8595 auto adds_it = parent_to_added_ids_.find (parent_id);
86- if (adds_it != parent_to_added_ids_.end ()) {
96+ if (adds_it != parent_to_added_ids_.end () && !adds_it->second .empty ()) {
97+ has_changes = true ;
8798 for (int32_t added_id : adds_it->second ) {
8899 auto added_field_it = updates_.find (added_id);
89100 if (added_field_it != updates_.end ()) {
@@ -92,6 +103,11 @@ class ApplyChangesVisitor {
92103 }
93104 }
94105
106+ // Return original type if nothing changed
107+ if (!has_changes) {
108+ return base_type;
109+ }
110+
95111 return std::make_shared<StructType>(std::move (new_fields));
96112 }
97113
@@ -253,7 +269,7 @@ UpdateSchema::UpdateSchema(std::shared_ptr<Transaction> transaction)
253269 AddError (identifier_names_result.error ());
254270 return ;
255271 }
256- identifier_field_names_ = identifier_names_result.value ();
272+ identifier_field_names_ = std::move ( identifier_names_result.value () );
257273
258274 // Initialize id_to_parent map from the schema
259275 id_to_parent_ = IndexParents (*schema_);
@@ -349,11 +365,8 @@ UpdateSchema& UpdateSchema::DeleteColumn(std::string_view name) {
349365 const auto & field = field_opt->get ();
350366 int32_t field_id = field.field_id ();
351367
352- // Check the field doesn't have additions
353368 ICEBERG_BUILDER_CHECK (!parent_to_added_ids_.contains (field_id),
354369 " Cannot delete a column that has additions: {}" , name);
355-
356- // Check the field doesn't have updates
357370 ICEBERG_BUILDER_CHECK (!updates_.contains (field_id),
358371 " Cannot delete a column that has updates: {}" , name);
359372
@@ -405,7 +418,6 @@ Result<UpdateSchema::ApplyResult> UpdateSchema::Apply() {
405418 const auto & field = field_opt->get ();
406419 auto field_id = field.field_id ();
407420
408- // Check the field itself is not deleted
409421 ICEBERG_CHECK (!deletes_.contains (field_id),
410422 " Cannot delete identifier field {}. To force deletion, also call "
411423 " SetIdentifierFields to update identifier fields." ,
@@ -415,35 +427,24 @@ Result<UpdateSchema::ApplyResult> UpdateSchema::Apply() {
415427 auto parent_it = id_to_parent_.find (field_id);
416428 while (parent_it != id_to_parent_.end ()) {
417429 int32_t parent_id = parent_it->second ;
418- ICEBERG_ASSIGN_OR_RAISE (auto parent_field_opt, schema_->FindFieldById (parent_id));
419430 ICEBERG_CHECK (
420431 !deletes_.contains (parent_id),
421- " Cannot delete field {} as it will delete nested identifier field {}" ,
422- parent_field_opt.has_value () ? std::string (parent_field_opt->get ().name ())
423- : std::to_string (parent_id),
424- name);
432+ " Cannot delete field with id {} as it will delete nested identifier field {}" ,
433+ parent_id, name);
425434 parent_it = id_to_parent_.find (parent_id);
426435 }
427436 }
428437 }
429438
430- // Apply schema changes using visitor pattern
431- // Create a temporary struct type from the schema to use with the visitor
432- auto schema_struct_type = std::make_shared<StructType>(
433- schema_->fields () | std::ranges::to<std::vector<SchemaField>>());
434-
435439 // Apply changes recursively using the visitor
436440 ApplyChangesVisitor visitor (deletes_, updates_, parent_to_added_ids_);
437- ICEBERG_ASSIGN_OR_RAISE (auto new_type,
438- visitor.ApplyChanges (schema_struct_type, kTableRootId ));
441+ ICEBERG_ASSIGN_OR_RAISE (auto new_type, visitor.ApplyChanges (schema_, kTableRootId ));
439442
440443 // Cast result back to StructType and extract fields
441444 auto new_struct_type = internal::checked_pointer_cast<StructType>(new_type);
442- std::vector<SchemaField> new_fields (new_struct_type->fields () |
443- std::ranges::to<std::vector<SchemaField>>());
444445
445446 // Convert identifier field names to IDs
446- auto temp_schema = std::make_shared<Schema>(new_fields );
447+ auto temp_schema = new_struct_type-> ToSchema ( );
447448 std::vector<int32_t > fresh_identifier_ids;
448449 for (const auto & name : identifier_field_names_) {
449450 ICEBERG_ASSIGN_OR_RAISE (auto field_opt,
@@ -456,6 +457,7 @@ Result<UpdateSchema::ApplyResult> UpdateSchema::Apply() {
456457 }
457458
458459 // Create the new schema
460+ auto new_fields = temp_schema->fields () | std::ranges::to<std::vector<SchemaField>>();
459461 ICEBERG_ASSIGN_OR_RAISE (
460462 auto new_schema,
461463 Schema::Make (std::move (new_fields), schema_->schema_id (), fresh_identifier_ids));
@@ -464,6 +466,7 @@ Result<UpdateSchema::ApplyResult> UpdateSchema::Apply() {
464466 .new_last_column_id = last_column_id_};
465467}
466468
469+ // TODO(Guotao Yu): v3 default value is not yet supported
467470UpdateSchema& UpdateSchema::AddColumnInternal (std::optional<std::string_view> parent,
468471 std::string_view name, bool is_optional,
469472 std::shared_ptr<Type> type,
@@ -472,7 +475,8 @@ UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string_view> pa
472475 std::string full_name;
473476
474477 // Handle parent field
475- if (parent.has_value () && !parent->empty ()) {
478+ if (parent.has_value ()) {
479+ ICEBERG_BUILDER_CHECK (!parent->empty (), " Parent name cannot be empty" );
476480 // Find parent field
477481 ICEBERG_BUILDER_ASSIGN_OR_RETURN (auto parent_field_opt, FindField (*parent));
478482 ICEBERG_BUILDER_CHECK (parent_field_opt.has_value (), " Cannot find parent struct: {}" ,
@@ -483,22 +487,19 @@ UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string_view> pa
483487
484488 // Get the actual field to add to (handle map/list)
485489 const SchemaField* target_field = &parent_field;
486- if (parent_type->is_nested ()) {
487- const auto & nested = internal::checked_cast<const NestedType&>(*parent_type);
488- if (nested.type_id () == TypeId::kMap ) {
489- // For maps, add to value field
490- const auto & map_type = internal::checked_cast<const MapType&>(nested);
491- target_field = &map_type.value ();
492- } else if (nested.type_id () == TypeId::kList ) {
493- // For lists, add to element field
494- const auto & list_type = internal::checked_cast<const ListType&>(nested);
495- target_field = &list_type.element ();
496- }
490+
491+ if (parent_type->type_id () == TypeId::kMap ) {
492+ // For maps, add to value field
493+ const auto & map_type = internal::checked_cast<const MapType&>(*parent_type);
494+ target_field = &map_type.value ();
495+ } else if (parent_type->type_id () == TypeId::kList ) {
496+ // For lists, add to element field
497+ const auto & list_type = internal::checked_cast<const ListType&>(*parent_type);
498+ target_field = &list_type.element ();
497499 }
498500
499501 // Validate target is a struct
500- ICEBERG_BUILDER_CHECK (target_field->type ()->is_nested () &&
501- target_field->type ()->type_id () == TypeId::kStruct ,
502+ ICEBERG_BUILDER_CHECK (target_field->type ()->type_id () == TypeId::kStruct ,
502503 " Cannot add to non-struct column: {}: {}" , *parent,
503504 target_field->type ()->ToString ());
504505
@@ -510,28 +511,25 @@ UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string_view> pa
510511
511512 // Check field doesn't already exist (unless it's being deleted)
512513 std::string nested_name = std::format (" {}.{}" , *parent, name);
513- ICEBERG_BUILDER_ASSIGN_OR_RETURN (auto current_field_opt, FindField (nested_name));
514- if (current_field_opt.has_value ()) {
515- const auto & current_field = current_field_opt->get ();
516- ICEBERG_BUILDER_CHECK (deletes_.contains (current_field.field_id ()),
517- " Cannot add column, name already exists: {}.{}" , *parent,
518- name);
519- }
514+ ICEBERG_BUILDER_ASSIGN_OR_RETURN (auto current_field, FindField (nested_name));
515+ ICEBERG_BUILDER_CHECK (
516+ !current_field.has_value () || deletes_.contains (current_field->get ().field_id ()),
517+ " Cannot add column, name already exists: {}.{}" , *parent, name);
520518
521519 // Build full name using canonical name of parent
522520 ICEBERG_BUILDER_ASSIGN_OR_RETURN (auto parent_name_opt,
523521 schema_->FindColumnNameById (parent_id));
524522 ICEBERG_BUILDER_CHECK (parent_name_opt.has_value (),
525523 " Cannot find column name for parent id: {}" , parent_id);
524+
526525 full_name = std::format (" {}.{}" , *parent_name_opt, name);
527526 } else {
528527 // Top-level field
529- ICEBERG_BUILDER_ASSIGN_OR_RETURN (auto current_field_opt, FindField (name));
530- if (current_field_opt.has_value ()) {
531- const auto & current_field = current_field_opt->get ();
532- ICEBERG_BUILDER_CHECK (deletes_.contains (current_field.field_id ()),
533- " Cannot add column, name already exists: {}" , name);
534- }
528+ ICEBERG_BUILDER_ASSIGN_OR_RETURN (auto current_field, FindField (name));
529+ ICEBERG_BUILDER_CHECK (
530+ !current_field.has_value () || deletes_.contains (current_field->get ().field_id ()),
531+ " Cannot add column, name already exists: {}" , name);
532+
535533 full_name = std::string (name);
536534 }
537535
@@ -567,11 +565,7 @@ UpdateSchema& UpdateSchema::AddColumnInternal(std::optional<std::string_view> pa
567565 return *this ;
568566}
569567
570- int32_t UpdateSchema::AssignNewColumnId () {
571- int32_t next = last_column_id_ + 1 ;
572- last_column_id_ = next;
573- return next;
574- }
568+ int32_t UpdateSchema::AssignNewColumnId () { return ++last_column_id_; }
575569
576570Result<std::optional<std::reference_wrapper<const SchemaField>>> UpdateSchema::FindField (
577571 std::string_view name) const {
0 commit comments