@@ -219,6 +219,11 @@ Status ToAvroNodeVisitor::Visit(const BinaryType& type, ::avro::NodePtr* node) {
219219 return {};
220220}
221221
222+ Status ToAvroNodeVisitor::Visit (const UnknownType&, ::avro::NodePtr* node) {
223+ *node = std::make_shared<::avro::NodePrimitive>(::avro::AVRO_NULL );
224+ return {};
225+ }
226+
222227Status ToAvroNodeVisitor::Visit (const StructType& type, ::avro::NodePtr* node) {
223228 *node = std::make_shared<::avro::NodeRecord>();
224229
@@ -320,7 +325,7 @@ Status ToAvroNodeVisitor::Visit(const SchemaField& field, ::avro::NodePtr* node)
320325 field_ids_.push (field.field_id ());
321326 ICEBERG_RETURN_UNEXPECTED (VisitTypeInline (*field.type (), /* visitor=*/ this , node));
322327
323- if (field.optional ()) {
328+ if (field.optional () && (*node)-> type () != ::avro:: AVRO_NULL ) {
324329 ::avro::MultiLeaves union_types;
325330 union_types.add (std::make_shared<::avro::NodePrimitive>(::avro::AVRO_NULL ));
326331 union_types.add (std::move (*node));
@@ -365,8 +370,8 @@ Status HasIdVisitor::Visit(const ::avro::NodePtr& node) {
365370 case ::avro::AVRO_STRING :
366371 case ::avro::AVRO_BYTES :
367372 case ::avro::AVRO_FIXED :
368- return {};
369373 case ::avro::AVRO_NULL :
374+ return {};
370375 case ::avro::AVRO_ENUM :
371376 default :
372377 return InvalidSchema (" Unsupported Avro type: {}" , static_cast <int >(node->type ()));
@@ -494,6 +499,10 @@ Result<int32_t> GetFieldId(const ::avro::NodePtr& node, size_t field_idx) {
494499
495500Status ValidateAvroSchemaEvolution (const Type& expected_type,
496501 const ::avro::NodePtr& avro_node) {
502+ if (avro_node->type () == ::avro::AVRO_NULL ) {
503+ return {};
504+ }
505+
497506 switch (expected_type.type_id ()) {
498507 case TypeId::kBoolean :
499508 if (avro_node->type () == ::avro::AVRO_BOOL ) {
@@ -583,6 +592,8 @@ Status ValidateAvroSchemaEvolution(const Type& expected_type,
583592 return {};
584593 }
585594 break ;
595+ case TypeId::kUnknown :
596+ return {};
586597 default :
587598 break ;
588599 }
@@ -618,6 +629,35 @@ Result<FieldProjection> ProjectNested(const Type& expected_type,
618629 const ::avro::NodePtr& avro_node,
619630 bool prune_source);
620631
632+ Result<FieldProjection> ProjectField (const SchemaField& expected_field,
633+ const ::avro::NodePtr& avro_node,
634+ size_t source_index, bool prune_source) {
635+ const Type& expected_type = *expected_field.type ();
636+ ::avro::NodePtr field_node;
637+ ICEBERG_RETURN_UNEXPECTED (UnwrapUnion (avro_node, &field_node));
638+
639+ FieldProjection projection;
640+ if (expected_type.type_id () == TypeId::kUnknown ||
641+ field_node->type () == ::avro::AVRO_NULL ) {
642+ if (!expected_field.optional ()) {
643+ return InvalidSchema (" Cannot project required field with ID: {} as null" ,
644+ expected_field.field_id ());
645+ }
646+ projection.kind = FieldProjection::Kind::kNull ;
647+ return projection;
648+ }
649+
650+ if (expected_type.is_nested ()) {
651+ ICEBERG_ASSIGN_OR_RAISE (projection,
652+ ProjectNested (expected_type, field_node, prune_source));
653+ } else {
654+ ICEBERG_RETURN_UNEXPECTED (ValidateAvroSchemaEvolution (expected_type, field_node));
655+ }
656+ projection.from = source_index;
657+ projection.kind = FieldProjection::Kind::kProjected ;
658+ return projection;
659+ }
660+
621661Result<FieldProjection> ProjectStruct (const StructType& struct_type,
622662 const ::avro::NodePtr& avro_node,
623663 bool prune_source) {
@@ -653,18 +693,9 @@ Result<FieldProjection> ProjectStruct(const StructType& struct_type,
653693 FieldProjection child_projection;
654694
655695 if (auto iter = node_info_map.find (field_id); iter != node_info_map.cend ()) {
656- ::avro::NodePtr field_node;
657- ICEBERG_RETURN_UNEXPECTED (UnwrapUnion (iter->second .field_node , &field_node));
658- if (expected_field.type ()->is_nested ()) {
659- ICEBERG_ASSIGN_OR_RAISE (
660- child_projection,
661- ProjectNested (*expected_field.type (), field_node, prune_source));
662- } else {
663- ICEBERG_RETURN_UNEXPECTED (
664- ValidateAvroSchemaEvolution (*expected_field.type (), field_node));
665- }
666- child_projection.from = iter->second .local_index ;
667- child_projection.kind = FieldProjection::Kind::kProjected ;
696+ ICEBERG_ASSIGN_OR_RAISE (child_projection,
697+ ProjectField (expected_field, iter->second .field_node ,
698+ iter->second .local_index , prune_source));
668699 } else if (MetadataColumns::IsMetadataColumn (field_id)) {
669700 child_projection.kind = FieldProjection::Kind::kMetadata ;
670701 } else if (expected_field.optional ()) {
@@ -701,20 +732,9 @@ Result<FieldProjection> ProjectList(const ListType& list_type,
701732 }
702733
703734 FieldProjection element_projection;
704- ::avro::NodePtr element_node;
705- ICEBERG_RETURN_UNEXPECTED (UnwrapUnion (avro_node->leafAt (0 ), &element_node));
706- if (expected_element_field.type ()->is_nested ()) {
707- ICEBERG_ASSIGN_OR_RAISE (
708- element_projection,
709- ProjectNested (*expected_element_field.type (), element_node, prune_source));
710- } else {
711- ICEBERG_RETURN_UNEXPECTED (
712- ValidateAvroSchemaEvolution (*expected_element_field.type (), element_node));
713- }
714-
715- // Set the element projection metadata but preserve its children
716- element_projection.kind = FieldProjection::Kind::kProjected ;
717- element_projection.from = size_t {0 };
735+ ICEBERG_ASSIGN_OR_RAISE (element_projection,
736+ ProjectField (expected_element_field, avro_node->leafAt (0 ),
737+ size_t {0 }, prune_source));
718738
719739 FieldProjection result;
720740 result.children .emplace_back (std::move (element_projection));
@@ -770,18 +790,10 @@ Result<FieldProjection> ProjectMap(const MapType& map_type,
770790
771791 for (size_t i = 0 ; i < map_node->leaves (); ++i) {
772792 FieldProjection sub_projection;
773- ::avro::NodePtr sub_node;
774- ICEBERG_RETURN_UNEXPECTED (UnwrapUnion (map_node->leafAt (i), &sub_node));
775793 const auto & expected_sub_field = map_type.fields ()[i];
776- if (expected_sub_field.type ()->is_nested ()) {
777- ICEBERG_ASSIGN_OR_RAISE (sub_projection, ProjectNested (*expected_sub_field.type (),
778- sub_node, prune_source));
779- } else {
780- ICEBERG_RETURN_UNEXPECTED (
781- ValidateAvroSchemaEvolution (*expected_sub_field.type (), sub_node));
782- }
783- sub_projection.kind = FieldProjection::Kind::kProjected ;
784- sub_projection.from = i;
794+ ICEBERG_ASSIGN_OR_RAISE (
795+ sub_projection,
796+ ProjectField (expected_sub_field, map_node->leafAt (i), i, prune_source));
785797 result.children .emplace_back (std::move (sub_projection));
786798 }
787799
@@ -1017,9 +1029,9 @@ Result<::avro::NodePtr> MakeAvroNodeWithFieldIds(const ::avro::NodePtr& original
10171029 case ::avro::AVRO_STRING :
10181030 case ::avro::AVRO_BYTES :
10191031 case ::avro::AVRO_FIXED :
1032+ case ::avro::AVRO_NULL :
10201033 // For primitive types, just return a copy
10211034 return original_node;
1022- case ::avro::AVRO_NULL :
10231035 case ::avro::AVRO_ENUM :
10241036 default :
10251037 return InvalidSchema (" Unsupported Avro type for field ID application: {}" ,
0 commit comments