Skip to content

Commit f10bec9

Browse files
update code
1 parent 94cee21 commit f10bec9

2 files changed

Lines changed: 54 additions & 165 deletions

File tree

src/iceberg/avro/avro_schema_util.cc

Lines changed: 54 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -803,40 +803,36 @@ Result<::avro::NodePtr> CreateRecordNodeWithFieldIds(const ::avro::NodePtr& orig
803803
}
804804
}
805805

806-
if (nested_field) {
807-
// Check if field_id is present
808-
if (!nested_field->field_id.has_value()) {
809-
return InvalidSchema("Field ID is missing for field '{}' in nested mapping",
810-
field_name);
811-
}
806+
if (!nested_field) {
807+
return InvalidSchema("Field '{}' not found in nested mapping", field_name);
808+
}
812809

813-
// Preserve existing custom attributes for this field
814-
::avro::CustomAttributes attributes;
815-
if (i < original_node->customAttributes()) {
816-
// Copy all existing attributes from the original node
817-
const auto& original_attrs = original_node->customAttributesAt(i);
818-
const auto& existing_attrs = original_attrs.attributes();
819-
for (const auto& attr_pair : existing_attrs) {
820-
// Copy each existing attribute to preserve original metadata
821-
attributes.addAttribute(attr_pair.first, attr_pair.second, false);
822-
}
810+
if (!nested_field->field_id.has_value()) {
811+
return InvalidSchema("Field ID is missing for field '{}' in nested mapping",
812+
field_name);
813+
}
814+
815+
// Preserve existing custom attributes for this field
816+
::avro::CustomAttributes attributes;
817+
if (i < original_node->customAttributes()) {
818+
// Copy all existing attributes from the original node
819+
for (const auto& attr_pair : original_node->customAttributesAt(i).attributes()) {
820+
// Copy each existing attribute to preserve original metadata
821+
attributes.addAttribute(attr_pair.first, attr_pair.second, /*addQuote=*/false);
823822
}
823+
}
824824

825-
// Add field ID attribute to the new node (preserving existing attributes)
826-
attributes.addAttribute(std::string(kFieldIdProp),
827-
std::to_string(nested_field->field_id.value()), false);
825+
// Add field ID attribute to the new node (preserving existing attributes)
826+
attributes.addAttribute(std::string(kFieldIdProp),
827+
std::to_string(nested_field->field_id.value()), /*addQuote=*/false);
828828

829-
new_record_node->addCustomAttributesForField(attributes);
829+
new_record_node->addCustomAttributesForField(attributes);
830830

831-
// Recursively apply field IDs to nested fields
832-
ICEBERG_ASSIGN_OR_RAISE(auto new_nested_node,
833-
MakeAvroNodeWithFieldIds(field_node, *nested_field));
834-
new_record_node->addName(field_name);
835-
new_record_node->addLeaf(new_nested_node);
836-
} else {
837-
// If no nested field found, this is an error
838-
return InvalidSchema("Field '{}' not found in nested mapping", field_name);
839-
}
831+
// Recursively apply field IDs to nested fields
832+
ICEBERG_ASSIGN_OR_RAISE(auto new_nested_node,
833+
MakeAvroNodeWithFieldIds(field_node, *nested_field));
834+
new_record_node->addName(field_name);
835+
new_record_node->addLeaf(new_nested_node);
840836
}
841837

842838
return new_record_node;
@@ -858,39 +854,28 @@ Result<::avro::NodePtr> CreateArrayNodeWithFieldIds(const ::avro::NodePtr& origi
858854
return new_array_node;
859855
}
860856

861-
// For regular arrays, try to find element field ID from nested mapping
862-
const MappedField* element_field = nullptr;
863-
if (field.nested_mapping) {
864-
auto fields_span = field.nested_mapping->fields();
865-
for (const auto& f : fields_span) {
866-
if (f.names.find(std::string(kElement)) != f.names.end()) {
867-
element_field = &f;
868-
break;
869-
}
870-
}
857+
// For regular arrays, use the first field from nested mapping as element field
858+
if (!field.nested_mapping || field.nested_mapping->fields().empty()) {
859+
return InvalidSchema("Array type requires nested mapping with element field");
871860
}
872861

873-
if (element_field) {
874-
// Check if field_id is present
875-
if (!element_field->field_id.has_value()) {
876-
return InvalidSchema("Field ID is missing for element field in array");
877-
}
878-
879-
ICEBERG_ASSIGN_OR_RAISE(
880-
auto new_element_node,
881-
MakeAvroNodeWithFieldIds(original_node->leafAt(0), *element_field));
882-
new_array_node->addLeaf(new_element_node);
862+
const auto& element_field = field.nested_mapping->fields()[0];
883863

884-
// Add element field ID as custom attribute
885-
::avro::CustomAttributes element_attributes;
886-
element_attributes.addAttribute(std::string(kFieldIdProp),
887-
std::to_string(*element_field->field_id), false);
888-
new_array_node->addCustomAttributesForField(element_attributes);
889-
} else {
890-
// If no element field found, this is an error
891-
return InvalidSchema("Element field not found in nested mapping for array");
864+
if (!element_field.field_id.has_value()) {
865+
return InvalidSchema("Field ID is missing for element field in array");
892866
}
893867

868+
ICEBERG_ASSIGN_OR_RAISE(
869+
auto new_element_node,
870+
MakeAvroNodeWithFieldIds(original_node->leafAt(0), element_field));
871+
new_array_node->addLeaf(new_element_node);
872+
873+
// Add element field ID as custom attribute
874+
::avro::CustomAttributes element_attributes;
875+
element_attributes.addAttribute(std::string(kElementIdProp),
876+
std::to_string(*element_field.field_id), /*addQuote=*/false);
877+
new_array_node->addCustomAttributesForField(element_attributes);
878+
894879
return new_array_node;
895880
}
896881

@@ -908,45 +893,24 @@ Result<::avro::NodePtr> CreateMapNodeWithFieldIds(const ::avro::NodePtr& origina
908893
return InvalidSchema("Map type requires nested mapping for key and value fields");
909894
}
910895

911-
// Find key and value field mappings by name
912-
std::optional<int32_t> key_id = field.nested_mapping->Id("key");
913-
std::optional<int32_t> value_id = field.nested_mapping->Id("value");
914-
915-
if (!key_id || !value_id) {
916-
return InvalidSchema("Map type requires both 'key' and 'value' field mappings");
896+
// For map types, use the first two fields from nested mapping as key and value
897+
if (!field.nested_mapping || field.nested_mapping->fields().size() < 2) {
898+
return InvalidSchema("Map type requires nested mapping with key and value fields");
917899
}
918900

919-
std::optional<MappedFieldConstRef> key_field_ref = field.nested_mapping->Field(*key_id);
920-
std::optional<MappedFieldConstRef> value_field_ref =
921-
field.nested_mapping->Field(*value_id);
922-
923-
if (!key_field_ref || !value_field_ref) {
924-
return InvalidSchema("Map type requires both key and value field mappings");
925-
}
926-
927-
const auto& key_mapped_field = key_field_ref->get();
928-
const auto& value_mapped_field = value_field_ref->get();
901+
const auto& key_mapped_field = field.nested_mapping->fields()[0];
902+
const auto& value_mapped_field = field.nested_mapping->fields()[1];
929903

930904
if (!key_mapped_field.field_id || !value_mapped_field.field_id) {
931905
return InvalidSchema("Map key and value fields must have field IDs");
932906
}
933907

934-
// Create key field with mapped field ID
935-
MappedField key_field;
936-
key_field.field_id = *key_mapped_field.field_id;
937-
key_field.nested_mapping = key_mapped_field.nested_mapping;
938-
939-
// Create value field with mapped field ID
940-
MappedField value_field;
941-
value_field.field_id = *value_mapped_field.field_id;
942-
value_field.nested_mapping = value_mapped_field.nested_mapping;
943-
944908
// Add key and value nodes
945909
ICEBERG_ASSIGN_OR_RAISE(auto new_key_node,
946-
MakeAvroNodeWithFieldIds(original_node->leafAt(0), key_field));
910+
MakeAvroNodeWithFieldIds(original_node->leafAt(0), key_mapped_field));
947911
ICEBERG_ASSIGN_OR_RAISE(
948912
auto new_value_node,
949-
MakeAvroNodeWithFieldIds(original_node->leafAt(1), value_field));
913+
MakeAvroNodeWithFieldIds(original_node->leafAt(1), value_mapped_field));
950914
new_map_node->addLeaf(new_key_node);
951915
new_map_node->addLeaf(new_value_node);
952916

@@ -958,19 +922,19 @@ Result<::avro::NodePtr> CreateMapNodeWithFieldIds(const ::avro::NodePtr& origina
958922
for (const auto& attr_pair : existing_attrs) {
959923
// Copy each existing attribute to preserve original metadata
960924
::avro::CustomAttributes attributes;
961-
attributes.addAttribute(attr_pair.first, attr_pair.second, false);
925+
attributes.addAttribute(attr_pair.first, attr_pair.second, /*addQuote=*/false);
962926
new_map_node->addCustomAttributesForField(attributes);
963927
}
964928
}
965929

966930
::avro::CustomAttributes key_attributes;
967-
key_attributes.addAttribute(std::string(kFieldIdProp),
968-
std::to_string(*key_mapped_field.field_id), false);
931+
key_attributes.addAttribute(std::string(kKeyIdProp),
932+
std::to_string(*key_mapped_field.field_id), /*addQuote=*/false);
969933
new_map_node->addCustomAttributesForField(key_attributes);
970934

971935
::avro::CustomAttributes value_attributes;
972-
value_attributes.addAttribute(std::string(kFieldIdProp),
973-
std::to_string(*value_mapped_field.field_id), false);
936+
value_attributes.addAttribute(std::string(kValueIdProp),
937+
std::to_string(*value_mapped_field.field_id), /*addQuote=*/false);
974938
new_map_node->addCustomAttributesForField(value_attributes);
975939

976940
return new_map_node;

test/avro_schema_test.cc

Lines changed: 0 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#include <gtest/gtest.h>
2626

2727
#include "iceberg/avro/avro_schema_util_internal.h"
28-
#include "iceberg/json_internal.h"
2928
#include "iceberg/metadata_columns.h"
3029
#include "iceberg/name_mapping.h"
3130
#include "iceberg/schema.h"
@@ -1409,7 +1408,6 @@ TEST_F(NameMappingAvroSchemaTest, ApplyNameMappingToUnion) {
14091408
EXPECT_EQ(union_node->type(), ::avro::AVRO_UNION);
14101409
EXPECT_EQ(union_node->leaves(), 2);
14111410

1412-
// Check that the non-null branch has field ID applied
14131411
const auto& non_null_branch = union_node->leafAt(1);
14141412
EXPECT_EQ(non_null_branch->type(), ::avro::AVRO_STRING);
14151413
}
@@ -1434,79 +1432,6 @@ TEST_F(NameMappingAvroSchemaTest, MissingFieldIdError) {
14341432

14351433
auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
14361434
ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
1437-
ASSERT_THAT(result,
1438-
HasErrorMessage("Field ID is missing for field 'name' in nested mapping"));
14391435
}
14401436

1441-
TEST_F(NameMappingAvroSchemaTest, MissingFieldError) {
1442-
// Create a name mapping
1443-
auto name_mapping = CreateSimpleNameMapping();
1444-
1445-
// Create an Avro record schema with a field not in the mapping
1446-
std::string avro_schema_json = R"({
1447-
"type": "record",
1448-
"name": "test_record",
1449-
"fields": [
1450-
{"name": "id", "type": "int"},
1451-
{"name": "name", "type": "string"},
1452-
{"name": "unknown_field", "type": "int"}
1453-
]
1454-
})";
1455-
auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
1456-
1457-
auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
1458-
ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
1459-
ASSERT_THAT(result,
1460-
HasErrorMessage("Field 'unknown_field' not found in nested mapping"));
1461-
}
1462-
1463-
TEST_F(NameMappingAvroSchemaTest, MissingArrayElementError) {
1464-
// Create a name mapping without array element mapping
1465-
std::vector<MappedField> fields;
1466-
fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
1467-
auto name_mapping = NameMapping::Make(std::move(fields));
1468-
1469-
// Create an Avro array schema
1470-
std::string avro_schema_json = R"({
1471-
"type": "record",
1472-
"name": "test_record",
1473-
"fields": [
1474-
{"name": "id", "type": "int"},
1475-
{"name": "items", "type": {
1476-
"type": "array",
1477-
"items": "string"
1478-
}}
1479-
]
1480-
})";
1481-
auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
1482-
1483-
auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
1484-
ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
1485-
ASSERT_THAT(result, HasErrorMessage("Field 'items' not found in nested mapping"));
1486-
}
1487-
1488-
TEST_F(NameMappingAvroSchemaTest, MissingMapKeyValueError) {
1489-
// Create a name mapping without map key/value mapping
1490-
std::vector<MappedField> fields;
1491-
fields.emplace_back(MappedField{.names = {"id"}, .field_id = 1});
1492-
auto name_mapping = NameMapping::Make(std::move(fields));
1493-
1494-
// Create an Avro map schema
1495-
std::string avro_schema_json = R"({
1496-
"type": "record",
1497-
"name": "test_record",
1498-
"fields": [
1499-
{"name": "id", "type": "int"},
1500-
{"name": "properties", "type": {
1501-
"type": "map",
1502-
"values": "string"
1503-
}}
1504-
]
1505-
})";
1506-
auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
1507-
1508-
auto result = MakeAvroNodeWithFieldIds(avro_schema.root(), *name_mapping);
1509-
ASSERT_THAT(result, IsError(ErrorKind::kInvalidSchema));
1510-
ASSERT_THAT(result, HasErrorMessage("Field 'properties' not found in nested mapping"));
1511-
}
15121437
} // namespace iceberg::avro

0 commit comments

Comments
 (0)