Skip to content

Commit 1a61d45

Browse files
fix name mapping and schema
1 parent a7cf19b commit 1a61d45

File tree

4 files changed

+419
-477
lines changed

4 files changed

+419
-477
lines changed

src/iceberg/avro/avro_reader.cc

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -117,16 +117,6 @@ class AvroReader::Impl {
117117
// Create a new schema with the updated root node
118118
auto new_schema = ::avro::ValidSchema(new_root_node);
119119

120-
// Verify that all fields now have IDs after applying the name mapping
121-
HasIdVisitor verify_visitor;
122-
ICEBERG_RETURN_UNEXPECTED(verify_visitor.Visit(new_schema));
123-
if (!verify_visitor.AllHaveIds()) {
124-
// TODO(liuxiaoyu): Print detailed error message with missing field IDs
125-
// information in future
126-
return InvalidSchema(
127-
"Not all fields have field IDs after applying name mapping.");
128-
}
129-
130120
// Update the file schema to use the new schema with field IDs
131121
file_schema = new_schema;
132122
} else {

src/iceberg/avro/avro_schema_util.cc

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -784,11 +784,13 @@ Result<::avro::NodePtr> CreateRecordNodeWithFieldIds(const ::avro::NodePtr& orig
784784

785785
for (size_t i = 0; i < original_node->leaves(); ++i) {
786786
if (i >= original_node->names()) {
787-
return InvalidSchema(...);
787+
return InvalidSchema("Index {} is out of bounds for names (size: {})", i,
788+
original_node->names());
788789
}
789790
const std::string& field_name = original_node->nameAt(i);
790791
if (i >= original_node->leaves()) {
791-
return InvalidSchema(...);
792+
return InvalidSchema("Index {} is out of bounds for leaves (size: {})", i,
793+
original_node->leaves());
792794
}
793795
::avro::NodePtr field_node = original_node->leafAt(i);
794796

@@ -897,42 +899,29 @@ Result<::avro::NodePtr> CreateMapNodeWithFieldIds(const ::avro::NodePtr& origina
897899

898900
auto new_map_node = std::make_shared<::avro::NodeMap>();
899901

900-
// Try to find key and value fields from nested mapping
901-
const MappedField* key_field = nullptr;
902-
const MappedField* value_field = nullptr;
903-
if (field.nested_mapping) {
904-
auto fields_span = field.nested_mapping->fields();
905-
for (const auto& f : fields_span) {
906-
if (f.names.find(std::string(kKey)) != f.names.end()) {
907-
key_field = &f;
908-
} else if (f.names.find(std::string(kValue)) != f.names.end()) {
909-
value_field = &f;
910-
}
911-
}
912-
}
902+
// For map types, we use fixed field IDs for key and value
903+
// Key field gets field ID 0, value field gets field ID 1
904+
constexpr int32_t kMapKeyFieldId = 0;
905+
constexpr int32_t kMapValueFieldId = 1;
913906

914-
// Check if both key and value fields are found
915-
if (!key_field) {
916-
return InvalidSchema("Key field not found in nested mapping for map");
917-
}
918-
if (!value_field) {
919-
return InvalidSchema("Value field not found in nested mapping for map");
920-
}
907+
// Create key field with fixed field ID
908+
MappedField key_field;
909+
key_field.field_id = kMapKeyFieldId;
910+
key_field.nested_mapping =
911+
field.nested_mapping; // Pass through nested mapping for complex key types
921912

922-
// Check if field_ids are present
923-
if (!key_field->field_id.has_value()) {
924-
return InvalidSchema("Field ID is missing for key field in map");
925-
}
926-
if (!value_field->field_id.has_value()) {
927-
return InvalidSchema("Field ID is missing for value field in map");
928-
}
913+
// Create value field with fixed field ID
914+
MappedField value_field;
915+
value_field.field_id = kMapValueFieldId;
916+
value_field.nested_mapping =
917+
field.nested_mapping; // Pass through nested mapping for complex value types
929918

930919
// Add key and value nodes
931-
ICEBERG_ASSIGN_OR_RAISE(auto new_key_node, CreateAvroNodeWithFieldIds(
932-
original_node->leafAt(0), *key_field));
920+
ICEBERG_ASSIGN_OR_RAISE(
921+
auto new_key_node, CreateAvroNodeWithFieldIds(original_node->leafAt(0), key_field));
933922
ICEBERG_ASSIGN_OR_RAISE(
934923
auto new_value_node,
935-
CreateAvroNodeWithFieldIds(original_node->leafAt(1), *value_field));
924+
CreateAvroNodeWithFieldIds(original_node->leafAt(1), value_field));
936925
new_map_node->addLeaf(new_key_node);
937926
new_map_node->addLeaf(new_value_node);
938927

0 commit comments

Comments
 (0)