Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions cmake_modules/IcebergThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ function(resolve_arrow_dependency)
set(ARROW_FILESYSTEM
ON
CACHE BOOL "" FORCE)
set(ARROW_JSON
ON
CACHE BOOL "" FORCE)
set(ARROW_PARQUET
ON
CACHE BOOL "" FORCE)
Expand All @@ -95,8 +98,8 @@ function(resolve_arrow_dependency)

fetchcontent_declare(VendoredArrow
${FC_DECLARE_COMMON_OPTIONS}
GIT_REPOSITORY https://github.com/wgtmac/arrow.git
GIT_TAG 7d50c4ac803ad983734de5f418b7cd18f25b0dc9
GIT_REPOSITORY https://github.com/apache/arrow.git
GIT_TAG 5f0aeb5de53fb25b59a52661a80071faef99a4a4
Comment on lines +101 to +102
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice!

#URL ${ARROW_SOURCE_URL}
#URL_HASH "SHA256=${ICEBERG_ARROW_BUILD_SHA256_CHECKSUM}"
SOURCE_SUBDIR
Expand Down
371 changes: 369 additions & 2 deletions src/iceberg/avro/avro_data_util.cc

Large diffs are not rendered by default.

13 changes: 12 additions & 1 deletion src/iceberg/avro/avro_data_util_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,21 @@

namespace iceberg::avro {

/// \brief Append an Avro datum to an Arrow array builder.
///
/// This function handles schema evolution by using the provided projection to map
/// fields from the Avro data to the expected Arrow schema.
///
/// \param avro_node The Avro schema node (must be a record at root level)
/// \param avro_datum The Avro data to append
/// \param projection Schema projection from `projected_schema` to `avro_node`
/// \param projected_schema The projected schema
/// \param array_builder The Arrow array builder to append to (must be a struct builder)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So is it undefined behavior to pass in a builder of a different type? Can the argument here just be a StructBuilder?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would require an additional cast before calling this function.

/// \return Status indicating success or failure
Status AppendDatumToBuilder(const ::avro::NodePtr& avro_node,
const ::avro::GenericDatum& avro_datum,
const SchemaProjection& projection,
const Schema& arrow_schema,
const Schema& projected_schema,
::arrow::ArrayBuilder* array_builder);

} // namespace iceberg::avro
34 changes: 17 additions & 17 deletions src/iceberg/avro/avro_schema_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,22 @@ ::avro::CustomAttributes GetAttributesWithFieldId(int32_t field_id) {

} // namespace

/// \brief Render an Avro schema node as a string.
///
/// The dereferenced node is written to an in-memory stream with its
/// `operator<<`, and the accumulated text is returned.
///
/// \param node The Avro node to render; it is dereferenced unconditionally.
/// \return The text produced by streaming `*node`.
std::string ToString(const ::avro::NodePtr& node) {
  std::ostringstream buffer;
  buffer << *node;
  return buffer.str();
}

/// \brief Render an Avro logical type as a string.
///
/// Delegates to `LogicalType::printJson`, writing into an in-memory stream,
/// and returns whatever that call emitted.
///
/// \param logical_type The logical type to render.
/// \return The text written by `printJson`.
std::string ToString(const ::avro::LogicalType& logical_type) {
  std::ostringstream buffer;
  logical_type.printJson(buffer);
  return buffer.str();
}

/// \brief Render an Avro logical type enumerator as a string.
///
/// Wraps the enumerator in a `::avro::LogicalType` and forwards to the
/// `ToString(const ::avro::LogicalType&)` overload.
///
/// \param logical_type The logical type enumerator to render.
/// \return The string produced for the wrapped logical type.
std::string ToString(const ::avro::LogicalType::Type& logical_type) {
  const ::avro::LogicalType wrapped(logical_type);
  return ToString(wrapped);
}

Status ToAvroNodeVisitor::Visit(const BooleanType& type, ::avro::NodePtr* node) {
*node = std::make_shared<::avro::NodePrimitive>(::avro::AVRO_BOOL);
return {};
Expand Down Expand Up @@ -383,22 +399,6 @@ Status HasIdVisitor::Visit(const ::avro::Schema& schema) { return Visit(schema.r

namespace {

/// \brief Render an Avro schema node as a string.
///
/// The dereferenced node is written to an in-memory stream with its
/// `operator<<`, and the accumulated text is returned.
///
/// \param node The Avro node to render; it is dereferenced unconditionally.
/// \return The text produced by streaming `*node`.
std::string ToString(const ::avro::NodePtr& node) {
  std::ostringstream buffer;
  buffer << *node;
  return buffer.str();
}

/// \brief Render an Avro logical type as a string.
///
/// Delegates to `LogicalType::printJson`, writing into an in-memory stream,
/// and returns whatever that call emitted.
///
/// \param logical_type The logical type to render.
/// \return The text written by `printJson`.
std::string ToString(const ::avro::LogicalType& logical_type) {
  std::ostringstream buffer;
  logical_type.printJson(buffer);
  return buffer.str();
}

/// \brief Render an Avro logical type enumerator as a string.
///
/// Wraps the enumerator in a `::avro::LogicalType` and forwards to the
/// `ToString(const ::avro::LogicalType&)` overload.
///
/// \param logical_type The logical type enumerator to render.
/// \return The string produced for the wrapped logical type.
std::string ToString(const ::avro::LogicalType::Type& logical_type) {
  const ::avro::LogicalType wrapped(logical_type);
  return ToString(wrapped);
}

bool HasLogicalType(const ::avro::NodePtr& node,
::avro::LogicalType::Type expected_type) {
return node->logicalType().type() == expected_type;
Expand Down Expand Up @@ -501,7 +501,7 @@ Status ValidateAvroSchemaEvolution(const Type& expected_type,
case TypeId::kTimestamp:
if (avro_node->type() == ::avro::AVRO_LONG &&
HasLogicalType(avro_node, ::avro::LogicalType::TIMESTAMP_MICROS) &&
GetAdjustToUtc(avro_node).value_or("false") == "true") {
GetAdjustToUtc(avro_node).value_or("false") == "false") {
return {};
}
break;
Expand Down
4 changes: 4 additions & 0 deletions src/iceberg/avro/avro_schema_util_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,4 +135,8 @@ class HasIdVisitor {
Result<SchemaProjection> Project(const Schema& expected_schema,
const ::avro::NodePtr& avro_node, bool prune_source);

std::string ToString(const ::avro::NodePtr& node);
std::string ToString(const ::avro::LogicalType& logical_type);
std::string ToString(const ::avro::LogicalType::Type& logical_type);

} // namespace iceberg::avro
2 changes: 1 addition & 1 deletion src/iceberg/schema_util_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
namespace iceberg {

// Fix `from` field of `FieldProjection` to use pruned field index.
void PruneFieldProjection(FieldProjection& field_projection) {
inline void PruneFieldProjection(FieldProjection& field_projection) {
std::map<size_t, size_t> local_index_to_pruned_index;
for (const auto& child_projection : field_projection.children) {
if (child_projection.kind == FieldProjection::Kind::kProjected) {
Expand Down
3 changes: 2 additions & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ add_test(NAME util_test COMMAND util_test)

if(ICEBERG_BUILD_BUNDLE)
add_executable(avro_test)
target_sources(avro_test PRIVATE avro_test.cc avro_schema_test.cc avro_stream_test.cc)
target_sources(avro_test PRIVATE avro_data_test.cc avro_test.cc avro_schema_test.cc
avro_stream_test.cc)
target_link_libraries(avro_test PRIVATE iceberg_bundle_static GTest::gtest_main
GTest::gmock)
add_test(NAME avro_test COMMAND avro_test)
Expand Down
Loading
Loading