Skip to content

Commit e292c79

Browse files
committed
Merge branch 'main' into logger
2 parents 9f1e7e8 + 8ecee31 commit e292c79

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+1834
-222
lines changed

.github/workflows/cpp-linter.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@ jobs:
3939
mkdir build && cd build
4040
cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
4141
cmake --build .
42-
- uses: cpp-linter/cpp-linter-action@v2.13.3
42+
- uses: cpp-linter/cpp-linter-action@f91c446a32ae3eb9f98fef8c9ed4c7cb613a4f8a
4343
id: linter
4444
continue-on-error: true
4545
env:

cmake_modules/IcebergThirdpartyToolchain.cmake

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -23,9 +23,9 @@ set(ICEBERG_ARROW_INSTALL_INTERFACE_LIBS)
2323
# ----------------------------------------------------------------------
2424
# Versions and URLs for toolchain builds
2525

26-
set(ICEBERG_ARROW_BUILD_VERSION "19.0.1")
26+
set(ICEBERG_ARROW_BUILD_VERSION "21.0.0")
2727
set(ICEBERG_ARROW_BUILD_SHA256_CHECKSUM
28-
"acb76266e8b0c2fbb7eb15d542fbb462a73b3fd1e32b80fad6c2fafd95a51160")
28+
"5d3f8db7e72fb9f65f4785b7a1634522e8d8e9657a445af53d4a34a3849857b5")
2929

3030
if(DEFINED ENV{ICEBERG_ARROW_URL})
3131
set(ARROW_SOURCE_URL "$ENV{ICEBERG_ARROW_URL}")
@@ -65,8 +65,9 @@ function(resolve_arrow_dependency)
6565
set(ARROW_BUILD_STATIC
6666
ON
6767
CACHE BOOL "" FORCE)
68+
# Keep ARROW_IPC enabled to work around the undefined symbol arrow::ipc::ReadSchema(arrow::io::InputStream*, arrow::ipc::DictionaryMemo*).
6869
set(ARROW_IPC
69-
OFF
70+
ON
7071
CACHE BOOL "" FORCE)
7172
set(ARROW_FILESYSTEM
7273
ON
@@ -98,10 +99,8 @@ function(resolve_arrow_dependency)
9899

99100
fetchcontent_declare(VendoredArrow
100101
${FC_DECLARE_COMMON_OPTIONS}
101-
GIT_REPOSITORY https://github.com/apache/arrow.git
102-
GIT_TAG f12356adaaabea86638407e995e73215dbb58bb2
103-
#URL ${ARROW_SOURCE_URL}
104-
#URL_HASH "SHA256=${ICEBERG_ARROW_BUILD_SHA256_CHECKSUM}"
102+
URL ${ARROW_SOURCE_URL}
103+
URL_HASH "SHA256=${ICEBERG_ARROW_BUILD_SHA256_CHECKSUM}"
105104
SOURCE_SUBDIR
106105
cpp
107106
FIND_PACKAGE_ARGS

src/iceberg/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,11 @@ if(ICEBERG_BUILD_BUNDLE)
109109
avro/avro_data_util.cc
110110
avro/avro_reader.cc
111111
avro/avro_schema_util.cc
112-
avro/avro_stream_internal.cc)
112+
avro/avro_register.cc
113+
avro/avro_stream_internal.cc
114+
parquet/parquet_data_util.cc
115+
parquet/parquet_reader.cc
116+
parquet/parquet_schema_util.cc)
113117

114118
# Libraries to link with exported libiceberg_bundle.{so,a}.
115119
set(ICEBERG_BUNDLE_STATIC_BUILD_INTERFACE_LIBS)
@@ -161,6 +165,7 @@ if(ICEBERG_BUILD_BUNDLE)
161165

162166
add_subdirectory(arrow)
163167
add_subdirectory(avro)
168+
add_subdirectory(parquet)
164169

165170
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/iceberg_bundle_export.h
166171
DESTINATION ${ICEBERG_INSTALL_INCLUDEDIR}/iceberg)

src/iceberg/arrow/arrow_error_transform_internal.h

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -43,17 +43,18 @@ inline ErrorKind ToErrorKind(const ::arrow::Status& status) {
4343
} \
4444
lhs = std::move(result_name).ValueOrDie();
4545

46-
#define ICEBERG_ARROW_ASSIGN_OR_RETURN(lhs, rexpr) \
47-
ICEBERG_ARROW_ASSIGN_OR_RETURN_IMPL( \
48-
ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), lhs, rexpr, ToErrorKind)
49-
50-
#define ICEBERG_ARROW_RETURN_NOT_OK(expr) \
51-
do { \
52-
auto&& _status = (expr); \
53-
if (!_status.ok()) { \
54-
return std::unexpected<Error>{ \
55-
{.kind = ToErrorKind(_status), .message = _status.ToString()}}; \
56-
} \
46+
/// \brief Assign-or-return helper for expressions that yield an Arrow result.
///
/// Forwards `lhs` and `rexpr` to ICEBERG_ARROW_ASSIGN_OR_RETURN_IMPL, together with
/// a temporary name built by ARROW_ASSIGN_OR_RAISE_NAME from `_error_or_value` and
/// `__COUNTER__`, and the fully qualified `::iceberg::arrow::ToErrorKind` converter.
#define ICEBERG_ARROW_ASSIGN_OR_RETURN(lhs, rexpr)                          \
  ICEBERG_ARROW_ASSIGN_OR_RETURN_IMPL(                                      \
      ARROW_ASSIGN_OR_RAISE_NAME(_error_or_value, __COUNTER__), lhs, rexpr, \
      ::iceberg::arrow::ToErrorKind)

/// \brief Return early from the enclosing function when `expr` is a failed status.
///
/// `expr` is evaluated exactly once. If the resulting status reports `ok()`, the
/// macro does nothing further; otherwise the enclosing function returns a
/// `std::unexpected<Error>` whose kind comes from `::iceberg::arrow::ToErrorKind`
/// and whose message is the status' `ToString()`.
#define ICEBERG_ARROW_RETURN_NOT_OK(expr)                          \
  do {                                                             \
    auto&& _status = (expr);                                       \
    if (_status.ok()) {                                            \
      break; /* success: leave the do/while wrapper */             \
    }                                                              \
    return std::unexpected<Error>{                                 \
        {.kind = ::iceberg::arrow::ToErrorKind(_status),           \
         .message = _status.ToString()}};                          \
  } while (false)
5859

5960
} // namespace iceberg::arrow

src/iceberg/arrow/arrow_fs_file_io.cc

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,11 +17,13 @@
1717
* under the License.
1818
*/
1919

20-
#include "iceberg/arrow/arrow_fs_file_io.h"
20+
#include <chrono>
2121

2222
#include <arrow/filesystem/localfs.h>
23+
#include <arrow/filesystem/mockfs.h>
2324

2425
#include "iceberg/arrow/arrow_error_transform_internal.h"
26+
#include "iceberg/arrow/arrow_fs_file_io_internal.h"
2527

2628
namespace iceberg::arrow {
2729

@@ -67,4 +69,15 @@ Status ArrowFileSystemFileIO::DeleteFile(const std::string& file_location) {
6769
return {};
6870
}
6971

72+
std::unique_ptr<FileIO> ArrowFileSystemFileIO::MakeMockFileIO() {
73+
return std::make_unique<ArrowFileSystemFileIO>(
74+
std::make_shared<::arrow::fs::internal::MockFileSystem>(
75+
std::chrono::system_clock::now()));
76+
}
77+
78+
std::unique_ptr<FileIO> ArrowFileSystemFileIO::MakeLocalFileIO() {
79+
return std::make_unique<ArrowFileSystemFileIO>(
80+
std::make_shared<::arrow::fs::LocalFileSystem>());
81+
}
82+
7083
} // namespace iceberg::arrow

src/iceberg/arrow/arrow_fs_file_io.h renamed to src/iceberg/arrow/arrow_fs_file_io_internal.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,12 @@ class ICEBERG_BUNDLE_EXPORT ArrowFileSystemFileIO : public FileIO {
3434
explicit ArrowFileSystemFileIO(std::shared_ptr<::arrow::fs::FileSystem> arrow_fs)
3535
: arrow_fs_(std::move(arrow_fs)) {}
3636

37+
/// \brief Make an in-memory FileIO backed by arrow::fs::internal::MockFileSystem.
38+
static std::unique_ptr<FileIO> MakeMockFileIO();
39+
40+
/// \brief Make a local FileIO backed by arrow::fs::LocalFileSystem.
41+
static std::unique_ptr<FileIO> MakeLocalFileIO();
42+
3743
~ArrowFileSystemFileIO() override = default;
3844

3945
/// \brief Read the content of the file at the given location.

src/iceberg/avro/avro_reader.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131
#include <avro/Generic.hh>
3232
#include <avro/GenericDatum.hh>
3333

34-
#include "iceberg/arrow/arrow_fs_file_io.h"
34+
#include "iceberg/arrow/arrow_fs_file_io_internal.h"
3535
#include "iceberg/avro/avro_data_util_internal.h"
3636
#include "iceberg/avro/avro_schema_util_internal.h"
3737
#include "iceberg/avro/avro_stream_internal.h"

src/iceberg/avro/avro_register.cc

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/avro/avro_register.h"

// Standard headers for the names this file uses directly (std::make_shared,
// std::once_flag / std::call_once, std::string) instead of relying on
// transitive includes.
#include <memory>
#include <mutex>
#include <string>

#include "iceberg/avro/avro_schema_util_internal.h"

namespace iceberg::avro {

/// \brief Register the custom "map" logical type with the Avro library.
///
/// The registration body is executed through std::call_once on a function-local
/// static flag, so callers may invoke this function repeatedly.
void RegisterLogicalTypes() {
  static std::once_flag flag{};
  std::call_once(flag, []() {
    // Register the map logical type with the avro custom logical type registry.
    // See https://github.com/apache/avro/pull/3326 for details.
    ::avro::CustomLogicalTypeRegistry::instance().registerType(
        "map", [](const std::string&) { return std::make_shared<MapLogicalType>(); });
  });
}

}  // namespace iceberg::avro

src/iceberg/avro/avro_register.h

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,28 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
#include "iceberg/iceberg_bundle_export.h"
23+
24+
namespace iceberg::avro {
25+
26+
ICEBERG_BUNDLE_EXPORT void RegisterLogicalTypes();
27+
28+
} // namespace iceberg::avro

src/iceberg/avro/avro_schema_util.cc

Lines changed: 2 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -22,14 +22,14 @@
2222
#include <string_view>
2323

2424
#include <arrow/type.h>
25-
#include <arrow/util/decimal.h>
2625
#include <avro/CustomAttributes.hh>
2726
#include <avro/LogicalType.hh>
2827
#include <avro/NodeImpl.hh>
2928
#include <avro/Schema.hh>
3029
#include <avro/Types.hh>
3130
#include <avro/ValidSchema.hh>
3231

32+
#include "iceberg/avro/avro_register.h"
3333
#include "iceberg/avro/avro_schema_util_internal.h"
3434
#include "iceberg/metadata_columns.h"
3535
#include "iceberg/schema.h"
@@ -49,18 +49,8 @@ constexpr std::string_view kValueIdProp = "value-id";
4949
constexpr std::string_view kElementIdProp = "element-id";
5050
constexpr std::string_view kAdjustToUtcProp = "adjust-to-utc";
5151

52-
struct MapLogicalType : public ::avro::CustomLogicalType {
53-
MapLogicalType() : ::avro::CustomLogicalType("map") {}
54-
};
55-
5652
::avro::LogicalType GetMapLogicalType() {
57-
static std::once_flag flag{};
58-
std::call_once(flag, []() {
59-
// Register the map logical type with the avro custom logical type registry.
60-
// See https://github.com/apache/avro/pull/3326 for details.
61-
::avro::CustomLogicalTypeRegistry::instance().registerType(
62-
"map", [](const std::string&) { return std::make_shared<MapLogicalType>(); });
63-
});
53+
RegisterLogicalTypes();
6454
return ::avro::LogicalType(std::make_shared<MapLogicalType>());
6555
}
6656

0 commit comments

Comments
 (0)