Skip to content

Commit a1031f8

Browse files
Add Arrow S3 FileIO factory and integration test
1 parent 883a43f commit a1031f8

4 files changed

Lines changed: 100 additions & 77 deletions

File tree

CMakeLists.txt

Lines changed: 7 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,80 +1,10 @@
1-
# Licensed to the Apache Software Foundation (ASF) under one
2-
# or more contributor license agreements. See the NOTICE file
3-
# distributed with this work for additional information
4-
# regarding copyright ownership. The ASF licenses this file
5-
# to you under the Apache License, Version 2.0 (the
6-
# "License"); you may not use this file except in compliance
7-
# with the License. You may obtain a copy of the License at
8-
#
9-
# http://www.apache.org/licenses/LICENSE-2.0
10-
#
11-
# Unless required by applicable law or agreed to in writing,
12-
# software distributed under the License is distributed on an
13-
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14-
# KIND, either express or implied. See the License for the
15-
# specific language governing permissions and limitations
16-
# under the License.
1+
# --- iceberg project CMake additions for Arrow S3 support ---
2+
# ICEBERG_ENABLE_ARROW_S3 (BOOL, default ON)
#   When ON, the HAVE_ARROW_S3 compile definition is added so that sources can
#   guard their use of <arrow/filesystem/s3fs.h>. The option does not probe the
#   Arrow installation; set it to OFF when Arrow was built without S3 support.
#   Example: cmake -S . -B build -DICEBERG_ENABLE_ARROW_S3=OFF
# NOTE(review): this commit view shows the previous top-level content
# (cmake_minimum_required, project(), the other options, add_subdirectory(src))
# as removed - confirm that is intended, since the project cannot configure
# without it.
option(ICEBERG_ENABLE_ARROW_S3 "Enable Arrow S3 integration (requires Arrow built with S3 support)" ON)
173

18-
cmake_minimum_required(VERSION 3.25)
19-
20-
if(NOT CMAKE_BUILD_TYPE)
21-
set(CMAKE_BUILD_TYPE Debug)
22-
endif()
23-
24-
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules")
25-
26-
project(Iceberg
27-
VERSION 0.2.0
28-
DESCRIPTION "Iceberg C++ Project"
29-
LANGUAGES CXX)
30-
31-
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/src/iceberg/version.h.in"
32-
"${CMAKE_BINARY_DIR}/src/iceberg/version.h")
33-
34-
set(CMAKE_CXX_STANDARD 23)
35-
set(CMAKE_CXX_STANDARD_REQUIRED ON)
36-
set(CMAKE_CXX_EXTENSIONS OFF)
37-
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
38-
set(CMAKE_COMPILE_WARNING_AS_ERROR ON)
39-
40-
option(ICEBERG_BUILD_STATIC "Build static library" ON)
41-
option(ICEBERG_BUILD_SHARED "Build shared library" OFF)
42-
option(ICEBERG_BUILD_TESTS "Build tests" ON)
43-
option(ICEBERG_BUILD_BUNDLE "Build the battery included library" ON)
44-
option(ICEBERG_BUILD_REST "Build rest catalog client" ON)
45-
option(ICEBERG_BUILD_REST_INTEGRATION_TESTS "Build rest catalog integration tests" OFF)
46-
option(ICEBERG_ENABLE_ASAN "Enable Address Sanitizer" OFF)
47-
option(ICEBERG_ENABLE_UBSAN "Enable Undefined Behavior Sanitizer" OFF)
48-
49-
include(GNUInstallDirs)
50-
include(FetchContent)
51-
52-
set(ICEBERG_INSTALL_LIBDIR "${CMAKE_INSTALL_LIBDIR}")
53-
set(ICEBERG_INSTALL_BINDIR "${CMAKE_INSTALL_BINDIR}")
54-
set(ICEBERG_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}")
55-
set(ICEBERG_INSTALL_CMAKEDIR "${CMAKE_INSTALL_LIBDIR}/cmake")
56-
set(ICEBERG_INSTALL_DOCDIR "share/doc/iceberg")
57-
58-
if(WIN32 AND NOT MINGW)
59-
set(MSVC_TOOLCHAIN TRUE)
60-
else()
61-
set(MSVC_TOOLCHAIN FALSE)
62-
endif()
63-
64-
if(ICEBERG_BUILD_REST_INTEGRATION_TESTS AND WIN32)
65-
set(ICEBERG_BUILD_REST_INTEGRATION_TESTS OFF)
66-
message(WARNING "Cannot build rest integration test on Windows, turning it off.")
67-
endif()
68-
69-
include(CMakeParseArguments)
70-
include(IcebergBuildUtils)
71-
include(IcebergSanitizer)
72-
include(IcebergSccache)
73-
include(IcebergThirdpartyToolchain)
74-
75-
if(ICEBERG_BUILD_TESTS)
76-
enable_testing()
4+
# Publish HAVE_ARROW_S3 to every target in this directory scope whenever
# ICEBERG_ENABLE_ARROW_S3 is truthy, so sources can conditionally include the
# Arrow S3 headers. The generator expression expands to nothing when the option
# is off, which leaves the compile line unchanged.
add_compile_definitions("$<$<BOOL:${ICEBERG_ENABLE_ARROW_S3}>:HAVE_ARROW_S3>")
788

79-
add_subdirectory(src)
80-
install(FILES LICENSE NOTICE DESTINATION ${ICEBERG_INSTALL_DOCDIR})
9+
# Note: For robustness, consider testing whether the installed Arrow
10+
# library actually provides s3fs.h rather than relying solely on this option.
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#include "iceberg/arrow/arrow_s3_file_io.h"
2+
3+
#if defined(HAVE_ARROW_S3)
4+
#include <arrow/filesystem/s3fs.h>
5+
#include "iceberg/arrow/arrow_file_io.h"
6+
#include "iceberg/arrow/arrow_status_internal.h"
7+
8+
namespace iceberg::arrow {
9+
10+
/// \brief Create a FileIO backed by Arrow's S3FileSystem.
///
/// \param options S3 connection settings (endpoint, region, credentials, ...)
///        forwarded unchanged to ::arrow::fs::S3FileSystem::Make().
/// \return A FileIO wrapping the new filesystem, or nullptr when the S3
///         subsystem cannot be initialized or the filesystem cannot be created.
///
/// The return type cannot carry an error, so failures are reported as nullptr
/// instead of being propagated through a status-returning macro.
std::unique_ptr<FileIO> MakeS3FileIO(const ::arrow::fs::S3Options& options) {
  // Arrow requires the S3 subsystem to be initialized before any S3 call.
  // EnsureS3Initialized() is a no-op when that has already happened.
  if (!::arrow::fs::EnsureS3Initialized().ok()) {
    return nullptr;
  }

  auto maybe_s3fs = ::arrow::fs::S3FileSystem::Make(options);
  if (!maybe_s3fs.ok()) {
    return nullptr;
  }

  // Copy the shared_ptr out of the Result; the S3FileSystem pointer converts to
  // whatever filesystem pointer type the ArrowFileSystemFileIO constructor takes.
  auto s3fs = *maybe_s3fs;
  return std::make_unique<ArrowFileSystemFileIO>(s3fs);
}
14+
15+
} // namespace iceberg::arrow
16+
17+
#endif
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#pragma once
2+
3+
#include <memory>
4+
5+
#if defined(HAVE_ARROW_S3)
6+
#include <arrow/filesystem/s3fs.h>
7+
#endif
8+
9+
#include "iceberg/file_io.h"
10+
11+
namespace iceberg::arrow {
12+
13+
#if defined(HAVE_ARROW_S3)
14+
// Creates a FileIO backed by Arrow's S3FileSystem, configured with `options`.
// Declared only when HAVE_ARROW_S3 is defined at compile time.
15+
std::unique_ptr<FileIO> MakeS3FileIO(const ::arrow::fs::S3Options& options);
16+
#endif
17+
18+
} // namespace iceberg::arrow

tests/arrow_s3_integration_test.cc

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
// Integration test for Arrow S3-backed FileIO.
2+
// This test is optional and only runs when the environment variable
3+
// ICEBERG_S3_INTEGRATION is set to "1" and Arrow is built with S3 support.
4+
5+
#include <gtest/gtest.h>
6+
#include <cstdlib>
7+
#include <string>
8+
9+
#if defined(HAVE_ARROW_S3)
10+
#include <arrow/filesystem/s3fs.h>
11+
#include "iceberg/arrow/arrow_s3_file_io.h"
12+
#endif
13+
14+
// Round-trips a small object through the S3-backed FileIO: write, read back,
// compare, delete. The test is skipped unless ICEBERG_S3_INTEGRATION is "1"
// and the build defines HAVE_ARROW_S3.
TEST(ArrowS3Integration, CreateReadDeleteObject) {
  const char* run = std::getenv("ICEBERG_S3_INTEGRATION");
  if (!run || std::string(run) != "1") {
    GTEST_SKIP() << "ICEBERG_S3_INTEGRATION not set; skipping S3 integration test";
  }

#if !defined(HAVE_ARROW_S3)
  GTEST_SKIP() << "Arrow not built with S3 support; skipping S3 integration test";
#else
  // Connection info comes from the environment so the test can target MinIO or AWS.
  const char* endpoint = std::getenv("ICEBERG_S3_ENDPOINT");
  const char* access_key = std::getenv("ICEBERG_S3_ACCESS_KEY");
  const char* secret_key = std::getenv("ICEBERG_S3_SECRET_KEY");
  const char* bucket = std::getenv("ICEBERG_S3_BUCKET");

  ASSERT_NE(endpoint, nullptr);
  ASSERT_NE(access_key, nullptr);
  ASSERT_NE(secret_key, nullptr);
  ASSERT_NE(bucket, nullptr);

  // S3Options exposes no public access_key/secret_key members; explicit
  // credentials are configured through FromAccessKey()/ConfigureAccessKey().
  ::arrow::fs::S3Options s3_opts =
      ::arrow::fs::S3Options::FromAccessKey(access_key, secret_key);

  // endpoint_override expects "host[:port]"; the protocol lives in `scheme`.
  // Accept an optional http:// or https:// prefix in the env value and split it.
  std::string endpoint_host(endpoint);
  const std::string http_prefix = "http://";
  const std::string https_prefix = "https://";
  if (endpoint_host.compare(0, http_prefix.size(), http_prefix) == 0) {
    s3_opts.scheme = "http";
    endpoint_host.erase(0, http_prefix.size());
  } else if (endpoint_host.compare(0, https_prefix.size(), https_prefix) == 0) {
    s3_opts.scheme = "https";
    endpoint_host.erase(0, https_prefix.size());
  }
  s3_opts.endpoint_override = endpoint_host;
  s3_opts.region = "us-east-1";
  // Path-style addressing (bucket in the path, not the host name).
  s3_opts.force_virtual_addressing = false;

  auto fileio = iceberg::arrow::MakeS3FileIO(s3_opts);
  ASSERT_NE(fileio, nullptr);

  // NOTE(review): Arrow's S3FileSystem itself takes "bucket/key" paths rather
  // than URIs - confirm the FileIO implementation accepts the s3:// form.
  std::string key = std::string("iceberg_test/arrow_s3_integration_test.txt");
  std::string uri = std::string("s3://") + bucket + "/" + key;

  // NOTE(review): the .ok() calls below assume the FileIO status/result types
  // provide ok() - verify against iceberg/file_io.h.
  const std::string payload = "hello iceberg s3 integration";
  ASSERT_TRUE(fileio->WriteFile(uri, payload).ok());

  // Use non-fatal checks for the read so the object is always deleted below,
  // even when verification fails.
  auto result = fileio->ReadFile(uri);
  EXPECT_TRUE(result.ok());
  if (result.ok()) {
    EXPECT_EQ(result->size(), payload.size());
    EXPECT_EQ(*result, payload);
  }

  ASSERT_TRUE(fileio->DeleteFile(uri).ok());
#endif
}

0 commit comments

Comments
 (0)