Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
cb6400e
feat(auth): implement SigV4 authentication for REST catalog
plusplusjiajia Apr 11, 2026
9bc49f3
fix(ci): enable SigV4 build in cpp-linter workflow
plusplusjiajia Apr 11, 2026
a25e7f5
address review feedback
plusplusjiajia Apr 14, 2026
26848b1
add single-arg Authenticate() overload
plusplusjiajia Apr 14, 2026
51c1b4a
sigv4 x-amz-content-sha256 must be Base64 in canonical headers
plusplusjiajia Apr 14, 2026
4977e83
adopt request-in/request-out Authenticate interface
plusplusjiajia Apr 14, 2026
213cd37
move MakeSigV4AuthManager to sigv4_auth_manager.cc
plusplusjiajia Apr 14, 2026
1971920
Meson: wire SigV4 behind a feature option
plusplusjiajia Apr 14, 2026
23c3e6d
drop unnecessary signing mutex
plusplusjiajia Apr 15, 2026
9c7622d
address review feedback
plusplusjiajia Apr 15, 2026
358f957
ci: drop redundant ninja-build install from cpp-linter
plusplusjiajia Apr 25, 2026
fc3dcd5
address review feedback
plusplusjiajia May 12, 2026
1e60f45
expose explicit AWS SDK lifecycle for SigV4
plusplusjiajia May 17, 2026
6dad865
fold sigv4_auth_manager.cc into the rest sources set
plusplusjiajia May 17, 2026
8794353
wrap AWS SDK lifecycle globals in AwsSdkLifecycle singleton
plusplusjiajia May 17, 2026
ac6a123
address review feedback
plusplusjiajia May 21, 2026
7fc69d3
address review feedback: SigV4 region, session lifecycle, S3+SigV4, M…
plusplusjiajia Jun 7, 2026
b730178
feat(cmake): reuse Arrow's bundled AWS SDK for SigV4 (no system SDK f…
plusplusjiajia Jun 8, 2026
ced9849
fix(rest): define ToString(HttpMethod) inline in header
plusplusjiajia Jun 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 184 additions & 0 deletions .github/workflows/aws_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# AWS-related tests. ICEBERG_S3 and ICEBERG_SIGV4 are exercised individually and
# together; with both on, SigV4 reuses the AWS SDK bundled with Arrow's S3
# support (ICEBERG_AWSSDK_SOURCE=AUTO resolves to BUNDLED), so a single AWS SDK
# is linked and ODR violations are avoided.
name: AWS Tests

on:
push:
branches:
- '**'
- '!dependabot/**'
tags:
- '**'
pull_request:
types: [opened, synchronize, reopened, ready_for_review]

concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

permissions:
contents: read

env:
ICEBERG_HOME: /tmp/iceberg

jobs:
aws:
if: ${{ github.event_name != 'pull_request' || github.event.pull_request.draft == false }}
name: AWS (${{ matrix.title }})
runs-on: ${{ matrix.runs-on }}
timeout-minutes: 45
strategy:
fail-fast: false
matrix:
include:
- title: AMD64 Ubuntu 24.04, S3
runs-on: ubuntu-24.04
CC: gcc-14
CXX: g++-14
s3: "ON"
sigv4: "OFF"
- title: AMD64 Ubuntu 24.04, SigV4
runs-on: ubuntu-24.04
CC: gcc-14
CXX: g++-14
s3: "OFF"
sigv4: "ON"
aws-sdk-features: core
- title: AMD64 Ubuntu 24.04, S3 + SigV4
runs-on: ubuntu-24.04
CC: gcc-14
CXX: g++-14
s3: "ON"
sigv4: "ON"
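# With S3 enabled, ICEBERG_AWSSDK_SOURCE=AUTO resolves to BUNDLED, so the
# vcpkg install step (gated on s3 == 'OFF') is skipped for this entry and the
# feature list below is only consulted if that step is re-enabled together
# with ICEBERG_AWSSDK_SOURCE=SYSTEM. In that case Arrow's S3 filesystem
# consumes the same AWS SDK, so it needs the S3-related components in
# addition to core (config is required by Arrow's FindAWSSDKAlt).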
aws-sdk-features: core,config,s3,identity-management,sts,transfer
- title: AArch64 macOS 26, S3
runs-on: macos-26
s3: "ON"
sigv4: "OFF"
env:
ICEBERG_TEST_S3_URI: s3://iceberg-test
AWS_ACCESS_KEY_ID: minio
AWS_SECRET_ACCESS_KEY: minio123
AWS_DEFAULT_REGION: us-east-1
AWS_ENDPOINT_URL: http://127.0.0.1:9000
AWS_EC2_METADATA_DISABLED: "TRUE"
steps:
- name: Checkout iceberg-cpp
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Install dependencies on Ubuntu
if: ${{ startsWith(matrix.runs-on, 'ubuntu') }}
shell: bash
run: sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev
- name: Cache vcpkg packages
if: ${{ matrix.sigv4 == 'ON' && matrix.s3 == 'OFF' }}
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
id: vcpkg-cache
with:
path: /usr/local/share/vcpkg/installed
key: vcpkg-x64-linux-aws-sdk-cpp-s3-${{ matrix.s3 }}-sigv4-${{ matrix.sigv4 }}-${{ hashFiles('.github/workflows/aws_test.yml') }}
- name: Install AWS SDK via vcpkg
if: ${{ matrix.sigv4 == 'ON' && matrix.s3 == 'OFF' && steps.vcpkg-cache.outputs.cache-hit != 'true' }}
shell: bash
# Retry to ride out transient GitHub/mirror download failures (504s).
run: |
for attempt in 1 2 3; do
if vcpkg install "aws-sdk-cpp[${{ matrix.aws-sdk-features }}]:x64-linux"; then
exit 0
fi
echo "::warning::vcpkg install failed (attempt ${attempt}/3), retrying in 30s"
sleep 30
done
echo "::error::vcpkg install failed after 3 attempts"
exit 1
- name: Set Ubuntu Compilers
if: ${{ startsWith(matrix.runs-on, 'ubuntu') }}
run: |
echo "CC=${{ matrix.CC }}" >> $GITHUB_ENV
echo "CXX=${{ matrix.CXX }}" >> $GITHUB_ENV
- name: Start MinIO
if: ${{ matrix.s3 == 'ON' }}
shell: bash
run: bash ci/scripts/start_minio.sh
- name: Build and test Iceberg
shell: bash
env:
CMAKE_TOOLCHAIN_FILE: ${{ matrix.sigv4 == 'ON' && matrix.s3 == 'OFF' && '/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake' || '' }}
run: ci/scripts/build_iceberg.sh "$(pwd)" OFF OFF ${{ matrix.s3 }} ${{ matrix.sigv4 }}

# Exercise the Meson build with SigV4 enabled (resolves aws-cpp-sdk-core via
# its CMake config, not pkg-config whose Cflags force -std=c++11).
meson-sigv4:
if: ${{ github.event_name != 'pull_request' || github.event.pull_request.draft == false }}
name: Meson SigV4 (AMD64 Ubuntu 24.04)
runs-on: ubuntu-24.04
timeout-minutes: 45
steps:
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: '3.x'
- name: Checkout iceberg-cpp
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- name: Install build dependencies
shell: bash
run: |
sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev
python3 -m pip install --upgrade pip
python3 -m pip install -r requirements.txt
- name: Cache vcpkg packages
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
id: vcpkg-cache
with:
path: /usr/local/share/vcpkg/installed
key: vcpkg-x64-linux-aws-sdk-cpp-core-${{ hashFiles('.github/workflows/aws_test.yml') }}
- name: Install AWS SDK via vcpkg
if: ${{ steps.vcpkg-cache.outputs.cache-hit != 'true' }}
shell: bash
# Retry to ride out transient GitHub/mirror download failures (504s).
run: |
for attempt in 1 2 3; do
if vcpkg install aws-sdk-cpp[core]:x64-linux; then
exit 0
fi
echo "::warning::vcpkg install failed (attempt ${attempt}/3), retrying in 30s"
sleep 30
done
echo "::error::vcpkg install failed after 3 attempts"
exit 1
- name: Set Ubuntu Compilers
run: |
echo "CC=gcc-14" >> $GITHUB_ENV
echo "CXX=g++-14" >> $GITHUB_ENV
- name: Build and test Iceberg
shell: bash
env:
CMAKE_PREFIX_PATH: /usr/local/share/vcpkg/installed/x64-linux
run: |
meson setup builddir -Dsigv4=enabled
meson compile -C builddir
meson test -C builddir --timeout-multiplier 0 --print-errorlogs
83 changes: 0 additions & 83 deletions .github/workflows/s3_test.yml

This file was deleted.

7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ option(ICEBERG_SQL_SQLITE "Build the SQLite connector for the SQL catalog" OFF)
option(ICEBERG_SQL_POSTGRESQL "Build the PostgreSQL connector for the SQL catalog" OFF)
option(ICEBERG_SQL_MYSQL "Build the MySQL connector for the SQL catalog" OFF)
option(ICEBERG_S3 "Build with S3 support" OFF)
option(ICEBERG_SIGV4 "Build SigV4 authentication support (requires AWS SDK)" OFF)
set(ICEBERG_AWSSDK_SOURCE
"AUTO"
CACHE STRING "AWS SDK source for SigV4: AUTO (reuse Arrow's bundled AWS SDK when \
ICEBERG_S3 is ON, otherwise SYSTEM), SYSTEM (find an installed AWS SDK), or \
BUNDLED (reuse Arrow's bundled AWS SDK; requires ICEBERG_S3)")
set_property(CACHE ICEBERG_AWSSDK_SOURCE PROPERTY STRINGS AUTO SYSTEM BUNDLED)
option(ICEBERG_ENABLE_ASAN "Enable Address Sanitizer" OFF)
option(ICEBERG_ENABLE_UBSAN "Enable Undefined Behavior Sanitizer" OFF)

Expand Down
9 changes: 8 additions & 1 deletion ci/scripts/build_iceberg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# specific language governing permissions and limitations
# under the License.
#
# Usage: build_iceberg.sh <source_dir> [rest_integration_tests=OFF] [sccache=OFF] [s3=OFF]
# Usage: build_iceberg.sh <source_dir> [rest_integration_tests=OFF] [sccache=OFF] [s3=OFF] [sigv4=OFF]

set -eux

Expand All @@ -26,6 +26,7 @@ build_dir=${1}/build
build_rest_integration_test=${2:-OFF}
build_enable_sccache=${3:-OFF}
build_enable_s3=${4:-OFF}
build_enable_sigv4=${5:-OFF}

mkdir ${build_dir}
pushd ${build_dir}
Expand All @@ -48,6 +49,12 @@ else
CMAKE_ARGS+=("-DICEBERG_S3=OFF")
fi

if [[ "${build_enable_sigv4}" == "ON" ]]; then
CMAKE_ARGS+=("-DICEBERG_SIGV4=ON")
else
CMAKE_ARGS+=("-DICEBERG_SIGV4=OFF")
fi

if is_windows; then
CMAKE_ARGS+=("-DCMAKE_TOOLCHAIN_FILE=C:/vcpkg/scripts/buildsystems/vcpkg.cmake")
CMAKE_ARGS+=("-DCMAKE_BUILD_TYPE=Release")
Expand Down
48 changes: 48 additions & 0 deletions cmake_modules/IcebergThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,22 @@
set(ICEBERG_SYSTEM_DEPENDENCIES)
set(ICEBERG_ARROW_INSTALL_INTERFACE_LIBS)

# Resolve which AWS SDK the SigV4 support links against.
#
# Input:  ICEBERG_AWSSDK_SOURCE, one of AUTO, SYSTEM or BUNDLED.
# Output: ICEBERG_AWSSDK_SOURCE_RESOLVED, either SYSTEM or BUNDLED.
#
# AUTO resolves to BUNDLED when ICEBERG_S3 is ON and to SYSTEM otherwise.
# Nothing is resolved when ICEBERG_SIGV4 is OFF.
if(ICEBERG_SIGV4)
  # Fail fast on typos (e.g. "BUNDLE" or "system"): an unrecognized value would
  # otherwise match neither AUTO nor BUNDLED below and silently be handled as
  # if SYSTEM had been requested.
  if(NOT "${ICEBERG_AWSSDK_SOURCE}" MATCHES "^(AUTO|SYSTEM|BUNDLED)$")
    message(FATAL_ERROR "Invalid ICEBERG_AWSSDK_SOURCE='${ICEBERG_AWSSDK_SOURCE}': "
                        "expected one of AUTO, SYSTEM or BUNDLED.")
  endif()

  set(ICEBERG_AWSSDK_SOURCE_RESOLVED "${ICEBERG_AWSSDK_SOURCE}")
  if(ICEBERG_AWSSDK_SOURCE_RESOLVED STREQUAL "AUTO")
    if(ICEBERG_S3)
      set(ICEBERG_AWSSDK_SOURCE_RESOLVED "BUNDLED")
    else()
      set(ICEBERG_AWSSDK_SOURCE_RESOLVED "SYSTEM")
    endif()
  endif()

  # The bundled SDK only exists as part of Arrow's S3 build, so BUNDLED cannot
  # be satisfied without ICEBERG_S3.
  if(ICEBERG_AWSSDK_SOURCE_RESOLVED STREQUAL "BUNDLED" AND NOT ICEBERG_S3)
    message(FATAL_ERROR "ICEBERG_AWSSDK_SOURCE=BUNDLED requires ICEBERG_S3=ON: "
                        "the bundled AWS SDK is provided by Arrow's S3 support.")
  endif()
  message(STATUS "AWS SDK source for SigV4: ${ICEBERG_AWSSDK_SOURCE_RESOLVED}")
endif()

# ----------------------------------------------------------------------
# Versions and URLs for toolchain builds
#
Expand Down Expand Up @@ -110,6 +126,11 @@ function(resolve_arrow_dependency)
set(ARROW_RUNTIME_SIMD_LEVEL "NONE")
set(ARROW_POSITION_INDEPENDENT_CODE ON)
set(ARROW_DEPENDENCY_SOURCE "BUNDLED")
if(ICEBERG_S3
AND ICEBERG_SIGV4
AND ICEBERG_AWSSDK_SOURCE_RESOLVED STREQUAL "SYSTEM")
set(AWSSDK_SOURCE "SYSTEM")
endif()
set(ARROW_WITH_ZLIB ON)
set(ZLIB_SOURCE "SYSTEM")
set(ARROW_VERBOSE_THIRDPARTY_BUILD OFF)
Expand Down Expand Up @@ -628,3 +649,30 @@ endif()
if(ICEBERG_BUILD_SQL_CATALOG)
resolve_sql_catalog_dependencies()
endif()

# ----------------------------------------------------------------------
# AWS SDK for C++

# Resolve the AWS SDK for C++ used by SigV4. Takes no arguments.
#
# Reads ICEBERG_AWSSDK_SOURCE_RESOLVED. When it is BUNDLED the function only
# logs a status line and returns without looking for any package. For any
# other value the AWSSDK package with its `core` component is required, and
# the remainder of the function records AWSSDK in ICEBERG_SYSTEM_DEPENDENCIES
# and sets ICEBERG_FIND_EXTRA_ARGS_AWSSDK in the caller's scope.
function(resolve_aws_sdk_dependency)
# BUNDLED: nothing to find here; the status message states that SigV4 reuses
# Arrow's bundled aws-cpp-sdk-core.
if(ICEBERG_AWSSDK_SOURCE_RESOLVED STREQUAL "BUNDLED")
message(STATUS "SigV4 reuses Arrow's bundled AWS SDK (aws-cpp-sdk-core)")
return()
endif()
# Non-bundled path: REQUIRED makes configuration fail if the SDK is missing.
find_package(AWSSDK REQUIRED COMPONENTS core)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This still resolves SigV4 against a separate system/vcpkg AWS SDK, while ICEBERG_S3 uses Arrow built with its own bundled AWS SDK. The PR currently tests those modes separately because enabling both causes ODR conflicts, but ICEBERG_S3=ON + ICEBERG_SIGV4=ON should be a supported configuration. If fully vendoring AWS SDK here is too large for this PR, can we at least make Arrow S3 consume the same resolved AWS SDK so both features link against one AWS SDK instance, and track full vendoring as a follow-up?

list(APPEND ICEBERG_SYSTEM_DEPENDENCIES AWSSDK)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here it records only AWSSDK for installed-package dependency discovery, while src/iceberg/catalog/rest/CMakeLists.txt exports aws-cpp-sdk-core in the REST install interface. The generated iceberg-config.cmake can only call find_dependency(AWSSDK) without COMPONENTS core, but AWS SDK’s CMake config loads component packages from AWSSDK_FIND_COMPONENTS. A downstream installed SigV4 build can therefore fail to find/link AWS core unless it happens to be on the default linker path.

I'd suggest to special-case find_dependency(AWSSDK COMPONENTS core) in the iceberg-config.cmake.in or otherwise export the AWS SDK dependency component-aware.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @wgtmac , good point — handled in iceberg-config.cmake.in by special-casing AWSSDK to call find_dependency(AWSSDK COMPONENTS core) so downstream installed builds bring in aws-cpp-sdk-core.

set(ICEBERG_SYSTEM_DEPENDENCIES
${ICEBERG_SYSTEM_DEPENDENCIES}
PARENT_SCOPE)
# Forwarded to find_dependency(AWSSDK ...) in iceberg-config.cmake.in so
# downstream installed builds load aws-cpp-sdk-core via AWSSDK_FIND_COMPONENTS.
set(ICEBERG_FIND_EXTRA_ARGS_AWSSDK
"COMPONENTS;core"
PARENT_SCOPE)
endfunction()

# SigV4 support depends on the REST catalog build: reject ICEBERG_SIGV4=ON
# combined with ICEBERG_BUILD_REST=OFF before attempting to resolve the SDK.
if(ICEBERG_SIGV4 AND NOT ICEBERG_BUILD_REST)
  message(FATAL_ERROR "ICEBERG_SIGV4 requires ICEBERG_BUILD_REST to be ON")
endif()

# Only look for the AWS SDK when SigV4 has actually been requested.
if(ICEBERG_SIGV4)
  resolve_aws_sdk_dependency()
endif()
7 changes: 7 additions & 0 deletions meson.options
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,11 @@ option(
value: 'disabled',
)

# Feature option for AWS SigV4 authentication support in the REST catalog.
# Disabled by default; turn it on with `-Dsigv4=enabled`.
option(
'sigv4',
type: 'feature',
description: 'Build AWS SigV4 authentication support for rest catalog',
value: 'disabled',
)

option('tests', type: 'feature', description: 'Build tests', value: 'enabled')
Loading
Loading