Skip to content

Commit 91eea39

Browse files
address review feedback: SigV4 region, session lifecycle, S3+SigV4, Meson
1 parent 4386638 commit 91eea39

8 files changed

Lines changed: 147 additions & 22 deletions

File tree

.github/workflows/aws_test.yml

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
# AWS-related tests. ICEBERG_S3 (Arrow's bundled AWS SDK) and ICEBERG_SIGV4
19-
# (vcpkg-installed aws-cpp-sdk-core) are tested in separate jobs: linking
20-
# both into one binary causes ODR conflicts on the shared AWS SDK symbols.
18+
# AWS-related tests. ICEBERG_S3 and ICEBERG_SIGV4 are exercised individually and
19+
# together; with both on, Arrow's S3 reuses SigV4's system AWS SDK
20+
# (AWSSDK_SOURCE=SYSTEM) so a single AWS SDK is linked (no ODR).
2121
name: AWS Tests
2222

2323
on:
@@ -62,6 +62,16 @@ jobs:
6262
CXX: g++-14
6363
s3: "OFF"
6464
sigv4: "ON"
65+
aws-sdk-features: core
66+
- title: AMD64 Ubuntu 24.04, S3 + SigV4
67+
runs-on: ubuntu-24.04
68+
CC: gcc-14
69+
CXX: g++-14
70+
s3: "ON"
71+
sigv4: "ON"
72+
# Arrow's S3 filesystem consumes this same AWS SDK, so it needs the
73+
# S3-related components in addition to core.
74+
aws-sdk-features: core,s3,identity-management,sts,transfer
6575
- title: AArch64 macOS 26, S3
6676
runs-on: macos-26
6777
s3: "ON"
@@ -88,11 +98,11 @@ jobs:
8898
id: vcpkg-cache
8999
with:
90100
path: /usr/local/share/vcpkg/installed
91-
key: vcpkg-x64-linux-aws-sdk-cpp-core-${{ hashFiles('.github/workflows/aws_test.yml') }}
101+
key: vcpkg-x64-linux-aws-sdk-cpp-s3-${{ matrix.s3 }}-sigv4-${{ matrix.sigv4 }}-${{ hashFiles('.github/workflows/aws_test.yml') }}
92102
- name: Install AWS SDK via vcpkg
93103
if: ${{ matrix.sigv4 == 'ON' && steps.vcpkg-cache.outputs.cache-hit != 'true' }}
94104
shell: bash
95-
run: vcpkg install aws-sdk-cpp[core]:x64-linux
105+
run: vcpkg install "aws-sdk-cpp[${{ matrix.aws-sdk-features }}]:x64-linux"
96106
- name: Set Ubuntu Compilers
97107
if: ${{ startsWith(matrix.runs-on, 'ubuntu') }}
98108
run: |
@@ -113,3 +123,47 @@ jobs:
113123
env:
114124
CMAKE_TOOLCHAIN_FILE: ${{ matrix.sigv4 == 'ON' && '/usr/local/share/vcpkg/scripts/buildsystems/vcpkg.cmake' || '' }}
115125
run: ci/scripts/build_iceberg.sh "$(pwd)" OFF OFF ${{ matrix.s3 }} ${{ matrix.sigv4 }}
126+
127+
# Exercise the Meson build with SigV4 enabled (resolves aws-cpp-sdk-core via
128+
# its CMake config, not pkg-config whose Cflags force -std=c++11).
129+
meson-sigv4:
130+
if: ${{ github.event_name != 'pull_request' || github.event.pull_request.draft == false }}
131+
name: Meson SigV4 (AMD64 Ubuntu 24.04)
132+
runs-on: ubuntu-24.04
133+
timeout-minutes: 45
134+
steps:
135+
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
136+
with:
137+
python-version: '3.x'
138+
- name: Checkout iceberg-cpp
139+
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
140+
with:
141+
persist-credentials: false
142+
- name: Install build dependencies
143+
shell: bash
144+
run: |
145+
sudo apt-get update && sudo apt-get install -y libcurl4-openssl-dev
146+
python3 -m pip install --upgrade pip
147+
python3 -m pip install -r requirements.txt
148+
- name: Cache vcpkg packages
149+
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
150+
id: vcpkg-cache
151+
with:
152+
path: /usr/local/share/vcpkg/installed
153+
key: vcpkg-x64-linux-aws-sdk-cpp-core-${{ hashFiles('.github/workflows/aws_test.yml') }}
154+
- name: Install AWS SDK via vcpkg
155+
if: ${{ steps.vcpkg-cache.outputs.cache-hit != 'true' }}
156+
shell: bash
157+
run: vcpkg install aws-sdk-cpp[core]:x64-linux
158+
- name: Set Ubuntu Compilers
159+
run: |
160+
echo "CC=gcc-14" >> $GITHUB_ENV
161+
echo "CXX=g++-14" >> $GITHUB_ENV
162+
- name: Build and test Iceberg
163+
shell: bash
164+
env:
165+
CMAKE_PREFIX_PATH: /usr/local/share/vcpkg/installed/x64-linux
166+
run: |
167+
meson setup builddir -Dsigv4=enabled
168+
meson compile -C builddir
169+
meson test -C builddir --timeout-multiplier 0 --print-errorlogs

cmake_modules/IcebergThirdpartyToolchain.cmake

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,11 @@ function(resolve_arrow_dependency)
110110
set(ARROW_RUNTIME_SIMD_LEVEL "NONE")
111111
set(ARROW_POSITION_INDEPENDENT_CODE ON)
112112
set(ARROW_DEPENDENCY_SOURCE "BUNDLED")
113+
# With SigV4 also on, make Arrow's S3 reuse the system AWS SDK (the one SigV4
114+
# finds) instead of bundling its own, so only one AWS SDK is linked (no ODR).
115+
if(ICEBERG_S3 AND ICEBERG_SIGV4)
116+
set(AWSSDK_SOURCE "SYSTEM")
117+
endif()
113118
set(ARROW_WITH_ZLIB ON)
114119
set(ZLIB_SOURCE "SYSTEM")
115120
set(ARROW_VERBOSE_THIRDPARTY_BUILD OFF)

src/iceberg/catalog/rest/auth/auth_session.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,13 @@ class OAuth2AuthSession : public AuthSession,
7878
return session;
7979
}
8080

81-
Status Authenticate(std::unordered_map<std::string, std::string>& headers) override {
81+
Result<HttpRequest> Authenticate(const HttpRequest& request) override {
82+
HttpRequest authenticated = request;
8283
std::shared_lock lock(mutex_);
8384
for (const auto& [key, value] : headers_) {
84-
headers.try_emplace(key, value);
85+
authenticated.headers.try_emplace(key, value);
8586
}
86-
return {};
87+
return authenticated;
8788
}
8889

8990
Status Close() override { return CloseImpl(); }

src/iceberg/catalog/rest/auth/sigv4_auth_manager.cc

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@
3232
# include <aws/core/auth/AWSCredentialsProvider.h>
3333
# include <aws/core/auth/AWSCredentialsProviderChain.h>
3434
# include <aws/core/client/ClientConfiguration.h>
35+
# include <aws/core/config/ConfigAndCredentialsCacheManager.h>
3536
# include <aws/core/http/standard/StandardHttpRequest.h>
37+
# include <aws/core/platform/Environment.h>
3638
# include <aws/core/utils/HashingUtils.h>
3739

3840
# include "iceberg/catalog/rest/auth/auth_managers.h"
@@ -170,7 +172,11 @@ SigV4AuthSession::SigV4AuthSession(
170172
signer_(std::make_unique<RestSigV4Signer>(
171173
credentials_provider_, signing_name_.c_str(), signing_region_.c_str())) {}
172174

173-
SigV4AuthSession::~SigV4AuthSession() { AwsSdkLifecycle::Instance().UnregisterSession(); }
175+
SigV4AuthSession::~SigV4AuthSession() {
176+
if (owns_sdk_registration_) {
177+
AwsSdkLifecycle::Instance().UnregisterSession();
178+
}
179+
}
174180

175181
Result<HttpRequest> SigV4AuthSession::Authenticate(const HttpRequest& request) {
176182
ICEBERG_ASSIGN_OR_RAISE(auto delegate_request, delegate_->Authenticate(request));
@@ -338,18 +344,36 @@ SigV4AuthManager::MakeCredentialsProvider(
338344
return std::make_shared<Aws::Auth::DefaultAWSCredentialsProviderChain>();
339345
}
340346

341-
std::string SigV4AuthManager::ResolveSigningRegion(
347+
Result<std::string> SigV4AuthManager::ResolveSigningRegion(
342348
const std::unordered_map<std::string, std::string>& properties) {
343349
if (auto it = properties.find(AuthProperties::kSigV4SigningRegion);
344350
it != properties.end() && !it->second.empty()) {
345351
return it->second;
346352
}
347-
// ClientConfiguration() walks env / profile / IMDS / us-east-1; the IMDS
348-
// step can block for seconds on non-EC2 hosts. Resolve once per process
349-
// (set AWS_EC2_METADATA_DISABLED=true to skip IMDS).
350-
static const std::string kSdkResolvedRegion =
351-
std::string(Aws::Client::ClientConfiguration().region.c_str());
352-
return kSdkResolvedRegion;
353+
// Resolve from env then the shared config profile (skip IMDS — it can block
354+
// on non-EC2 hosts), and fail rather than silently defaulting to us-east-1.
355+
// Resolved once per process.
356+
static const std::string kResolvedRegion = []() -> std::string {
357+
Aws::String region = Aws::Environment::GetEnv("AWS_REGION");
358+
if (region.empty()) {
359+
region = Aws::Environment::GetEnv("AWS_DEFAULT_REGION");
360+
}
361+
if (region.empty()) {
362+
const auto& profiles = Aws::Config::GetCachedConfigProfiles();
363+
if (auto it = profiles.find(Aws::Auth::GetConfigProfileName());
364+
it != profiles.end()) {
365+
region = it->second.GetRegion();
366+
}
367+
}
368+
return std::string(region.c_str());
369+
}();
370+
if (kResolvedRegion.empty()) {
371+
return InvalidArgument(
372+
"SigV4: could not resolve a signing region; set the '{}' property or the "
373+
"AWS_REGION environment variable",
374+
AuthProperties::kSigV4SigningRegion);
375+
}
376+
return kResolvedRegion;
353377
}
354378

355379
std::string SigV4AuthManager::ResolveSigningName(
@@ -392,7 +416,7 @@ Result<std::shared_ptr<AuthSession>> SigV4AuthManager::WrapSession(
392416
const std::unordered_map<std::string, std::string>& properties,
393417
std::shared_ptr<Aws::Auth::AWSCredentialsProvider> reuse_credentials) {
394418
ICEBERG_ASSIGN_OR_RAISE(auto slot, SessionSlot::Reserve());
395-
auto region = ResolveSigningRegion(properties);
419+
ICEBERG_ASSIGN_OR_RAISE(auto region, ResolveSigningRegion(properties));
396420
auto service = ResolveSigningName(properties);
397421

398422
// Reuse the parent's provider unless properties override keys, avoiding a
@@ -409,6 +433,8 @@ Result<std::shared_ptr<AuthSession>> SigV4AuthManager::WrapSession(
409433
auto session =
410434
std::make_shared<SigV4AuthSession>(std::move(delegate_session), std::move(region),
411435
std::move(service), std::move(credentials));
436+
// The reserved slot's unregister responsibility now belongs to the session.
437+
session->owns_sdk_registration_ = true;
412438
slot.Release();
413439
return session;
414440
}

src/iceberg/catalog/rest/auth/sigv4_auth_manager_internal.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,17 @@ class ICEBERG_REST_EXPORT SigV4AuthSession : public AuthSession {
7878
}
7979

8080
private:
81+
// WrapSession() reserves an AwsSdkLifecycle slot and transfers it here.
82+
friend class SigV4AuthManager;
83+
8184
std::shared_ptr<AuthSession> delegate_;
8285
std::string signing_region_;
8386
std::string signing_name_;
8487
std::shared_ptr<Aws::Auth::AWSCredentialsProvider> credentials_provider_;
8588
std::unique_ptr<Aws::Client::AWSAuthV4Signer> signer_;
89+
// Only WrapSession()-created sessions registered a slot; directly-constructed
90+
// ones (e.g. tests) must not unregister and underflow the count.
91+
bool owns_sdk_registration_ = false;
8692
};
8793

8894
/// \brief An AuthManager that produces SigV4AuthSession instances.
@@ -114,7 +120,7 @@ class ICEBERG_REST_EXPORT SigV4AuthManager : public AuthManager {
114120
private:
115121
static Result<std::shared_ptr<Aws::Auth::AWSCredentialsProvider>>
116122
MakeCredentialsProvider(const std::unordered_map<std::string, std::string>& properties);
117-
static std::string ResolveSigningRegion(
123+
static Result<std::string> ResolveSigningRegion(
118124
const std::unordered_map<std::string, std::string>& properties);
119125
static std::string ResolveSigningName(
120126
const std::unordered_map<std::string, std::string>& properties);

src/iceberg/catalog/rest/meson.build

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,14 @@ iceberg_rest_build_deps = [iceberg_dep, cpr_dep]
4747
iceberg_rest_compile_defs = []
4848

4949
sigv4_opt = get_option('sigv4')
50-
aws_sdk_core_dep = dependency('aws-cpp-sdk-core', required: sigv4_opt)
50+
# Use the CMake config, not pkg-config: aws-cpp-sdk-core.pc Cflags force
51+
# -std=c++11 -fno-exceptions, which would override the project's C++23 build.
52+
aws_sdk_core_dep = dependency(
53+
'aws-cpp-sdk-core',
54+
method: 'cmake',
55+
modules: ['aws-cpp-sdk-core'],
56+
required: sigv4_opt,
57+
)
5158
if aws_sdk_core_dep.found()
5259
iceberg_rest_build_deps += aws_sdk_core_dep
5360
iceberg_rest_compile_defs += '-DICEBERG_SIGV4'

src/iceberg/test/auth_manager_test.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -496,9 +496,9 @@ TEST(OAuth2AuthSessionTest, InitialTokenIsUsed) {
496496
ASSERT_THAT(session_result, IsOk());
497497
auto session = session_result.value();
498498

499-
std::unordered_map<std::string, std::string> headers;
500-
ASSERT_THAT(session->Authenticate(headers), IsOk());
501-
EXPECT_EQ(headers["Authorization"], "Bearer initial-token-123");
499+
auto auth_result = session->Authenticate({});
500+
ASSERT_THAT(auth_result, IsOk());
501+
EXPECT_EQ(auth_result.value().headers.at("Authorization"), "Bearer initial-token-123");
502502

503503
session->Close();
504504
}

src/iceberg/test/sigv4_auth_test.cc

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,32 @@ TEST_F(SigV4AuthTest, LifecycleFinalizeRefusesWhileSessionsAlive) {
7272
EXPECT_TRUE(IsAwsSdkInitialized());
7373
}
7474

75+
TEST_F(SigV4AuthTest, DirectlyConstructedSessionDoesNotCorruptLifecycleCount) {
76+
// A directly-constructed session never registered with AwsSdkLifecycle, so
77+
// destroying it must not decrement the count. Otherwise it underflows, and a
78+
// later real session wraps it back to zero, letting FinalizeAwsSdk() shut the
79+
// SDK down while a session is still alive.
80+
{
81+
auto delegate = AuthSession::MakeDefault({});
82+
auto credentials = std::make_shared<Aws::Auth::SimpleAWSCredentialsProvider>(
83+
Aws::Auth::AWSCredentials("id", "secret"));
84+
auto direct = std::make_shared<SigV4AuthSession>(delegate, "us-east-1", "execute-api",
85+
credentials);
86+
} // destroyed here — must leave the lifecycle count untouched
87+
88+
auto properties = MakeSigV4Properties();
89+
auto manager_result = AuthManagers::Load("test-catalog", properties);
90+
ASSERT_THAT(manager_result, IsOk());
91+
auto session_result = manager_result.value()->CatalogSession(client_, properties);
92+
ASSERT_THAT(session_result, IsOk());
93+
94+
// Exactly one live (registered) session, so Finalize must refuse. With the
95+
// underflow bug the count would have wrapped and Finalize could wrongly
96+
// succeed and shut the SDK down.
97+
EXPECT_THAT(FinalizeAwsSdk(), IsError(ErrorKind::kInvalid));
98+
EXPECT_TRUE(IsAwsSdkInitialized());
99+
}
100+
75101
TEST_F(SigV4AuthTest, LoadSigV4AuthManager) {
76102
auto properties = MakeSigV4Properties();
77103
auto manager_result = AuthManagers::Load("test-catalog", properties);

0 commit comments

Comments
 (0)