Skip to content

Commit d6692fc

Browse files
fix(storage/s3): default to virtual-host-style addressing (#2330)
## Which issue does this PR close? N/A ## What changes are included in this PR? The Java Iceberg SDK defaults [`s3.path-style-access` to `false`](https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java#L238-L240) i.e. virtual-host-style addressing is the spec default. opendal's `S3Config::default()`, which the opendal storage backend inherits from, uses path-style. As a result, any user building an iceberg-rust `FileIO` against AWS S3 or an S3-compatible service has to explicitly set `s3.path-style-access=false` to get the same behavior they would get from Java out of the box — and several S3-compatible endpoints only accept virtual-host-style URLs and fail outright with `SecondLevelDomainForbidden` (or equivalent) under the current default. ## Are these changes tested? Yes: - `cargo test -p iceberg --lib io::storage::config::s3::tests` - `cargo test -p iceberg-storage-opendal --lib s3::tests
1 parent b06b573 commit d6692fc

7 files changed

Lines changed: 82 additions & 9 deletions

File tree

crates/catalog/hms/tests/hms_catalog_test.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,10 @@
2323
use std::collections::HashMap;
2424
use std::sync::Arc;
2525

26-
use iceberg::io::{FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY};
26+
use iceberg::io::{
27+
FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_PATH_STYLE_ACCESS, S3_REGION,
28+
S3_SECRET_ACCESS_KEY,
29+
};
2730
use iceberg::{Catalog, CatalogBuilder, Namespace, NamespaceIdent};
2831
use iceberg_catalog_hms::{
2932
HMS_CATALOG_PROP_THRIFT_TRANSPORT, HMS_CATALOG_PROP_URI, HMS_CATALOG_PROP_WAREHOUSE,
@@ -56,6 +59,7 @@ async fn get_catalog() -> HmsCatalog {
5659
(S3_ACCESS_KEY_ID.to_string(), "admin".to_string()),
5760
(S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()),
5861
(S3_REGION.to_string(), "us-east-1".to_string()),
62+
(S3_PATH_STYLE_ACCESS.to_string(), "true".to_string()),
5963
]);
6064

6165
// Wait for bucket to actually exist

crates/catalog/loader/tests/common/mod.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ use std::fmt;
2424
use std::sync::Arc;
2525

2626
use iceberg::io::{
27-
FileIOBuilder, LocalFsStorageFactory, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION,
28-
S3_SECRET_ACCESS_KEY,
27+
FileIOBuilder, LocalFsStorageFactory, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_PATH_STYLE_ACCESS,
28+
S3_REGION, S3_SECRET_ACCESS_KEY,
2929
};
3030
use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder};
3131
use iceberg::spec::{NestedField, PrimitiveType, Schema, Type};
@@ -229,6 +229,7 @@ async fn glue_catalog() -> GlueCatalog {
229229
(S3_ACCESS_KEY_ID.to_string(), "admin".to_string()),
230230
(S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()),
231231
(S3_REGION.to_string(), "us-east-1".to_string()),
232+
(S3_PATH_STYLE_ACCESS.to_string(), "true".to_string()),
232233
]);
233234

234235
let file_io = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 {
@@ -280,6 +281,7 @@ async fn hms_catalog() -> HmsCatalog {
280281
(S3_ACCESS_KEY_ID.to_string(), "admin".to_string()),
281282
(S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()),
282283
(S3_REGION.to_string(), "us-east-1".to_string()),
284+
(S3_PATH_STYLE_ACCESS.to_string(), "true".to_string()),
283285
]);
284286

285287
let file_io = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 {

crates/iceberg/src/io/storage/config/s3.rs

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,14 @@ pub const S3_DISABLE_CONFIG_LOAD: &str = "s3.disable-config-load";
6969
///
7070
/// This struct contains all the configuration options for connecting to Amazon S3.
7171
/// Use the builder pattern via `S3Config::builder()` to construct instances.
72-
/// ```
73-
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize, TypedBuilder)]
72+
///
73+
/// Defaults follow the Iceberg `S3FileIOProperties` spec (see
74+
/// [`PATH_STYLE_ACCESS_DEFAULT = false`](https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java)),
75+
/// i.e. virtual-host-style addressing is enabled unless
76+
/// `s3.path-style-access=true` is explicitly set. This matches what
77+
/// Java clients do out of the box and is required for a number of
78+
/// S3-compatible stores that do not support path-style URLs.
79+
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, TypedBuilder)]
7480
pub struct S3Config {
7581
/// S3 endpoint URL.
7682
#[builder(default, setter(strip_option, into))]
@@ -88,7 +94,9 @@ pub struct S3Config {
8894
#[builder(default, setter(strip_option, into))]
8995
pub region: Option<String>,
9096
/// Enable virtual host style (opposite of path style access).
91-
#[builder(default)]
97+
///
98+
/// Defaults to `true` to match Iceberg `S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT = false`.
99+
#[builder(default = true)]
92100
pub enable_virtual_host_style: bool,
93101
/// Server side encryption type.
94102
#[builder(default, setter(strip_option, into))]
@@ -125,6 +133,12 @@ pub struct S3Config {
125133
pub disable_config_load: bool,
126134
}
127135

136+
impl Default for S3Config {
137+
fn default() -> Self {
138+
Self::builder().build()
139+
}
140+
}
141+
128142
impl TryFrom<&StorageConfig> for S3Config {
129143
type Error = crate::Error;
130144

@@ -267,6 +281,17 @@ mod tests {
267281
assert_eq!(s3_config.region.as_deref(), Some("eu-west-1"));
268282
}
269283

284+
#[test]
285+
fn test_s3_config_default_is_virtual_host_style() {
286+
// Matches Iceberg S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT = false.
287+
assert!(S3Config::default().enable_virtual_host_style);
288+
assert!(
289+
S3Config::try_from(&StorageConfig::new())
290+
.unwrap()
291+
.enable_virtual_host_style
292+
);
293+
}
294+
270295
#[test]
271296
fn test_s3_config_path_style_access() {
272297
let storage_config = StorageConfig::new().with_prop(S3_PATH_STYLE_ACCESS, "true");

crates/integration_tests/src/lib.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
use std::collections::HashMap;
1919
use std::sync::OnceLock;
2020

21-
use iceberg::io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY};
21+
use iceberg::io::{
22+
S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_PATH_STYLE_ACCESS, S3_REGION, S3_SECRET_ACCESS_KEY,
23+
};
2224
use iceberg_catalog_rest::REST_CATALOG_PROP_URI;
2325
use iceberg_test_utils::{get_minio_endpoint, get_rest_catalog_endpoint, set_up};
2426

@@ -45,6 +47,7 @@ impl GlobalTestFixture {
4547
(S3_ACCESS_KEY_ID.to_string(), "admin".to_string()),
4648
(S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()),
4749
(S3_REGION.to_string(), "us-east-1".to_string()),
50+
(S3_PATH_STYLE_ACCESS.to_string(), "true".to_string()),
4851
]);
4952

5053
GlobalTestFixture { catalog_config }

crates/storage/opendal/src/s3.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,12 @@ use crate::utils::{from_opendal_error, is_truthy};
3737
/// Parse iceberg props to s3 config.
3838
pub(crate) fn s3_config_parse(mut m: HashMap<String, String>) -> Result<S3Config> {
3939
let mut cfg = S3Config::default();
40+
// Match Iceberg `S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT = false`:
41+
// virtual-host-style addressing is the spec default. opendal's own
42+
// default is path-style, which disagrees with the Java SDK and breaks
43+
// S3-compatible stores that only accept virtual-hosted-style URLs.
44+
// Any explicit `s3.path-style-access` property below overrides this.
45+
cfg.enable_virtual_host_style = true;
4046
if let Some(endpoint) = m.remove(S3_ENDPOINT) {
4147
cfg.endpoint = Some(endpoint);
4248
};
@@ -177,3 +183,28 @@ impl AwsCredentialLoad for CustomAwsCredentialLoader {
177183
self.0.load_credential(client).await
178184
}
179185
}
186+
187+
#[cfg(test)]
188+
mod tests {
189+
use std::collections::HashMap;
190+
191+
use iceberg::io::S3_PATH_STYLE_ACCESS;
192+
193+
use super::s3_config_parse;
194+
195+
fn parse_with(prop: Option<&str>) -> bool {
196+
let mut props = HashMap::new();
197+
if let Some(v) = prop {
198+
props.insert(S3_PATH_STYLE_ACCESS.to_string(), v.to_string());
199+
}
200+
s3_config_parse(props).unwrap().enable_virtual_host_style
201+
}
202+
203+
#[test]
204+
fn s3_config_parse_path_style_access() {
205+
// Match Iceberg S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT = false.
206+
assert!(parse_with(None));
207+
assert!(parse_with(Some("false")));
208+
assert!(!parse_with(Some("true")));
209+
}
210+
}

crates/storage/opendal/tests/file_io_s3_test.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ mod tests {
2626
use async_trait::async_trait;
2727
use futures::StreamExt;
2828
use iceberg::io::{
29-
FileIO, FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY,
29+
FileIO, FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_PATH_STYLE_ACCESS, S3_REGION,
30+
S3_SECRET_ACCESS_KEY,
3031
};
3132
use iceberg_storage_opendal::{CustomAwsCredentialLoader, OpenDalStorageFactory};
3233
use iceberg_test_utils::{get_minio_endpoint, normalize_test_name_with_parts, set_up};
@@ -47,6 +48,7 @@ mod tests {
4748
(S3_ACCESS_KEY_ID, "admin".to_string()),
4849
(S3_SECRET_ACCESS_KEY, "password".to_string()),
4950
(S3_REGION, "us-east-1".to_string()),
51+
(S3_PATH_STYLE_ACCESS, "true".to_string()),
5052
])
5153
.build()
5254
}
@@ -139,6 +141,7 @@ mod tests {
139141
(S3_ENDPOINT, "http://localhost:9000".to_string()),
140142
("bucket", "test-bucket".to_string()),
141143
(S3_REGION, "us-east-1".to_string()),
144+
(S3_PATH_STYLE_ACCESS, "true".to_string()),
142145
]);
143146
}
144147

@@ -160,6 +163,7 @@ mod tests {
160163
.with_props(vec![
161164
(S3_ENDPOINT, minio_endpoint),
162165
(S3_REGION, "us-east-1".to_string()),
166+
(S3_PATH_STYLE_ACCESS, "true".to_string()),
163167
])
164168
.build();
165169

@@ -188,6 +192,7 @@ mod tests {
188192
.with_props(vec![
189193
(S3_ENDPOINT, minio_endpoint),
190194
(S3_REGION, "us-east-1".to_string()),
195+
(S3_PATH_STYLE_ACCESS, "true".to_string()),
191196
])
192197
.build();
193198

crates/storage/opendal/tests/resolving_storage_test.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ mod tests {
2929
use std::sync::Arc;
3030

3131
use iceberg::io::{
32-
FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY,
32+
FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_PATH_STYLE_ACCESS, S3_REGION,
33+
S3_SECRET_ACCESS_KEY,
3334
};
3435
use iceberg_storage_opendal::OpenDalResolvingStorageFactory;
3536
use iceberg_test_utils::{get_minio_endpoint, normalize_test_name_with_parts, set_up};
@@ -45,6 +46,7 @@ mod tests {
4546
(S3_ACCESS_KEY_ID, "admin".to_string()),
4647
(S3_SECRET_ACCESS_KEY, "password".to_string()),
4748
(S3_REGION, "us-east-1".to_string()),
49+
(S3_PATH_STYLE_ACCESS, "true".to_string()),
4850
])
4951
.build()
5052
}
@@ -288,6 +290,7 @@ mod tests {
288290
.with_props(vec![
289291
(S3_ENDPOINT, minio_endpoint),
290292
(S3_REGION, "us-east-1".to_string()),
293+
(S3_PATH_STYLE_ACCESS, "true".to_string()),
291294
])
292295
.build();
293296

0 commit comments

Comments
 (0)