Skip to content

Commit a9cbf87

Browse files
fix(rest): skip Hadoop-only vended storage credentials during resolution
1 parent ec1413d commit a9cbf87

2 files changed

Lines changed: 80 additions & 4 deletions

File tree

pyiceberg/catalog/rest/__init__.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,11 @@ class ListViewsResponse(IcebergBaseModel):
396396
_PLANNING_RESPONSE_ADAPTER = TypeAdapter(PlanningResponse)
397397

398398

399+
def _is_hadoop_only_config(config: Properties) -> bool:
400+
"""Return True if every key is a Hadoop ``fs.*`` key — pyiceberg has no HadoopFileIO to consume them."""
401+
return bool(config) and all(k.startswith("fs.") for k in config)
402+
403+
399404
class RestCatalog(Catalog):
400405
uri: str
401406
_session: Session
@@ -462,22 +467,32 @@ def _create_session(self) -> Session:
462467

463468
@staticmethod
464469
def _resolve_storage_credentials(storage_credentials: list[StorageCredential], location: str | None) -> Properties:
465-
"""Resolve the best-matching storage credential by longest prefix match.
470+
"""Pick the longest-prefix storage credential for ``location``.
466471
467-
Mirrors the Java implementation in S3FileIO.clientForStoragePath() which iterates
468-
over storage credential prefixes and selects the one with the longest match.
472+
Mirrors Java ``S3FileIO.clientForStoragePath``. Hadoop-only (``fs.*``)
473+
credentials are filtered out since pyiceberg has no HadoopFileIO to
474+
consume them — otherwise a catalog vending both ``fs.*`` and ``s3.*``
475+
bundles per location could strand the FileIO with unusable keys.
469476
470477
See: https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIO.java
471478
"""
472479
if not storage_credentials or not location:
473480
return {}
474481

482+
consumable = [c for c in storage_credentials if not _is_hadoop_only_config(c.config)]
483+
475484
best_match: StorageCredential | None = None
476-
for cred in storage_credentials:
485+
for cred in consumable:
477486
if location.startswith(cred.prefix):
478487
if best_match is None or len(cred.prefix) > len(best_match.prefix):
479488
best_match = cred
480489

490+
# Java S3FileIO falls back to the "s3" ROOT_PREFIX credential; scope it to
491+
# schemes pyarrow's S3FileSystem handles so non-S3 schemes (gs://, abfs://,
492+
# etc.) don't get handed s3.* keys.
493+
if best_match is None and location.startswith(("s3://", "s3a://", "s3n://", "oss://")):
494+
best_match = next((c for c in consumable if c.prefix == "s3"), None)
495+
481496
return best_match.config if best_match else {}
482497

483498
def _load_file_io(self, properties: Properties = EMPTY_DICT, location: str | None = None) -> FileIO:

tests/catalog/test_rest.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3109,6 +3109,67 @@ def test_resolve_storage_credentials_empty() -> None:
31093109
assert RestCatalog._resolve_storage_credentials([], None) == {}
31103110

31113111

3112+
def test_resolve_storage_credentials_skips_hadoop_only() -> None:
3113+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
3114+
3115+
# The longer fs.* prefix would win a blind longest-match; the filter drops it.
3116+
credentials = [
3117+
StorageCredential(prefix="s3://warehouse/jindo", config={"fs.s3.access-key": "hadoop-k"}),
3118+
StorageCredential(prefix="s3://warehouse", config={"s3.access-key-id": "native-k"}),
3119+
]
3120+
result = RestCatalog._resolve_storage_credentials(credentials, "s3://warehouse/jindo/table/data")
3121+
assert result == {"s3.access-key-id": "native-k"}
3122+
3123+
3124+
def test_resolve_storage_credentials_mixed_prefix_namespaces_preserved() -> None:
3125+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
3126+
3127+
credentials = [
3128+
StorageCredential(prefix="gs", config={"gs.oauth2.token": "tok"}),
3129+
StorageCredential(prefix="s3", config={"s3.access-key-id": "native-k"}),
3130+
]
3131+
result = RestCatalog._resolve_storage_credentials(credentials, "gs://bucket/path")
3132+
assert result == {"gs.oauth2.token": "tok"}
3133+
3134+
3135+
def test_resolve_storage_credentials_all_hadoop_only_returns_empty() -> None:
3136+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
3137+
3138+
credentials = [
3139+
StorageCredential(prefix="custom", config={"fs.custom.access-key": "hadoop-k"}),
3140+
]
3141+
assert RestCatalog._resolve_storage_credentials(credentials, "custom://bucket/path") == {}
3142+
3143+
3144+
def test_resolve_storage_credentials_root_prefix_fallback_for_s3_compatible_scheme() -> None:
3145+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
3146+
3147+
# oss:// is routed through pyarrow's S3FileSystem, so ROOT_PREFIX "s3" applies.
3148+
credentials = [
3149+
StorageCredential(prefix="s3", config={"s3.access-key-id": "native-k"}),
3150+
]
3151+
result = RestCatalog._resolve_storage_credentials(credentials, "oss://bucket/path")
3152+
assert result == {"s3.access-key-id": "native-k"}
3153+
3154+
3155+
def test_resolve_storage_credentials_root_prefix_fallback_respects_consumable() -> None:
3156+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
3157+
3158+
credentials = [
3159+
StorageCredential(prefix="s3", config={"fs.s3.access-key": "hadoop-k"}),
3160+
]
3161+
assert RestCatalog._resolve_storage_credentials(credentials, "s3://bucket/path") == {}
3162+
3163+
3164+
def test_resolve_storage_credentials_fallback_skipped_for_non_s3_scheme() -> None:
3165+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
3166+
3167+
credentials = [
3168+
StorageCredential(prefix="s3", config={"s3.access-key-id": "native-k"}),
3169+
]
3170+
assert RestCatalog._resolve_storage_credentials(credentials, "gs://bucket/path") == {}
3171+
3172+
31123173
def test_load_table_with_storage_credentials(rest_mock: Mocker, example_table_metadata_with_snapshot_v1: dict[str, Any]) -> None:
31133174
metadata_location = "s3://warehouse/database/table/metadata/00001.metadata.json"
31143175
rest_mock.get(

0 commit comments

Comments
 (0)