Skip to content

Commit 8931d90

Browse files
committed
Arrow: Suppress warning and cache bucket location
Attempt to remove the unnecessary warning, and cache the location of the bucket independently of the FileIO. Fixes #1705. Fixes #1708.
1 parent 06404a5 commit 8931d90

File tree

1 file changed

+10
-3
lines changed

1 file changed

+10
-3
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,13 @@
194194
T = TypeVar("T")
195195

196196

197+
@lru_cache
198+
def _cached_resolve_s3_region(bucket: str) -> str:
199+
from pyarrow.fs import resolve_s3_region
200+
201+
return resolve_s3_region(bucket=bucket)
202+
203+
197204
class UnsupportedPyArrowTypeException(Exception):
198205
"""Cannot convert PyArrow type to corresponding Iceberg type."""
199206

@@ -414,19 +421,19 @@ def _initialize_oss_fs(self) -> FileSystem:
414421
return S3FileSystem(**client_kwargs)
415422

416423
def _initialize_s3_fs(self, netloc: Optional[str]) -> FileSystem:
417-
from pyarrow.fs import S3FileSystem, resolve_s3_region
424+
from pyarrow.fs import S3FileSystem
418425

419426
# Resolve region from netloc(bucket), fallback to user-provided region
420427
provided_region = get_first_property_value(self.properties, S3_REGION, AWS_REGION)
421428

422429
try:
423-
bucket_region = resolve_s3_region(bucket=netloc)
430+
bucket_region = _cached_resolve_s3_region(bucket=netloc)
424431
except (OSError, TypeError):
425432
bucket_region = None
426433
logger.warning(f"Unable to resolve region for bucket {netloc}, using default region {provided_region}")
427434

428435
bucket_region = bucket_region or provided_region
429-
if bucket_region != provided_region:
436+
if provided_region is not None and bucket_region != provided_region:
430437
logger.warning(
431438
f"PyArrow FileIO overriding S3 bucket region for bucket {netloc}: "
432439
f"provided region {provided_region}, actual region {bucket_region}"

0 commit comments

Comments
 (0)