diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 606a18ce91..28b79792c7 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -159,7 +159,7 @@ For the FileIO there are several configuration options available: | gcs.cache-timeout | 60 | Configure the cache expiration time in seconds for object metadata cache | | gcs.requester-pays | False | Configure whether to use requester-pays requests | | gcs.session-kwargs | {} | Configure a dict of parameters to pass on to aiohttp.ClientSession; can contain, for example, proxy settings. | -| gcs.endpoint | | Configure an alternative endpoint for the GCS FileIO to access (format protocol://host:port) If not given, defaults to the value of environment variable "STORAGE_EMULATOR_HOST"; if that is not set either, will use the standard Google endpoint. | +| gcs.service.host | | Configure an alternative endpoint for the GCS FileIO to access (format protocol://host:port). If not given, it defaults to the value of the environment variable "STORAGE_EMULATOR_HOST"; if that is not set either, the standard Google endpoint is used. | | gcs.default-location | US | Configure the default location where buckets are created, like 'US' or 'EUROPE-WEST3'. | | gcs.version-aware | False | Configure whether to support object versioning on the GCS bucket. 
| diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py index fe3ea43e10..80ea801fb0 100644 --- a/pyiceberg/io/__init__.py +++ b/pyiceberg/io/__init__.py @@ -90,6 +90,7 @@ GCS_REQUESTER_PAYS = "gcs.requester-pays" GCS_SESSION_KWARGS = "gcs.session-kwargs" GCS_ENDPOINT = "gcs.endpoint" +GCS_SERVICE_HOST = "gcs.service.host" GCS_DEFAULT_LOCATION = "gcs.default-bucket-location" GCS_VERSION_AWARE = "gcs.version-aware" PYARROW_USE_LARGE_TYPES_ON_READ = "pyarrow.use-large-types-on-read" diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py index 1632c4bb28..434ae67df0 100644 --- a/pyiceberg/io/fsspec.py +++ b/pyiceberg/io/fsspec.py @@ -64,6 +64,7 @@ GCS_ENDPOINT, GCS_PROJECT_ID, GCS_REQUESTER_PAYS, + GCS_SERVICE_HOST, GCS_SESSION_KWARGS, GCS_TOKEN, GCS_VERSION_AWARE, @@ -171,6 +172,12 @@ def _gs(properties: Properties) -> AbstractFileSystem: # https://gcsfs.readthedocs.io/en/latest/api.html#gcsfs.core.GCSFileSystem from gcsfs import GCSFileSystem + if properties.get(GCS_ENDPOINT): + deprecation_message( + deprecated_in="0.8.0", + removed_in="0.9.0", + help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead", + ) return GCSFileSystem( project=properties.get(GCS_PROJECT_ID), access=properties.get(GCS_ACCESS, "full_control"), @@ -179,7 +186,7 @@ def _gs(properties: Properties) -> AbstractFileSystem: cache_timeout=properties.get(GCS_CACHE_TIMEOUT), requester_pays=property_as_bool(properties, GCS_REQUESTER_PAYS, False), session_kwargs=json.loads(properties.get(GCS_SESSION_KWARGS, "{}")), - endpoint_url=properties.get(GCS_ENDPOINT), + endpoint_url=get_first_property_value(properties, GCS_SERVICE_HOST, GCS_ENDPOINT), default_location=properties.get(GCS_DEFAULT_LOCATION), version_aware=property_as_bool(properties, GCS_VERSION_AWARE, False), ) diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index ab4de5185b..f221266eb5 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -89,6 +89,7 @@ 
AWS_SESSION_TOKEN, GCS_DEFAULT_LOCATION, GCS_ENDPOINT, + GCS_SERVICE_HOST, GCS_TOKEN, GCS_TOKEN_EXPIRES_AT_MS, HDFS_HOST, @@ -159,7 +160,7 @@ from pyiceberg.utils.concurrent import ExecutorFactory from pyiceberg.utils.config import Config from pyiceberg.utils.datetime import millis_to_datetime -from pyiceberg.utils.deprecated import deprecated +from pyiceberg.utils.deprecated import deprecated, deprecation_message from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int from pyiceberg.utils.singleton import Singleton from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string @@ -390,7 +391,13 @@ def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSyste gcs_kwargs["credential_token_expiration"] = millis_to_datetime(int(expiration)) if bucket_location := self.properties.get(GCS_DEFAULT_LOCATION): gcs_kwargs["default_bucket_location"] = bucket_location - if endpoint := self.properties.get(GCS_ENDPOINT): + if endpoint := get_first_property_value(self.properties, GCS_SERVICE_HOST, GCS_ENDPOINT): + if self.properties.get(GCS_ENDPOINT): + deprecation_message( + deprecated_in="0.8.0", + removed_in="0.9.0", + help_message=f"The property {GCS_ENDPOINT} is deprecated, please use {GCS_SERVICE_HOST} instead", + ) url_parts = urlparse(endpoint) gcs_kwargs["scheme"] = url_parts.scheme gcs_kwargs["endpoint_override"] = url_parts.netloc diff --git a/tests/conftest.py b/tests/conftest.py index e7e73375d7..9160a1435d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -53,8 +53,8 @@ from pyiceberg.catalog.noop import NoopCatalog from pyiceberg.expressions import BoundReference from pyiceberg.io import ( - GCS_ENDPOINT, GCS_PROJECT_ID, + GCS_SERVICE_HOST, GCS_TOKEN, GCS_TOKEN_EXPIRES_AT_MS, fsspec, @@ -1873,7 +1873,7 @@ def fsspec_fileio(request: pytest.FixtureRequest) -> FsspecFileIO: @pytest.fixture def fsspec_fileio_gcs(request: pytest.FixtureRequest) -> 
FsspecFileIO: properties = { - GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"), + GCS_SERVICE_HOST: request.config.getoption("--gcs.endpoint"), GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"), GCS_PROJECT_ID: request.config.getoption("--gcs.project-id"), } @@ -1885,7 +1885,7 @@ def pyarrow_fileio_gcs(request: pytest.FixtureRequest) -> "PyArrowFileIO": from pyiceberg.io.pyarrow import PyArrowFileIO properties = { - GCS_ENDPOINT: request.config.getoption("--gcs.endpoint"), + GCS_SERVICE_HOST: request.config.getoption("--gcs.endpoint"), GCS_TOKEN: request.config.getoption("--gcs.oauth2.token"), GCS_PROJECT_ID: request.config.getoption("--gcs.project-id"), GCS_TOKEN_EXPIRES_AT_MS: datetime_to_millis(datetime.now()) + 60 * 1000,