-
Notifications
You must be signed in to change notification settings - Fork 466
Standardize AWS credential names #922
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
1310b0e
953a883
f7384e9
0abae8f
94337c2
996c874
ce5b604
a67f549
7a85275
846fc08
c91aaeb
964132f
fd2af56
dd65c6d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -39,6 +39,12 @@ | |||
| ) | ||||
|
|
||||
| from pyiceberg.catalog import ( | ||||
| DEPRECATED_ACCESS_KEY_ID, | ||||
| DEPRECATED_BOTOCORE_SESSION, | ||||
| DEPRECATED_PROFILE_NAME, | ||||
| DEPRECATED_REGION, | ||||
| DEPRECATED_SECRET_ACCESS_KEY, | ||||
| DEPRECATED_SESSION_TOKEN, | ||||
| EXTERNAL_TABLE, | ||||
| ICEBERG, | ||||
| LOCATION, | ||||
|
|
@@ -58,6 +64,7 @@ | |||
| NoSuchTableError, | ||||
| TableAlreadyExistsError, | ||||
| ) | ||||
| from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN | ||||
| from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec | ||||
| from pyiceberg.schema import Schema, SchemaVisitor, visit | ||||
| from pyiceberg.serializers import FromInputFile | ||||
|
|
@@ -113,6 +120,20 @@ | |||
| ICEBERG_FIELD_OPTIONAL = "iceberg.field.optional" | ||||
| ICEBERG_FIELD_CURRENT = "iceberg.field.current" | ||||
|
|
||||
| GLUE_PROFILE_NAME = "glue.profile-name" | ||||
| GLUE_REGION = "glue.region" | ||||
| GLUE_BOTOCORE_SESSION = "glue.botocore-session" | ||||
| GLUE_ACCESS_KEY_ID = "glue.access-key-id" | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How common is it to have a separate iceberg-python/pyiceberg/io/pyarrow.py Line 349 in 3f44dfe
This way you would need to set both I'm not an AWS expert, but my gut feeling is that normally people rely on
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the suggestions! I've updated the doc to explicitly indicating that
The I added a separate section
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That makes sense, I'm good with that now. At some point we should have a bigger conversation across languages to unify this. |
||||
| GLUE_SECRET_ACCESS_KEY = "glue.secret-access-key" | ||||
| GLUE_SESSION_TOKEN = "glue.session-token" | ||||
|
|
||||
| GLUE_PROFILE_NAME_PROPERTIES = (GLUE_PROFILE_NAME, DEPRECATED_PROFILE_NAME) | ||||
| GLUE_REGION_PROPERTIES = (GLUE_REGION, AWS_REGION, DEPRECATED_REGION) | ||||
| GLUE_BOTOCORE_SESSION_PROPERTIES = (GLUE_BOTOCORE_SESSION, DEPRECATED_BOTOCORE_SESSION) | ||||
| GLUE_ACCESS_KEY_ID_PROPERTIES = (GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID, DEPRECATED_ACCESS_KEY_ID) | ||||
| GLUE_SECRET_ACCESS_KEY_PROPERTIES = (GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY, DEPRECATED_SECRET_ACCESS_KEY) | ||||
| GLUE_SESSION_TOKEN_PROPERTIES = (GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN, DEPRECATED_SESSION_TOKEN) | ||||
|
|
||||
|
|
||||
| def _construct_parameters( | ||||
| metadata_location: str, glue_table: Optional[TableTypeDef] = None, prev_metadata_location: Optional[str] = None | ||||
|
|
@@ -282,12 +303,12 @@ def __init__(self, name: str, **properties: Any): | |||
| super().__init__(name, **properties) | ||||
|
|
||||
| session = boto3.Session( | ||||
| profile_name=properties.get("profile_name"), | ||||
| region_name=properties.get("region_name"), | ||||
| botocore_session=properties.get("botocore_session"), | ||||
| aws_access_key_id=properties.get("aws_access_key_id"), | ||||
| aws_secret_access_key=properties.get("aws_secret_access_key"), | ||||
| aws_session_token=properties.get("aws_session_token"), | ||||
| profile_name=self._get_first_property_value(GLUE_PROFILE_NAME_PROPERTIES), | ||||
| region_name=self._get_first_property_value(GLUE_REGION_PROPERTIES), | ||||
| botocore_session=self._get_first_property_value(GLUE_BOTOCORE_SESSION_PROPERTIES), | ||||
| aws_access_key_id=self._get_first_property_value(GLUE_ACCESS_KEY_ID_PROPERTIES), | ||||
| aws_secret_access_key=self._get_first_property_value(GLUE_SECRET_ACCESS_KEY_PROPERTIES), | ||||
| aws_session_token=self._get_first_property_value(GLUE_SESSION_TOKEN_PROPERTIES), | ||||
| ) | ||||
| self.glue: GlueClient = session.client("glue") | ||||
|
|
||||
|
|
||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -32,10 +32,12 @@ | |
| from io import SEEK_SET | ||
| from types import TracebackType | ||
| from typing import ( | ||
| Any, | ||
| Dict, | ||
| List, | ||
| Optional, | ||
| Protocol, | ||
| Tuple, | ||
| Type, | ||
| Union, | ||
| runtime_checkable, | ||
|
|
@@ -46,6 +48,10 @@ | |
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
| AWS_REGION = "client.region" | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I chose
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice, that's a good find 👍
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. would be good to highlight this in the PR description! |
||
| AWS_ACCESS_KEY_ID = "client.access-key-id" | ||
| AWS_SECRET_ACCESS_KEY = "client.secret-access-key" | ||
| AWS_SESSION_TOKEN = "client.session-token" | ||
| S3_ENDPOINT = "s3.endpoint" | ||
| S3_ACCESS_KEY_ID = "s3.access-key-id" | ||
| S3_SECRET_ACCESS_KEY = "s3.secret-access-key" | ||
|
|
@@ -77,6 +83,11 @@ | |
| GCS_DEFAULT_LOCATION = "gcs.default-bucket-location" | ||
| GCS_VERSION_AWARE = "gcs.version-aware" | ||
|
|
||
| S3_REGION_PROPERTIES = (S3_REGION, AWS_REGION) | ||
| S3_ACCESS_KEY_ID_PROPERTIES = (S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID) | ||
| S3_SECRET_ACCESS_KEY_PROPERTIES = (S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY) | ||
| S3_SESSION_TOKEN_PROPERTIES = (S3_SESSION_TOKEN, AWS_SESSION_TOKEN) | ||
|
|
||
|
|
||
| @runtime_checkable | ||
| class InputStream(Protocol): | ||
|
|
@@ -320,6 +331,13 @@ def _infer_file_io_from_scheme(path: str, properties: Properties) -> Optional[Fi | |
| return None | ||
|
|
||
|
|
||
| def _get_first_property_value(properties: Properties, property_names: Tuple[str, ...]) -> Optional[Any]: | ||
|
HonahX marked this conversation as resolved.
Outdated
|
||
| for property_name in property_names: | ||
| if property_value := properties.get(property_name): | ||
| return property_value | ||
| return None | ||
|
|
||
|
|
||
| def load_file_io(properties: Properties = EMPTY_DICT, location: Optional[str] = None) -> FileIO: | ||
| # First look for the py-io-impl property to directly load the class | ||
| if io_impl := properties.get(PY_IO_IMPL): | ||
|
|
||


Uh oh!
There was an error while loading. Please reload this page.