Skip to content

Commit 3b4f02e

Browse files
committed
feat: Add AWS profile support for GlueCatalog and internal S3FileIO
1 parent 2d6a1b9 commit 3b4f02e

File tree

7 files changed

+161
-5
lines changed

7 files changed

+161
-5
lines changed

mkdocs/docs/configuration.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ For the FileIO there are several configuration options available:
115115
| s3.access-key-id | admin | Configure the static access key id used to access the FileIO. |
116116
| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. |
117117
| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. |
118+
| s3.profile-name | default | Configure the AWS profile used to access the S3 FileIO. |
118119
| s3.role-session-name | session | An optional identifier for the assumed role session. |
119120
| s3.role-arn | arn:aws:... | AWS Role ARN. If provided instead of access_key and secret_key, temporary credentials will be fetched by assuming this role. |
120121
| s3.signer | bearer | Configure the signature version of the FileIO. |
@@ -720,7 +721,7 @@ catalog:
720721
| glue.id | 111111111111 | Configure the 12-digit ID of the Glue Catalog |
721722
| glue.skip-archive | true | Configure whether to skip the archival of older table versions. Defaults to true. |
722723
| glue.endpoint | <https://glue.us-east-1.amazonaws.com> | Configure an alternative endpoint of the Glue service for GlueCatalog to access |
723-
| glue.profile-name | default | Configure the static profile used to access the Glue Catalog |
724+
| glue.profile-name | default | Configure the AWS profile used to access the Glue Catalog |
724725
| glue.region | us-east-1 | Set the region of the Glue Catalog |
725726
| glue.access-key-id | admin | Configure the static access key id used to access the Glue Catalog |
726727
| glue.secret-access-key | password | Configure the static secret access key used to access the Glue Catalog |
@@ -826,6 +827,7 @@ configures the AWS credentials for both Glue Catalog and S3 FileIO.
826827
| client.access-key-id | admin | Configure the static access key id used to access both the Glue/DynamoDB Catalog and the S3 FileIO |
827828
| client.secret-access-key | password | Configure the static secret access key used to access both the Glue/DynamoDB Catalog and the S3 FileIO |
828829
| client.session-token | AQoDYXdzEJr... | Configure the static session token used to access both the Glue/DynamoDB Catalog and the S3 FileIO |
830+
| client.profile-name | default | Configure the AWS profile used to access both the Glue/DynamoDB Catalog and the S3 FileIO |
829831
| client.role-session-name | session | An optional identifier for the assumed role session. |
830832
| client.role-arn | arn:aws:... | AWS Role ARN. If provided instead of access_key and secret_key, temporary credentials will be fetched by assuming this role. |
831833

pyiceberg/catalog/glue.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
NoSuchTableError,
4949
TableAlreadyExistsError,
5050
)
51-
from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN
51+
from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_PROFILE_NAME, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN
5252
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
5353
from pyiceberg.schema import Schema, SchemaVisitor, visit
5454
from pyiceberg.serializers import FromInputFile
@@ -329,7 +329,7 @@ def __init__(self, name: str, client: Optional["GlueClient"] = None, **propertie
329329
retry_mode_prop_value = get_first_property_value(properties, GLUE_RETRY_MODE)
330330

331331
session = boto3.Session(
332-
profile_name=properties.get(GLUE_PROFILE_NAME),
332+
profile_name=get_first_property_value(properties, GLUE_PROFILE_NAME, AWS_PROFILE_NAME),
333333
region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION),
334334
botocore_session=properties.get(BOTOCORE_SESSION),
335335
aws_access_key_id=get_first_property_value(properties, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),

pyiceberg/io/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,14 @@
4141

4242
logger = logging.getLogger(__name__)
4343

44+
AWS_PROFILE_NAME = "client.profile-name"
4445
AWS_REGION = "client.region"
4546
AWS_ACCESS_KEY_ID = "client.access-key-id"
4647
AWS_SECRET_ACCESS_KEY = "client.secret-access-key"
4748
AWS_SESSION_TOKEN = "client.session-token"
4849
AWS_ROLE_ARN = "client.role-arn"
4950
AWS_ROLE_SESSION_NAME = "client.role-session-name"
51+
S3_PROFILE_NAME = "s3.profile-name"
5052
S3_ANONYMOUS = "s3.anonymous"
5153
S3_ENDPOINT = "s3.endpoint"
5254
S3_ACCESS_KEY_ID = "s3.access-key-id"

pyiceberg/io/fsspec.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
ADLS_TENANT_ID,
5252
ADLS_TOKEN,
5353
AWS_ACCESS_KEY_ID,
54+
AWS_PROFILE_NAME,
5455
AWS_REGION,
5556
AWS_SECRET_ACCESS_KEY,
5657
AWS_SESSION_TOKEN,
@@ -71,6 +72,7 @@
7172
S3_CONNECT_TIMEOUT,
7273
S3_ENDPOINT,
7374
S3_FORCE_VIRTUAL_ADDRESSING,
75+
S3_PROFILE_NAME,
7476
S3_PROXY_URI,
7577
S3_REGION,
7678
S3_REQUEST_TIMEOUT,
@@ -205,7 +207,16 @@ def _s3(properties: Properties) -> AbstractFileSystem:
205207
else:
206208
anon = False
207209

208-
fs = S3FileSystem(anon=anon, client_kwargs=client_kwargs, config_kwargs=config_kwargs)
210+
s3_fs_kwargs = {
211+
"anon": anon,
212+
"client_kwargs": client_kwargs,
213+
"config_kwargs": config_kwargs,
214+
}
215+
216+
if profile_name := get_first_property_value(properties, S3_PROFILE_NAME, AWS_PROFILE_NAME):
217+
s3_fs_kwargs["profile"] = profile_name
218+
219+
fs = S3FileSystem(**s3_fs_kwargs)
209220

210221
for event_name, event_function in register_events.items():
211222
fs.s3.meta.events.unregister(event_name, unique_id=1925)

tests/catalog/test_glue_profile.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
from unittest import mock
19+
20+
from moto import mock_aws
21+
22+
from pyiceberg.catalog.glue import GlueCatalog
23+
from pyiceberg.typedef import Properties
24+
25+
# Unified ``client.*`` AWS credential properties used by the test below.
# Every value is identical to its key, so an assertion on a forwarded value
# also shows which property it was read from.
UNIFIED_AWS_SESSION_PROPERTIES = {
    "client.access-key-id": "client.access-key-id",
    "client.secret-access-key": "client.secret-access-key",
    "client.region": "client.region",
    "client.session-token": "client.session-token",
}
31+
32+
33+
@mock_aws
def test_passing_client_profile_name_properties_to_glue() -> None:
    """Check that ``client.profile-name`` reaches ``boto3.Session`` as ``profile_name``.

    No ``glue.profile-name`` is supplied, so the unified ``client.profile-name``
    property is the only profile setting present in the catalog properties.
    """
    session_properties: Properties = {
        "client.profile-name": "profile_name",
        **UNIFIED_AWS_SESSION_PROPERTIES,
    }

    # Patch the session class so the keyword arguments the catalog passes to
    # it can be inspected.
    with mock.patch("boto3.Session") as mock_session:
        test_catalog = GlueCatalog("glue", **session_properties)

        mock_session.assert_called_with(
            aws_access_key_id="client.access-key-id",
            aws_secret_access_key="client.secret-access-key",
            aws_session_token="client.session-token",
            region_name="client.region",
            profile_name="profile_name",
            botocore_session=None,
        )
        # The catalog's ``glue`` attribute must be the client obtained from
        # the patched session.
        assert test_catalog.glue is mock_session().client()

tests/io/test_fsspec.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,13 @@
3434
from pyiceberg.io.fsspec import FsspecFileIO, S3V4RestSigner
3535
from pyiceberg.io.pyarrow import PyArrowFileIO
3636
from pyiceberg.typedef import Properties
37-
from tests.conftest import UNIFIED_AWS_SESSION_PROPERTIES
37+
38+
# Unified ``client.*`` AWS credential properties, defined locally in this
# module. Every value is identical to its key.
UNIFIED_AWS_SESSION_PROPERTIES = {
    "client.access-key-id": "client.access-key-id",
    "client.secret-access-key": "client.secret-access-key",
    "client.region": "client.region",
    "client.session-token": "client.session-token",
}
3844

3945

4046
def test_fsspec_infer_local_fs_from_path(fsspec_fileio: FsspecFileIO) -> None:

tests/io/test_fsspec_profile.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
19+
import uuid
20+
from unittest import mock
21+
22+
from pyiceberg.io.fsspec import FsspecFileIO
23+
from pyiceberg.typedef import Properties
24+
25+
# Unified ``client.*`` AWS credential properties used by the tests below.
# Every value is identical to its key, so an assertion on a forwarded value
# also shows which property it was read from.
UNIFIED_AWS_SESSION_PROPERTIES = {
    "client.access-key-id": "client.access-key-id",
    "client.secret-access-key": "client.secret-access-key",
    "client.region": "client.region",
    "client.session-token": "client.session-token",
}
31+
32+
33+
def test_fsspec_s3_session_properties_with_profile() -> None:
    """Check that ``s3.profile-name`` is passed to ``S3FileSystem`` as ``profile``.

    The unified ``client.*`` credentials and the ``s3.endpoint`` are expected
    in ``client_kwargs``; the profile name is expected as a separate
    ``profile`` keyword argument.
    """
    session_properties: Properties = {
        "s3.profile-name": "test-profile",
        "s3.endpoint": "http://localhost:9000",
        **UNIFIED_AWS_SESSION_PROPERTIES,
    }

    with mock.patch("s3fs.S3FileSystem") as mock_s3fs:
        s3_fileio = FsspecFileIO(properties=session_properties)
        filename = str(uuid.uuid4())

        # NOTE(review): presumably requesting an s3:// location is what makes
        # the FileIO construct the S3 filesystem - confirm against FsspecFileIO.
        s3_fileio.new_input(location=f"s3://warehouse/{filename}")

        mock_s3fs.assert_called_with(
            anon=False,
            client_kwargs={
                "endpoint_url": "http://localhost:9000",
                "aws_access_key_id": "client.access-key-id",
                "aws_secret_access_key": "client.secret-access-key",
                "region_name": "client.region",
                "aws_session_token": "client.session-token",
            },
            config_kwargs={},
            profile="test-profile",
        )
59+
60+
def test_fsspec_s3_session_properties_with_client_profile() -> None:
    """Check that ``client.profile-name`` is passed to ``S3FileSystem`` as ``profile``.

    No ``s3.profile-name`` is supplied here, so the unified
    ``client.profile-name`` property is the only profile setting present.
    """
    session_properties: Properties = {
        "client.profile-name": "test-profile",
        "s3.endpoint": "http://localhost:9000",
        **UNIFIED_AWS_SESSION_PROPERTIES,
    }

    with mock.patch("s3fs.S3FileSystem") as mock_s3fs:
        s3_fileio = FsspecFileIO(properties=session_properties)
        filename = str(uuid.uuid4())

        # NOTE(review): presumably requesting an s3:// location is what makes
        # the FileIO construct the S3 filesystem - confirm against FsspecFileIO.
        s3_fileio.new_input(location=f"s3://warehouse/{filename}")

        mock_s3fs.assert_called_with(
            anon=False,
            client_kwargs={
                "endpoint_url": "http://localhost:9000",
                "aws_access_key_id": "client.access-key-id",
                "aws_secret_access_key": "client.secret-access-key",
                "region_name": "client.region",
                "aws_session_token": "client.session-token",
            },
            config_kwargs={},
            profile="test-profile",
        )

0 commit comments

Comments
 (0)