Skip to content

Commit 7377d05

Browse files
committed
feat: Add AWS profile support for GlueCatalog and internal S3FileIO
1 parent 2d6a1b9 commit 7377d05

File tree

6 files changed

+161
-4
lines changed

6 files changed

+161
-4
lines changed

pyiceberg/catalog/glue.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
NoSuchTableError,
4949
TableAlreadyExistsError,
5050
)
51-
from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN
51+
from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_PROFILE_NAME, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN
5252
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
5353
from pyiceberg.schema import Schema, SchemaVisitor, visit
5454
from pyiceberg.serializers import FromInputFile
@@ -329,7 +329,7 @@ def __init__(self, name: str, client: Optional["GlueClient"] = None, **propertie
329329
retry_mode_prop_value = get_first_property_value(properties, GLUE_RETRY_MODE)
330330

331331
session = boto3.Session(
332-
profile_name=properties.get(GLUE_PROFILE_NAME),
332+
profile_name=get_first_property_value(properties, GLUE_PROFILE_NAME, AWS_PROFILE_NAME),
333333
region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION),
334334
botocore_session=properties.get(BOTOCORE_SESSION),
335335
aws_access_key_id=get_first_property_value(properties, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),

pyiceberg/io/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,14 @@
4141

4242
logger = logging.getLogger(__name__)
4343

44+
AWS_PROFILE_NAME = "client.profile-name"
4445
AWS_REGION = "client.region"
4546
AWS_ACCESS_KEY_ID = "client.access-key-id"
4647
AWS_SECRET_ACCESS_KEY = "client.secret-access-key"
4748
AWS_SESSION_TOKEN = "client.session-token"
4849
AWS_ROLE_ARN = "client.role-arn"
4950
AWS_ROLE_SESSION_NAME = "client.role-session-name"
51+
S3_PROFILE_NAME = "s3.profile-name"
5052
S3_ANONYMOUS = "s3.anonymous"
5153
S3_ENDPOINT = "s3.endpoint"
5254
S3_ACCESS_KEY_ID = "s3.access-key-id"

pyiceberg/io/fsspec.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
ADLS_TENANT_ID,
5252
ADLS_TOKEN,
5353
AWS_ACCESS_KEY_ID,
54+
AWS_PROFILE_NAME,
5455
AWS_REGION,
5556
AWS_SECRET_ACCESS_KEY,
5657
AWS_SESSION_TOKEN,
@@ -71,6 +72,7 @@
7172
S3_CONNECT_TIMEOUT,
7273
S3_ENDPOINT,
7374
S3_FORCE_VIRTUAL_ADDRESSING,
75+
S3_PROFILE_NAME,
7476
S3_PROXY_URI,
7577
S3_REGION,
7678
S3_REQUEST_TIMEOUT,
@@ -205,7 +207,13 @@ def _s3(properties: Properties) -> AbstractFileSystem:
205207
else:
206208
anon = False
207209

208-
fs = S3FileSystem(anon=anon, client_kwargs=client_kwargs, config_kwargs=config_kwargs)
210+
session = None
211+
if profile_name := get_first_property_value(properties, S3_PROFILE_NAME, AWS_PROFILE_NAME):
212+
from aiobotocore.session import AioSession
213+
214+
session = AioSession(profile=profile_name)
215+
216+
fs = S3FileSystem(anon=anon, client_kwargs=client_kwargs, config_kwargs=config_kwargs, session=session)
209217

210218
for event_name, event_function in register_events.items():
211219
fs.s3.meta.events.unregister(event_name, unique_id=1925)

tests/catalog/test_glue_profile.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
from unittest import mock
19+
20+
from moto import mock_aws
21+
22+
from pyiceberg.catalog.glue import GlueCatalog
23+
from pyiceberg.typedef import Properties
24+
25+
UNIFIED_AWS_SESSION_PROPERTIES = {
26+
"client.access-key-id": "client.access-key-id",
27+
"client.secret-access-key": "client.secret-access-key",
28+
"client.region": "client.region",
29+
"client.session-token": "client.session-token",
30+
}
31+
32+
33+
@mock_aws
def test_passing_client_profile_name_properties_to_glue() -> None:
    """Check that ``client.profile-name`` is forwarded to ``boto3.Session``.

    Only the unified ``client.*`` properties are supplied, so every keyword
    argument of the session is expected to be taken from those shared keys.
    """
    catalog_properties: Properties = {
        "client.profile-name": "profile_name",
        **UNIFIED_AWS_SESSION_PROPERTIES,
    }
    # Keyword arguments the catalog is expected to hand to boto3.Session.
    expected_session_kwargs = {
        "aws_access_key_id": "client.access-key-id",
        "aws_secret_access_key": "client.secret-access-key",
        "aws_session_token": "client.session-token",
        "region_name": "client.region",
        "profile_name": "profile_name",
        "botocore_session": None,
    }

    with mock.patch("boto3.Session") as session_cls:
        catalog = GlueCatalog("glue", **catalog_properties)

    session_cls.assert_called_with(**expected_session_kwargs)
    # The catalog must hold the client created from the patched session.
    assert catalog.glue is session_cls().client()

tests/io/test_fsspec.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,13 @@
3434
from pyiceberg.io.fsspec import FsspecFileIO, S3V4RestSigner
3535
from pyiceberg.io.pyarrow import PyArrowFileIO
3636
from pyiceberg.typedef import Properties
37-
from tests.conftest import UNIFIED_AWS_SESSION_PROPERTIES
37+
38+
UNIFIED_AWS_SESSION_PROPERTIES = {
39+
"client.access-key-id": "client.access-key-id",
40+
"client.secret-access-key": "client.secret-access-key",
41+
"client.region": "client.region",
42+
"client.session-token": "client.session-token",
43+
}
3844

3945

4046
def test_fsspec_infer_local_fs_from_path(fsspec_fileio: FsspecFileIO) -> None:
@@ -303,6 +309,7 @@ def test_fsspec_s3_session_properties() -> None:
303309
"aws_session_token": "s3.session-token",
304310
},
305311
config_kwargs={},
312+
session=None,
306313
)
307314

308315

@@ -333,6 +340,7 @@ def test_fsspec_s3_session_properties_force_virtual_addressing() -> None:
333340
"aws_session_token": "s3.session-token",
334341
},
335342
config_kwargs={"s3": {"addressing_style": "virtual"}},
343+
session=None,
336344
)
337345

338346

@@ -363,6 +371,7 @@ def test_fsspec_s3_session_properties_with_anonymous() -> None:
363371
"aws_session_token": "s3.session-token",
364372
},
365373
config_kwargs={},
374+
session=None,
366375
)
367376

368377

@@ -388,6 +397,7 @@ def test_fsspec_unified_session_properties() -> None:
388397
"aws_session_token": "client.session-token",
389398
},
390399
config_kwargs={},
400+
session=None,
391401
)
392402

393403

tests/io/test_fsspec_profile.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
19+
import uuid
20+
from unittest import mock
21+
22+
from pyiceberg.io.fsspec import FsspecFileIO
23+
from pyiceberg.typedef import Properties
24+
25+
UNIFIED_AWS_SESSION_PROPERTIES = {
26+
"client.access-key-id": "client.access-key-id",
27+
"client.secret-access-key": "client.secret-access-key",
28+
"client.region": "client.region",
29+
"client.session-token": "client.session-token",
30+
}
31+
32+
33+
def test_fsspec_s3_session_properties_with_profile() -> None:
    """Check that ``s3.profile-name`` builds an ``AioSession`` passed to ``S3FileSystem``."""
    io_properties: Properties = {
        "s3.profile-name": "test-profile",
        "s3.endpoint": "http://localhost:9000",
        **UNIFIED_AWS_SESSION_PROPERTIES,
    }
    # Client arguments expected from the endpoint plus the unified client.* keys.
    expected_client_kwargs = {
        "endpoint_url": "http://localhost:9000",
        "aws_access_key_id": "client.access-key-id",
        "aws_secret_access_key": "client.secret-access-key",
        "region_name": "client.region",
        "aws_session_token": "client.session-token",
    }

    s3fs_patch = mock.patch("s3fs.S3FileSystem")
    aio_session_patch = mock.patch("aiobotocore.session.AioSession")
    with s3fs_patch as s3fs_cls, aio_session_patch as aio_session_cls:
        file_io = FsspecFileIO(properties=io_properties)
        object_name = str(uuid.uuid4())

        # Requesting an s3:// input is the step that makes the file IO build its filesystem.
        file_io.new_input(location=f"s3://warehouse/{object_name}")

        aio_session_cls.assert_called_with(profile="test-profile")
        s3fs_cls.assert_called_with(
            anon=False,
            client_kwargs=expected_client_kwargs,
            config_kwargs={},
            session=aio_session_cls(),
        )
59+
60+
61+
def test_fsspec_s3_session_properties_with_client_profile() -> None:
    """Check that ``client.profile-name`` alone builds an ``AioSession`` passed to ``S3FileSystem``."""
    io_properties: Properties = {
        "client.profile-name": "test-profile",
        "s3.endpoint": "http://localhost:9000",
        **UNIFIED_AWS_SESSION_PROPERTIES,
    }
    # Client arguments expected from the endpoint plus the unified client.* keys.
    expected_client_kwargs = {
        "endpoint_url": "http://localhost:9000",
        "aws_access_key_id": "client.access-key-id",
        "aws_secret_access_key": "client.secret-access-key",
        "region_name": "client.region",
        "aws_session_token": "client.session-token",
    }

    s3fs_patch = mock.patch("s3fs.S3FileSystem")
    aio_session_patch = mock.patch("aiobotocore.session.AioSession")
    with s3fs_patch as s3fs_cls, aio_session_patch as aio_session_cls:
        file_io = FsspecFileIO(properties=io_properties)
        object_name = str(uuid.uuid4())

        # Requesting an s3:// input is the step that makes the file IO build its filesystem.
        file_io.new_input(location=f"s3://warehouse/{object_name}")

        aio_session_cls.assert_called_with(profile="test-profile")
        s3fs_cls.assert_called_with(
            anon=False,
            client_kwargs=expected_client_kwargs,
            config_kwargs={},
            session=aio_session_cls(),
        )

0 commit comments

Comments
 (0)