Skip to content

Commit e45fd9c

Browse files
committed
feat: add S3 SSE configs (FsspecFileIO only)
1 parent 536a7d0 commit e45fd9c

File tree

4 files changed

+52
-0
lines changed

4 files changed

+52
-0
lines changed

mkdocs/docs/configuration.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ For the FileIO there are several configuration options available:
129129
| s3.force-virtual-addressing | False | Whether to use virtual addressing of buckets. If true, then virtual addressing is always enabled. If false, then virtual addressing is only enabled if endpoint_override is empty. This can be used for non-AWS backends that only support virtual hosted-style access. |
130130
| s3.retry-strategy-impl | None | Ability to set a custom S3 retry strategy. A full path to a class needs to be given that extends the [S3RetryStrategy](https://github.com/apache/arrow/blob/639201bfa412db26ce45e73851432018af6c945e/python/pyarrow/_s3fs.pyx#L110) base class. |
131131
| s3.anonymous | True | Configure whether to use an anonymous connection. If False (default), uses key/secret if configured or boto's credential resolver. |
132+
| s3.server-side-encryption | aws:kms | Configure the server-side encryption mode applied to objects written to S3 (e.g. `AES256` or `aws:kms`). Only supported by `FsspecFileIO`. |
133+
| s3.sse-kms-key-id | alias/my-key | Configure the KMS key id (or ARN) used when `s3.server-side-encryption` is `aws:kms`, including for multipart uploads. Only supported by `FsspecFileIO`. |
132134

133135
<!-- markdown-link-check-enable-->
134136

pyiceberg/io/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@
6767
S3_ROLE_SESSION_NAME = "s3.role-session-name"
6868
S3_FORCE_VIRTUAL_ADDRESSING = "s3.force-virtual-addressing"
6969
S3_RETRY_STRATEGY_IMPL = "s3.retry-strategy-impl"
70+
S3_SERVER_SIDE_ENCRYPTION = "s3.server-side-encryption"
71+
S3_SSE_KMS_KEY_ID = "s3.sse-kms-key-id"
7072
HDFS_HOST = "hdfs.host"
7173
HDFS_PORT = "hdfs.port"
7274
HDFS_USER = "hdfs.user"

pyiceberg/io/fsspec.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,13 @@
7878
S3_REGION,
7979
S3_REQUEST_TIMEOUT,
8080
S3_SECRET_ACCESS_KEY,
81+
S3_SERVER_SIDE_ENCRYPTION,
8182
S3_SESSION_TOKEN,
8283
S3_SIGNER,
8384
S3_SIGNER_ENDPOINT,
8485
S3_SIGNER_ENDPOINT_DEFAULT,
8586
S3_SIGNER_URI,
87+
S3_SSE_KMS_KEY_ID,
8688
FileIO,
8789
InputFile,
8890
InputStream,
@@ -176,6 +178,7 @@ def _s3(properties: Properties) -> AbstractFileSystem:
176178
"region_name": get_first_property_value(properties, S3_REGION, AWS_REGION),
177179
}
178180
config_kwargs = {}
181+
s3_additional_kwargs = {}
179182
register_events: dict[str, Callable[[AWSRequest], None]] = {}
180183

181184
if signer := properties.get(S3_SIGNER):
@@ -208,6 +211,12 @@ def _s3(properties: Properties) -> AbstractFileSystem:
208211
else:
209212
anon = False
210213

214+
if server_side_encryption := properties.get(S3_SERVER_SIDE_ENCRYPTION):
215+
s3_additional_kwargs["ServerSideEncryption"] = server_side_encryption
216+
217+
if sse_kms_key_id := properties.get(S3_SSE_KMS_KEY_ID):
218+
s3_additional_kwargs["SSEKMSKeyId"] = sse_kms_key_id
219+
211220
s3_fs_kwargs = {
212221
"anon": anon,
213222
"client_kwargs": client_kwargs,
@@ -217,6 +226,9 @@ def _s3(properties: Properties) -> AbstractFileSystem:
217226
if profile_name := get_first_property_value(properties, S3_PROFILE_NAME, AWS_PROFILE_NAME):
218227
s3_fs_kwargs["profile"] = profile_name
219228

229+
if s3_additional_kwargs:
230+
s3_fs_kwargs["s3_additional_kwargs"] = s3_additional_kwargs
231+
220232
fs = S3FileSystem(**s3_fs_kwargs)
221233

222234
for event_name, event_function in register_events.items():

tests/io/test_fsspec.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,42 @@ def test_fsspec_unified_session_properties() -> None:
391391
)
392392

393393

394+
def test_fsspec_s3_encryption_additional_kwargs() -> None:
    """Check that both SSE properties are forwarded to ``S3FileSystem``.

    With the encryption mode and the KMS key id both configured, the mocked
    filesystem constructor must receive them together under the
    ``s3_additional_kwargs`` keyword argument.
    """
    kms_key_arn = "arn:aws:kms:us-east-1:123456789012:key/test-key"
    props: Properties = {
        "s3.server-side-encryption": "aws:kms",
        "s3.sse-kms-key-id": kms_key_arn,
        **UNIFIED_AWS_SESSION_PROPERTIES,
    }
    expected_extra_kwargs = {
        "ServerSideEncryption": "aws:kms",
        "SSEKMSKeyId": kms_key_arn,
    }

    with mock.patch("s3fs.S3FileSystem") as s3fs_cls:
        file_io = FsspecFileIO(properties=props)
        # NOTE(review): new_input is invoked before inspecting the mock,
        # presumably because the filesystem is only built on first use.
        file_io.new_input(location=f"s3://warehouse/{uuid.uuid4()}")

        constructor_kwargs = s3fs_cls.call_args.kwargs
        assert constructor_kwargs["s3_additional_kwargs"] == expected_extra_kwargs
412+
413+
414+
def test_fsspec_s3_encryption_additional_kwargs_partial() -> None:
    """Check that only the configured SSE property is forwarded.

    When just the encryption mode is set (no KMS key id), the mocked
    ``S3FileSystem`` constructor must receive an ``s3_additional_kwargs``
    mapping that holds ``ServerSideEncryption`` and nothing else.
    """
    props: Properties = {
        "s3.server-side-encryption": "AES256",
        **UNIFIED_AWS_SESSION_PROPERTIES,
    }
    expected_extra_kwargs = {"ServerSideEncryption": "AES256"}

    with mock.patch("s3fs.S3FileSystem") as s3fs_cls:
        file_io = FsspecFileIO(properties=props)
        # NOTE(review): new_input is invoked before inspecting the mock,
        # presumably because the filesystem is only built on first use.
        file_io.new_input(location=f"s3://warehouse/{uuid.uuid4()}")

        constructor_kwargs = s3fs_cls.call_args.kwargs
        assert constructor_kwargs["s3_additional_kwargs"] == expected_extra_kwargs
428+
429+
394430
@pytest.mark.adls
395431
def test_fsspec_new_input_file_adls(adls_fsspec_fileio: FsspecFileIO) -> None:
396432
"""Test creating a new input file from an fsspec file-io"""

0 commit comments

Comments
 (0)