Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions mkdocs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,14 @@ For the FileIO there are several configuration options available:
<!-- markdown-link-check-disable -->

| Key | Example | Description |
| -------------------- | ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| -------------------- | ------------------------ |-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| s3.endpoint | https://10.0.19.25/ | Configure an alternative endpoint of the S3 service for the FileIO to access. This could be used to use S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. |
| s3.access-key-id | admin | Configure the static access key id used to access the FileIO. |
| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. |
| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. |
| s3.signer | bearer | Configure the signature version of the FileIO. |
| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.singer.uri>/v1/aws/s3/sign`. |
| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.singer.uri>/<s3.singer.endpoint>`. |
| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.singer.uri>/<s3.singer.endpoint>`. (default : v1/aws/s3/sign). |
Comment thread
Fokko marked this conversation as resolved.
Outdated
| s3.region | us-west-2 | Sets the region of the bucket |
| s3.proxy-uri | http://my.proxy.com:8080 | Configure the proxy server to be used by the FileIO. |
| s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. |
Expand Down
2 changes: 2 additions & 0 deletions pyiceberg/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@
S3_PROXY_URI = "s3.proxy-uri"
S3_CONNECT_TIMEOUT = "s3.connect-timeout"
S3_SIGNER_URI = "s3.signer.uri"
S3_SIGNER_ENDPOINT = "s3.signer.endpoint"
S3_SIGNER_ENDPOINT_DEFAULT = "v1/aws/s3/sign"
HDFS_HOST = "hdfs.host"
HDFS_PORT = "hdfs.port"
HDFS_USER = "hdfs.user"
Expand Down
6 changes: 5 additions & 1 deletion pyiceberg/io/fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@
S3_REGION,
S3_SECRET_ACCESS_KEY,
S3_SESSION_TOKEN,
S3_SIGNER_ENDPOINT,
S3_SIGNER_ENDPOINT_DEFAULT,
S3_SIGNER_URI,
ADLFS_ClIENT_SECRET,
FileIO,
Expand All @@ -86,6 +88,8 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> A
raise SignError("Signer set, but token is not available")

signer_url = properties.get(S3_SIGNER_URI, properties["uri"]).rstrip("/")
signer_endpoint = properties.get(S3_SIGNER_ENDPOINT, S3_SIGNER_ENDPOINT_DEFAULT)

signer_headers = {"Authorization": f"Bearer {properties[TOKEN]}"}
signer_body = {
"method": request.method,
Expand All @@ -94,7 +98,7 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> A
"headers": {key: [val] for key, val in request.headers.items()},
}

response = requests.post(f"{signer_url}/v1/aws/s3/sign", headers=signer_headers, json=signer_body)
response = requests.post(f"{signer_url}/{signer_endpoint.strip()}", headers=signer_headers, json=signer_body)
try:
response.raise_for_status()
response_json = response.json()
Expand Down
53 changes: 53 additions & 0 deletions tests/io/test_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,59 @@ def test_s3v4_rest_signer(requests_mock: Mocker) -> None:
}


def test_s3v4_rest_signer_endpoint(requests_mock: Mocker) -> None:
new_uri = "https://other-bucket/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro"
endpoint = "v1/main/s3-sign/foo.bar?e=e&b=b&k=k=k&s=s&w=w"
requests_mock.post(
f"{TEST_URI}/{endpoint}",
json={
"uri": new_uri,
"headers": {
"Authorization": [
"AWS4-HMAC-SHA256 Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02"
],
"Host": ["bucket.s3.us-west-2.amazonaws.com"],
"User-Agent": ["Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0"],
"x-amz-content-sha256": ["UNSIGNED-PAYLOAD"],
"X-Amz-Date": ["20221017T102940Z"],
"X-Amz-Security-Token": [
"YQoJb3JpZ2luX2VjEDoaCXVzLXdlc3QtMiJGMEQCID/fFxZP5oaEgQmcwP6XhZa0xSq9lmLSx8ffaWbySfUPAiAesa7sjd/WV4uwRTO0S03y/MWVtgpH+/NyZQ4bZgLVriqrAggTEAEaDDAzMzQwNzIyMjE1OSIMOeFOWhZIurMmAqjsKogCxMCqxX8ZjK0gacAkcDqBCyA7qTSLhdfKQIH/w7WpLBU1km+cRUWWCudan6gZsAq867DBaKEP7qI05DAWr9MChAkgUgyI8/G3Z23ET0gAedf3GsJbakB0F1kklx8jPmj4BPCht9RcTiXiJ5DxTS/cRCcalIQXmPFbaJSqpBusVG2EkWnm1v7VQrNPE2Os2b2P293vpbhwkyCEQiGRVva4Sw9D1sKvqSsK10QCRG+os6dFEOu1kARaXi6pStvR4OVmj7OYeAYjzaFchn7nz2CSae0M4IluiYQ01eQAywbfRo9DpKSmDM/DnPZWJnD/woLhaaaCrCxSSEaFsvGOHFhLd3Rknw1v0jADMILUtJoGOp4BpqKqyMz0CY3kpKL0jfR3ykTf/ge9wWVE0Alr7wRIkGCIURkhslGHqSyFRGoTqIXaxU+oPbwlw/0w/nYO7qQ6bTANOWye/wgw4h/NmJ6vU7wnZTXwREf1r6MF72++bE/fMk19LfVb8jN/qrUqAUXTc8gBAUxL5pgy8+oT/JnI2BkVrrLS4ilxEXP9Ahm+6GDUYXV4fBpqpZwdkzQ/5Gw="
],
},
"extensions": {},
},
status_code=200,
)

request = AWSRequest(
method="HEAD",
url="https://bucket/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro",
headers={"User-Agent": "Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0"},
data=b"",
params={},
auth_path="/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro",
)
request.context = {
"client_region": "us-west-2",
"has_streaming_input": False,
"auth_type": None,
"signing": {"bucket": "bucket"},
"retries": {"attempt": 1, "invocation-id": "75d143fb-0219-439b-872c-18213d1c8d54"},
}

signed_request = s3v4_rest_signer({"token": "abc", "uri": TEST_URI, "s3.signer.endpoint": endpoint}, request)

assert signed_request.url == new_uri
assert dict(signed_request.headers) == {
"Authorization": "AWS4-HMAC-SHA256 Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02",
"Host": "bucket.s3.us-west-2.amazonaws.com",
"User-Agent": "Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0",
"X-Amz-Date": "20221017T102940Z",
"X-Amz-Security-Token": "YQoJb3JpZ2luX2VjEDoaCXVzLXdlc3QtMiJGMEQCID/fFxZP5oaEgQmcwP6XhZa0xSq9lmLSx8ffaWbySfUPAiAesa7sjd/WV4uwRTO0S03y/MWVtgpH+/NyZQ4bZgLVriqrAggTEAEaDDAzMzQwNzIyMjE1OSIMOeFOWhZIurMmAqjsKogCxMCqxX8ZjK0gacAkcDqBCyA7qTSLhdfKQIH/w7WpLBU1km+cRUWWCudan6gZsAq867DBaKEP7qI05DAWr9MChAkgUgyI8/G3Z23ET0gAedf3GsJbakB0F1kklx8jPmj4BPCht9RcTiXiJ5DxTS/cRCcalIQXmPFbaJSqpBusVG2EkWnm1v7VQrNPE2Os2b2P293vpbhwkyCEQiGRVva4Sw9D1sKvqSsK10QCRG+os6dFEOu1kARaXi6pStvR4OVmj7OYeAYjzaFchn7nz2CSae0M4IluiYQ01eQAywbfRo9DpKSmDM/DnPZWJnD/woLhaaaCrCxSSEaFsvGOHFhLd3Rknw1v0jADMILUtJoGOp4BpqKqyMz0CY3kpKL0jfR3ykTf/ge9wWVE0Alr7wRIkGCIURkhslGHqSyFRGoTqIXaxU+oPbwlw/0w/nYO7qQ6bTANOWye/wgw4h/NmJ6vU7wnZTXwREf1r6MF72++bE/fMk19LfVb8jN/qrUqAUXTc8gBAUxL5pgy8+oT/JnI2BkVrrLS4ilxEXP9Ahm+6GDUYXV4fBpqpZwdkzQ/5Gw=",
"x-amz-content-sha256": "UNSIGNED-PAYLOAD",
}


def test_s3v4_rest_signer_forbidden(requests_mock: Mocker) -> None:
requests_mock.post(
f"{TEST_URI}/v1/aws/s3/sign",
Expand Down