Skip to content

Commit 3953633

Browse files
Fix SigV4 auth to use base64-encoded content SHA256 and custom canonical request
1 parent 627a5eb commit 3953633

File tree

2 files changed

+94
-10
lines changed

2 files changed

+94
-10
lines changed

pyiceberg/catalog/rest/__init__.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,8 @@ def _split_identifier_for_json(self, identifier: str | Identifier) -> dict[str,
678678
return {"namespace": identifier_tuple[:-1], "name": identifier_tuple[-1]}
679679

680680
def _init_sigv4(self, session: Session) -> None:
681+
import base64
682+
import hashlib
681683
from urllib import parse
682684

683685
import boto3
@@ -686,6 +688,12 @@ def _init_sigv4(self, session: Session) -> None:
686688
from requests import PreparedRequest
687689
from requests.adapters import HTTPAdapter
688690

691+
class _IcebergSigV4Auth(SigV4Auth):
    """SigV4 signer that rewrites the final line of the canonical request."""

    def canonical_request(self, request: Any) -> str:
        """Return the parent's canonical request with its last line replaced.

        Everything before the final newline of the parent's result is kept
        as-is; the trailing line (the body checksum) is swapped for
        ``self.payload(request)`` (the hex-encoded payload hash).
        """
        canonical = super().canonical_request(request)
        # Keep all lines except the trailing one produced by the parent.
        leading_lines = canonical.rsplit("\n", 1)[0]
        return "\n".join((leading_lines, self.payload(request)))
696+
689697
class SigV4Adapter(HTTPAdapter):
690698
def __init__(self, **properties: str):
691699
super().__init__()
@@ -710,17 +718,27 @@ def add_headers(self, request: PreparedRequest, **kwargs: Any) -> None: # pylin
710718
# remove the connection header as it will be updated after signing
711719
if "connection" in request.headers:
712720
del request.headers["connection"]
713-
# For empty bodies, explicitly set the content hash header to the SHA256 of an empty string
714-
if not request.body:
715-
request.headers["x-amz-content-sha256"] = EMPTY_BODY_SHA256
721+
722+
# Compute the x-amz-content-sha256 header to match Iceberg Java SDK:
723+
# - empty body → hex (EMPTY_BODY_SHA256)
724+
# - non-empty body → base64
725+
if request.body:
726+
body_bytes = request.body.encode("utf-8") if isinstance(request.body, str) else request.body
727+
content_sha256_header = base64.b64encode(hashlib.sha256(body_bytes).digest()).decode()
728+
else:
729+
content_sha256_header = EMPTY_BODY_SHA256
730+
731+
signing_headers = dict(request.headers)
732+
signing_headers["x-amz-content-sha256"] = content_sha256_header
716733

717734
aws_request = AWSRequest(
718-
method=request.method, url=url, params=params, data=request.body, headers=dict(request.headers)
735+
method=request.method, url=url, params=params, data=request.body, headers=signing_headers
719736
)
720737

721-
SigV4Auth(credentials, service, region).add_auth(aws_request)
722-
original_header = request.headers
723-
signed_headers = aws_request.headers
738+
_IcebergSigV4Auth(credentials, service, region).add_auth(aws_request)
739+
740+
original_header = dict(request.headers)
741+
signed_headers = dict(aws_request.headers)
724742
relocated_headers = {}
725743

726744
# relocate headers if there is a conflict with signed headers

tests/catalog/test_rest.py

Lines changed: 69 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -493,9 +493,10 @@ def test_sigv4_sign_request_without_body(rest_mock: Mocker) -> None:
493493
assert isinstance(adapter, HTTPAdapter)
494494
adapter.add_headers(prepared)
495495

496-
assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256")
496+
assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=")
497497
assert prepared.headers["Original-Authorization"] == f"Bearer {existing_token}"
498498
assert prepared.headers["x-amz-content-sha256"] == EMPTY_BODY_SHA256
499+
assert "SignedHeaders=" in prepared.headers["Authorization"]
499500

500501

501502
def test_sigv4_sign_request_with_body(rest_mock: Mocker) -> None:
@@ -524,9 +525,74 @@ def test_sigv4_sign_request_with_body(rest_mock: Mocker) -> None:
524525
assert isinstance(adapter, HTTPAdapter)
525526
adapter.add_headers(prepared)
526527

527-
assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256")
528+
assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=")
529+
assert "SignedHeaders=" in prepared.headers["Authorization"]
530+
# Conflicting Authorization header is relocated
528531
assert prepared.headers["Original-Authorization"] == f"Bearer {existing_token}"
529-
assert prepared.headers.get("x-amz-content-sha256") != EMPTY_BODY_SHA256
532+
assert prepared.headers["x-amz-content-sha256"] == "nhKdVGKGU3IMGjYlod9xKUVc7/H5K6zTWj60yJOM80k="
533+
534+
535+
def test_sigv4_content_sha256_with_bytes_body(rest_mock: Mocker) -> None:
    """Sign a POST whose body is ``bytes`` and check the resulting headers.

    Verifies the Authorization header shape and that ``x-amz-content-sha256``
    equals the value pinned at the end of this test.
    """
    bearer_token = "existing_token"
    catalog_properties = {
        "uri": TEST_URI,
        "token": bearer_token,
        "rest.sigv4-enabled": "true",
        "rest.signing-region": "us-west-2",
        "client.access-key-id": "id",
        "client.secret-access-key": "secret",
    }
    catalog = RestCatalog("rest", **catalog_properties)

    # The body is passed as bytes (not str) on purpose.
    payload = b'{"namespace": "test_namespace"}'
    unsigned_request = Request("POST", f"{TEST_URI}v1/namespaces", data=payload)
    prepared = catalog._session.prepare_request(unsigned_request)

    adapter = catalog._session.adapters[catalog.uri]
    assert isinstance(adapter, HTTPAdapter)
    adapter.add_headers(prepared)

    authorization = prepared.headers["Authorization"]
    assert authorization.startswith("AWS4-HMAC-SHA256 Credential=")
    assert "SignedHeaders=" in authorization
    assert prepared.headers["x-amz-content-sha256"] == "sD20bEQP+WnwKPT7jxn7PIACGciAeWjQPlzFCK5Fifo="
565+
566+
567+
def test_sigv4_conflicting_sigv4_headers(rest_mock: Mocker) -> None:
    """Pre-existing SigV4 headers are relocated under an ``Original-`` prefix.

    Two headers are set to ``"fake"`` before signing; after ``add_headers``
    the fake values must be readable under the prefixed names while the
    un-prefixed names carry the values produced by signing.
    """
    catalog_properties = {
        "uri": TEST_URI,
        "rest.sigv4-enabled": "true",
        "rest.signing-region": "us-west-2",
        "client.access-key-id": "id",
        "client.secret-access-key": "secret",
    }
    catalog = RestCatalog("rest", **catalog_properties)

    unsigned_request = Request("GET", f"{TEST_URI}v1/config")
    prepared = catalog._session.prepare_request(unsigned_request)
    adapter = catalog._session.adapters[catalog.uri]
    assert isinstance(adapter, HTTPAdapter)

    # Inject conflicting SigV4 headers before signing
    prepared.headers["x-amz-content-sha256"] = "fake"
    prepared.headers["X-Amz-Date"] = "fake"

    adapter.add_headers(prepared)
    signed = prepared.headers

    # Matching Java SDK: conflicting headers are relocated with "Original-" prefix
    assert signed.get("Original-x-amz-content-sha256") == "fake"
    assert signed.get("Original-X-Amz-Date") == "fake"
    # SigV4 headers are set correctly after signing
    assert signed["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=")
    assert signed["x-amz-content-sha256"] == EMPTY_BODY_SHA256
    assert "X-Amz-Date" in signed
530596

531597

532598
def test_list_tables_404(rest_mock: Mocker) -> None:

0 commit comments

Comments
 (0)