Skip to content

Commit 363c76b

Browse files
Improve SigV4 tests and add botocore version reference
1 parent: e69203d · commit: 363c76b

2 files changed

Lines changed: 58 additions & 9 deletions

File tree

pyiceberg/catalog/rest/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -777,6 +777,8 @@ def canonical_request(self, request: Any) -> str:
777777
# Reuses the logic from botocore's SigV4Auth.canonical_request
778778
# (https://github.com/boto/botocore/blob/develop/botocore/auth.py)
779779
# but always uses self.payload(request) for the body checksum.
780+
# Validated against botocore <= 1.42.x
781+
# (https://github.com/boto/botocore/blob/1.42.85/botocore/auth.py#L622-L637)
780782
cr = [request.method.upper()]
781783
path = self._normalize_url_path(parse.urlsplit(request.url).path)
782784
cr.append(path)

tests/catalog/test_rest.py

Lines changed: 56 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -669,10 +669,9 @@ def test_sigv4_sign_request_with_body(rest_mock: Mocker) -> None:
669669
assert prepared.headers["Original-Authorization"] == f"Bearer {existing_token}"
670670
# Non-empty body should have base64-encoded SHA256
671671
content_sha256 = prepared.headers["x-amz-content-sha256"]
672-
assert content_sha256 == "nhKdVGKGU3IMGjYlod9xKUVc7/H5K6zTWj60yJOM80k="
673-
# Verify it's valid base64 and matches the body
674-
decoded = base64.b64decode(content_sha256)
675-
assert len(decoded) == 32 # SHA256 produces 32 bytes
672+
body_bytes = prepared.body.encode("utf-8") if isinstance(prepared.body, str) else prepared.body
673+
expected_sha256 = base64.b64encode(hashlib.sha256(body_bytes).digest()).decode()
674+
assert content_sha256 == expected_sha256
676675
# x-amz-content-sha256 should be in signed headers
677676
assert "x-amz-content-sha256" in auth_header
678677

@@ -707,11 +706,8 @@ def test_sigv4_content_sha256_with_bytes_body(rest_mock: Mocker) -> None:
707706
assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=")
708707
assert "SignedHeaders=" in prepared.headers["Authorization"]
709708
content_sha256 = prepared.headers["x-amz-content-sha256"]
710-
assert content_sha256 == "sD20bEQP+WnwKPT7jxn7PIACGciAeWjQPlzFCK5Fifo="
711-
# Verify it's valid base64 and matches the body
712-
decoded = base64.b64decode(content_sha256)
713-
assert len(decoded) == 32 # SHA256 produces 32 bytes
714-
assert decoded == hashlib.sha256(body_content).digest()
709+
expected_sha256 = base64.b64encode(hashlib.sha256(body_content).digest()).decode()
710+
assert content_sha256 == expected_sha256
715711

716712

717713
def test_sigv4_conflicting_sigv4_headers(rest_mock: Mocker) -> None:
@@ -745,6 +741,57 @@ def test_sigv4_conflicting_sigv4_headers(rest_mock: Mocker) -> None:
745741
assert "X-Amz-Date" in prepared.headers
746742

747743

744+
def test_sigv4_canonical_request_uses_hex_payload(rest_mock: Mocker) -> None:
745+
"""Verify that the canonical request uses hex-encoded payload hash, not the base64 header value."""
746+
from unittest.mock import patch
747+
748+
from botocore.auth import SigV4Auth
749+
750+
catalog = RestCatalog(
751+
"rest",
752+
**{
753+
"uri": TEST_URI,
754+
"token": "token",
755+
"rest.sigv4-enabled": "true",
756+
"rest.signing-region": "us-west-2",
757+
"client.access-key-id": "id",
758+
"client.secret-access-key": "secret",
759+
},
760+
)
761+
762+
body_content = b'{"namespace": "test"}'
763+
prepared = catalog._session.prepare_request(
764+
Request(
765+
"POST",
766+
f"{TEST_URI}v1/namespaces",
767+
data=body_content,
768+
)
769+
)
770+
adapter = catalog._session.adapters[catalog.uri]
771+
assert isinstance(adapter, HTTPAdapter)
772+
773+
# Capture the canonical request string during signing
774+
captured_canonical = []
775+
original_add_auth = SigV4Auth.add_auth
776+
777+
def capturing_add_auth(self: Any, request: Any) -> None:
778+
captured_canonical.append(self.canonical_request(request))
779+
original_add_auth(self, request)
780+
781+
with patch.object(SigV4Auth, "add_auth", capturing_add_auth):
782+
adapter.add_headers(prepared)
783+
784+
assert len(captured_canonical) == 1
785+
canonical_lines = captured_canonical[0].split("\n")
786+
# Last line of canonical request is the payload hash
787+
payload_hash = canonical_lines[-1]
788+
# Must be hex-encoded (64 hex chars), not base64
789+
assert len(payload_hash) == 64
790+
assert payload_hash == hashlib.sha256(body_content).hexdigest()
791+
# Meanwhile the header is base64-encoded
792+
assert prepared.headers["x-amz-content-sha256"] == base64.b64encode(hashlib.sha256(body_content).digest()).decode()
793+
794+
748795
def test_sigv4_adapter_default_retry_config(rest_mock: Mocker) -> None:
749796
catalog = RestCatalog(
750797
"rest",

0 commit comments

Comments (0)