Skip to content

Commit 1f5f728

Browse files
add Java cross-language reference vector, harden body type handling
1 parent dcaa632 commit 1f5f728

4 files changed

Lines changed: 74 additions & 13 deletions

File tree

pyiceberg/catalog/rest/__init__.py

Lines changed: 18 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -769,21 +769,18 @@ def _init_sigv4(self, session: Session) -> None:
769769
from requests.adapters import HTTPAdapter
770770

771771
class _IcebergSigV4Auth(SigV4Auth):
772-
def canonical_request(self, request: Any) -> str:
773-
# Reuses the logic from botocore's SigV4Auth.canonical_request
774-
# (https://github.com/boto/botocore/blob/develop/botocore/auth.py)
775-
# but always uses self.payload(request) for the body checksum.
776-
# Validated against botocore <= 1.42.x
777-
# (https://github.com/boto/botocore/blob/1.42.85/botocore/auth.py#L622-L637)
772+
def canonical_request(self, request: AWSRequest) -> str:
773+
# Override forces hex payload hash in the canonical request even when
774+
# x-amz-content-sha256 header is base64 (see body-hash block below).
775+
# Mirrors botocore <=1.42.x SigV4Auth.canonical_request layout:
776+
# https://github.com/boto/botocore/blob/1.42.85/botocore/auth.py#L622-L637
778777
cr = [request.method.upper()]
779778
path = self._normalize_url_path(parse.urlsplit(request.url).path)
780779
cr.append(path)
781780
cr.append(self.canonical_query_string(request))
782781
headers_to_sign = self.headers_to_sign(request)
783782
cr.append(self.canonical_headers(headers_to_sign) + "\n")
784783
cr.append(self.signed_headers(headers_to_sign))
785-
# Always use hex-encoded payload hash per SigV4 spec,
786-
# regardless of the x-amz-content-sha256 header value (which may be base64).
787784
cr.append(self.payload(request))
788785
return "\n".join(cr)
789786

@@ -814,11 +811,20 @@ def add_headers(self, request: PreparedRequest, **kwargs: Any) -> None: # pylin
814811
if "connection" in request.headers:
815812
del request.headers["connection"]
816813

817-
# Compute the x-amz-content-sha256 header to match Iceberg Java SDK:
818-
# - empty body → hex (EMPTY_BODY_SHA256)
819-
# - non-empty body → base64
814+
# Match Iceberg Java's AWS SDK v2 flexible-checksum signing:
815+
# x-amz-content-sha256 header is base64 for non-empty bodies, hex for empty.
816+
# The SigV4 canonical request still uses hex (enforced in _IcebergSigV4Auth above).
817+
# Ref: https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/RESTSigV4AuthSession.java
820818
if request.body:
821-
body_bytes = request.body.encode("utf-8") if isinstance(request.body, str) else request.body
819+
if isinstance(request.body, str):
820+
body_bytes = request.body.encode("utf-8")
821+
elif isinstance(request.body, (bytes, bytearray)):
822+
body_bytes = request.body
823+
else:
824+
raise TypeError(
825+
f"Unsupported request body type for SigV4 signing: "
826+
f"{type(request.body).__name__}; expected str or bytes."
827+
)
822828
content_sha256_header = base64.b64encode(hashlib.sha256(body_bytes).digest()).decode()
823829
else:
824830
content_sha256_header = EMPTY_BODY_SHA256

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ sql-postgres = [
9292
]
9393
sql-sqlite = ["sqlalchemy>=2.0.18,<3"]
9494
gcsfs = ["gcsfs>=2023.1.0"]
95-
rest-sigv4 = ["boto3>=1.24.59"]
95+
rest-sigv4 = ["boto3>=1.24.59", "botocore<2"]
9696
hf = ["huggingface-hub>=0.24.0"]
9797
pyiceberg-core = ["pyiceberg-core>=0.5.1,<0.10.0"]
9898
datafusion = ["datafusion>=52,<53"]

tests/catalog/test_rest.py

Lines changed: 53 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -685,6 +685,59 @@ def capturing_add_auth(self: Any, request: Any) -> None:
685685
assert prepared.headers["x-amz-content-sha256"] == base64.b64encode(hashlib.sha256(body_content).digest()).decode()
686686

687687

688+
def test_sigv4_content_sha256_matches_iceberg_java_reference(rest_mock: Mocker) -> None:
689+
"""Pin byte-for-byte equivalence with Iceberg Java TestRESTSigV4AuthSession (L121, L177)."""
690+
java_reference_body = b'{"namespace":["ns"],"properties":{}}'
691+
java_reference_base64 = "yc5oAKPWjHY4sW8XQq0l/3aNrrXJKBycVFNnDEGMfww="
692+
java_reference_empty_hex = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
693+
694+
catalog = RestCatalog(
695+
"rest",
696+
**{
697+
"uri": TEST_URI,
698+
"rest.sigv4-enabled": "true",
699+
"rest.signing-region": "us-east-1",
700+
"client.access-key-id": "id",
701+
"client.secret-access-key": "secret",
702+
},
703+
)
704+
adapter = catalog._session.adapters[catalog.uri]
705+
assert isinstance(adapter, HTTPAdapter)
706+
707+
# Non-empty body: must match Java's base64 reference value exactly
708+
prepared_with_body = catalog._session.prepare_request(Request("POST", f"{TEST_URI}v1/namespaces", data=java_reference_body))
709+
adapter.add_headers(prepared_with_body)
710+
assert prepared_with_body.headers["x-amz-content-sha256"] == java_reference_base64
711+
712+
# Empty body: must match Java's hex reference value exactly
713+
prepared_empty = catalog._session.prepare_request(Request("GET", f"{TEST_URI}v1/config"))
714+
adapter.add_headers(prepared_empty)
715+
assert prepared_empty.headers["x-amz-content-sha256"] == java_reference_empty_hex
716+
717+
718+
def test_sigv4_unsupported_body_type_raises(rest_mock: Mocker) -> None:
719+
"""Unsupported body types (e.g. file-like) raise a clear error rather than crashing in hashlib."""
720+
catalog = RestCatalog(
721+
"rest",
722+
**{
723+
"uri": TEST_URI,
724+
"rest.sigv4-enabled": "true",
725+
"rest.signing-region": "us-east-1",
726+
"client.access-key-id": "id",
727+
"client.secret-access-key": "secret",
728+
},
729+
)
730+
adapter = catalog._session.adapters[catalog.uri]
731+
assert isinstance(adapter, HTTPAdapter)
732+
733+
prepared = catalog._session.prepare_request(Request("POST", f"{TEST_URI}v1/namespaces"))
734+
# Inject an unsupported body type (a list — not str/bytes)
735+
prepared.body = ["not", "a", "valid", "body"] # type: ignore[assignment]
736+
737+
with pytest.raises(TypeError, match="Unsupported request body type for SigV4 signing"):
738+
adapter.add_headers(prepared)
739+
740+
688741
def test_sigv4_adapter_default_retry_config(rest_mock: Mocker) -> None:
689742
catalog = RestCatalog(
690743
"rest",

uv.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)