Skip to content

Commit 006a7fc

Browse files
add Java cross-language reference vector, harden body type handling
1 parent 0abf660 commit 006a7fc

4 files changed

Lines changed: 74 additions & 13 deletions

File tree

pyiceberg/catalog/rest/__init__.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -773,21 +773,18 @@ def _init_sigv4(self, session: Session) -> None:
773773
from requests.adapters import HTTPAdapter
774774

775775
class _IcebergSigV4Auth(SigV4Auth):
776-
def canonical_request(self, request: Any) -> str:
777-
# Reuses the logic from botocore's SigV4Auth.canonical_request
778-
# (https://github.com/boto/botocore/blob/develop/botocore/auth.py)
779-
# but always uses self.payload(request) for the body checksum.
780-
# Validated against botocore <= 1.42.x
781-
# (https://github.com/boto/botocore/blob/1.42.85/botocore/auth.py#L622-L637)
776+
def canonical_request(self, request: AWSRequest) -> str:
777+
# Override forces hex payload hash in the canonical request even when
778+
# x-amz-content-sha256 header is base64 (see body-hash block below).
779+
# Mirrors botocore <=1.42.x SigV4Auth.canonical_request layout:
780+
# https://github.com/boto/botocore/blob/1.42.85/botocore/auth.py#L622-L637
782781
cr = [request.method.upper()]
783782
path = self._normalize_url_path(parse.urlsplit(request.url).path)
784783
cr.append(path)
785784
cr.append(self.canonical_query_string(request))
786785
headers_to_sign = self.headers_to_sign(request)
787786
cr.append(self.canonical_headers(headers_to_sign) + "\n")
788787
cr.append(self.signed_headers(headers_to_sign))
789-
# Always use hex-encoded payload hash per SigV4 spec,
790-
# regardless of the x-amz-content-sha256 header value (which may be base64).
791788
cr.append(self.payload(request))
792789
return "\n".join(cr)
793790

@@ -818,11 +815,20 @@ def add_headers(self, request: PreparedRequest, **kwargs: Any) -> None: # pylin
818815
if "connection" in request.headers:
819816
del request.headers["connection"]
820817

821-
# Compute the x-amz-content-sha256 header to match Iceberg Java SDK:
822-
# - empty body → hex (EMPTY_BODY_SHA256)
823-
# - non-empty body → base64
818+
# Match Iceberg Java's AWS SDK v2 flexible-checksum signing:
819+
# x-amz-content-sha256 header is base64 for non-empty bodies, hex for empty.
820+
# The SigV4 canonical request still uses hex (enforced in _IcebergSigV4Auth above).
821+
# Ref: https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/RESTSigV4AuthSession.java
824822
if request.body:
825-
body_bytes = request.body.encode("utf-8") if isinstance(request.body, str) else request.body
823+
if isinstance(request.body, str):
824+
body_bytes = request.body.encode("utf-8")
825+
elif isinstance(request.body, (bytes, bytearray)):
826+
body_bytes = request.body
827+
else:
828+
raise TypeError(
829+
f"Unsupported request body type for SigV4 signing: "
830+
f"{type(request.body).__name__}; expected str or bytes."
831+
)
826832
content_sha256_header = base64.b64encode(hashlib.sha256(body_bytes).digest()).decode()
827833
else:
828834
content_sha256_header = EMPTY_BODY_SHA256

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ sql-postgres = [
9292
]
9393
sql-sqlite = ["sqlalchemy>=2.0.18,<3"]
9494
gcsfs = ["gcsfs>=2023.1.0"]
95-
rest-sigv4 = ["boto3>=1.24.59"]
95+
rest-sigv4 = ["boto3>=1.24.59", "botocore<2"]
9696
hf = ["huggingface-hub>=0.24.0"]
9797
pyiceberg-core = ["pyiceberg-core>=0.5.1,<0.10.0"]
9898
datafusion = ["datafusion>=52,<53"]

tests/catalog/test_rest.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -793,6 +793,59 @@ def capturing_add_auth(self: Any, request: Any) -> None:
793793
assert prepared.headers["x-amz-content-sha256"] == base64.b64encode(hashlib.sha256(body_content).digest()).decode()
794794

795795

796+
def test_sigv4_content_sha256_matches_iceberg_java_reference(rest_mock: Mocker) -> None:
797+
"""Pin byte-for-byte equivalence with Iceberg Java TestRESTSigV4AuthSession (L121, L177)."""
798+
java_reference_body = b'{"namespace":["ns"],"properties":{}}'
799+
java_reference_base64 = "yc5oAKPWjHY4sW8XQq0l/3aNrrXJKBycVFNnDEGMfww="
800+
java_reference_empty_hex = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
801+
802+
catalog = RestCatalog(
803+
"rest",
804+
**{
805+
"uri": TEST_URI,
806+
"rest.sigv4-enabled": "true",
807+
"rest.signing-region": "us-east-1",
808+
"client.access-key-id": "id",
809+
"client.secret-access-key": "secret",
810+
},
811+
)
812+
adapter = catalog._session.adapters[catalog.uri]
813+
assert isinstance(adapter, HTTPAdapter)
814+
815+
# Non-empty body: must match Java's base64 reference value exactly
816+
prepared_with_body = catalog._session.prepare_request(Request("POST", f"{TEST_URI}v1/namespaces", data=java_reference_body))
817+
adapter.add_headers(prepared_with_body)
818+
assert prepared_with_body.headers["x-amz-content-sha256"] == java_reference_base64
819+
820+
# Empty body: must match Java's hex reference value exactly
821+
prepared_empty = catalog._session.prepare_request(Request("GET", f"{TEST_URI}v1/config"))
822+
adapter.add_headers(prepared_empty)
823+
assert prepared_empty.headers["x-amz-content-sha256"] == java_reference_empty_hex
824+
825+
826+
def test_sigv4_unsupported_body_type_raises(rest_mock: Mocker) -> None:
827+
"""Unsupported body types (e.g. file-like) raise a clear error rather than crashing in hashlib."""
828+
catalog = RestCatalog(
829+
"rest",
830+
**{
831+
"uri": TEST_URI,
832+
"rest.sigv4-enabled": "true",
833+
"rest.signing-region": "us-east-1",
834+
"client.access-key-id": "id",
835+
"client.secret-access-key": "secret",
836+
},
837+
)
838+
adapter = catalog._session.adapters[catalog.uri]
839+
assert isinstance(adapter, HTTPAdapter)
840+
841+
prepared = catalog._session.prepare_request(Request("POST", f"{TEST_URI}v1/namespaces"))
842+
# Inject an unsupported body type (a list — not str/bytes)
843+
prepared.body = ["not", "a", "valid", "body"] # type: ignore[assignment]
844+
845+
with pytest.raises(TypeError, match="Unsupported request body type for SigV4 signing"):
846+
adapter.add_headers(prepared)
847+
848+
796849
def test_sigv4_adapter_default_retry_config(rest_mock: Mocker) -> None:
797850
catalog = RestCatalog(
798851
"rest",

uv.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)