Skip to content

Commit d1efb3b

Browse files
Add support for sha512, xxhash algorithms, and MD5 for httpchecksums (boto#3637)
* Add support for sha512, CRT-supported xxhash algorithms, and MD5 when modeled by a service. Co-authored-by: jonathan343 <43360731+jonathan343@users.noreply.github.com>
1 parent 1e787fd commit d1efb3b

7 files changed

Lines changed: 457 additions & 99 deletions

File tree

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"type": "enhancement",
3+
"category": "``checksums``",
4+
"description": "Added support for the SHA512 checksum algorithm. When the optional AWS CRT (``awscrt``) dependency is installed, support is also enabled for the XXHASH64, XXHASH3, and XXHASH128 checksum algorithms. Also added pass-through support for customer-provided MD5 checksum headers (without SDK-side MD5 calculation or validation)."
5+
}
6+

botocore/httpchecksum.py

Lines changed: 91 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import io
2424
import logging
2525
from binascii import crc32
26-
from hashlib import sha1, sha256
26+
from hashlib import sha1, sha256, sha512
2727

2828
from botocore.compat import HAS_CRT, has_minimum_crt_version, urlparse
2929
from botocore.exceptions import (
@@ -37,6 +37,7 @@
3737
from botocore.utils import (
3838
conditionally_calculate_md5,
3939
determine_content_length,
40+
get_checksum_header_algorithms,
4041
has_checksum_header,
4142
)
4243

@@ -127,6 +128,42 @@ def digest(self):
127128
return self._int_crc64nvme.to_bytes(8, byteorder="big")
128129

129130

131+
class CrtXxhash64Checksum(BaseChecksum):
132+
# Note: This class is only used if the CRT is available
133+
def __init__(self):
134+
self._xxhash = crt_checksums.XXHash.new_xxhash64()
135+
136+
def update(self, chunk):
137+
self._xxhash.update(chunk)
138+
139+
def digest(self):
140+
return self._xxhash.finalize()
141+
142+
143+
class CrtXxhash3Checksum(BaseChecksum):
144+
# Note: This class is only used if the CRT is available
145+
def __init__(self):
146+
self._xxhash = crt_checksums.XXHash.new_xxhash3_64()
147+
148+
def update(self, chunk):
149+
self._xxhash.update(chunk)
150+
151+
def digest(self):
152+
return self._xxhash.finalize()
153+
154+
155+
class CrtXxhash128Checksum(BaseChecksum):
156+
# Note: This class is only used if the CRT is available
157+
def __init__(self):
158+
self._xxhash = crt_checksums.XXHash.new_xxhash3_128()
159+
160+
def update(self, chunk):
161+
self._xxhash.update(chunk)
162+
163+
def digest(self):
164+
return self._xxhash.finalize()
165+
166+
130167
class Sha1Checksum(BaseChecksum):
131168
def __init__(self):
132169
self._checksum = sha1()
@@ -149,6 +186,17 @@ def digest(self):
149186
return self._checksum.digest()
150187

151188

189+
class Sha512Checksum(BaseChecksum):
190+
def __init__(self):
191+
self._checksum = sha512()
192+
193+
def update(self, chunk):
194+
self._checksum.update(chunk)
195+
196+
def digest(self):
197+
return self._checksum.digest()
198+
199+
152200
class AwsChunkedWrapper:
153201
_DEFAULT_CHUNK_SIZE = 1024 * 1024
154202

@@ -267,6 +315,7 @@ def _validate_checksum(self):
267315
def resolve_checksum_context(request, operation_model, params):
268316
resolve_request_checksum_algorithm(request, operation_model, params)
269317
resolve_response_checksum_algorithms(request, operation_model, params)
318+
_register_checksum_feature_ids(request)
270319

271320

272321
def resolve_request_checksum_algorithm(
@@ -398,7 +447,6 @@ def _apply_request_header_checksum(request):
398447
checksum_cls = _CHECKSUM_CLS.get(algorithm["algorithm"])
399448
digest = checksum_cls().handle(request["body"])
400449
request["headers"][location_name] = digest
401-
_register_checksum_algorithm_feature_id(algorithm)
402450

403451

404452
def _apply_request_trailer_checksum(request):
@@ -422,7 +470,6 @@ def _apply_request_trailer_checksum(request):
422470
else:
423471
headers["Content-Encoding"] = "aws-chunked"
424472
headers["X-Amz-Trailer"] = location_name
425-
_register_checksum_algorithm_feature_id(algorithm)
426473

427474
content_length = determine_content_length(body)
428475
if content_length is not None:
@@ -446,8 +493,22 @@ def _apply_request_trailer_checksum(request):
446493
)
447494

448495

496+
def _register_checksum_feature_ids(request):
497+
"""Register feature IDs for checksum algorithms used in the request."""
498+
if algorithm_list := get_checksum_header_algorithms(request):
499+
for algorithm_name in algorithm_list:
500+
_register_checksum_algorithm_feature_id(algorithm_name)
501+
return
502+
# If no checksum header exists yet, check the resolved context for
503+
# an algorithm that will be applied later by apply_request_checksum.
504+
checksum_context = request.get("context", {}).get("checksum", {})
505+
algorithm = checksum_context.get("request_algorithm")
506+
if algorithm and isinstance(algorithm, dict):
507+
_register_checksum_algorithm_feature_id(algorithm["algorithm"])
508+
509+
449510
def _register_checksum_algorithm_feature_id(algorithm):
450-
checksum_algorithm_name = algorithm["algorithm"].upper()
511+
checksum_algorithm_name = algorithm.upper()
451512
if checksum_algorithm_name == "CRC64NVME":
452513
checksum_algorithm_name = "CRC64"
453514
checksum_algorithm_name_feature_id = (
@@ -552,8 +613,16 @@ def _handle_bytes_response(http_response, response, algorithm):
552613
"crc32": Crc32Checksum,
553614
"sha1": Sha1Checksum,
554615
"sha256": Sha256Checksum,
616+
"sha512": Sha512Checksum,
555617
}
556-
_CRT_CHECKSUM_ALGORITHMS = ["crc32", "crc32c", "crc64nvme"]
618+
_CRT_CHECKSUM_ALGORITHMS = [
619+
"crc32",
620+
"crc32c",
621+
"crc64nvme",
622+
"xxhash64",
623+
"xxhash3",
624+
"xxhash128",
625+
]
557626
if HAS_CRT:
558627
# Use CRT checksum implementations if available
559628
_CRT_CHECKSUM_CLS = {
@@ -565,10 +634,25 @@ def _handle_bytes_response(http_response, response, algorithm):
565634
# CRC64NVME support wasn't officially added until 0.23.4
566635
_CRT_CHECKSUM_CLS["crc64nvme"] = CrtCrc64NvmeChecksum
567636

637+
if has_minimum_crt_version((0, 31, 2)):
638+
_CRT_CHECKSUM_CLS["xxhash64"] = CrtXxhash64Checksum
639+
_CRT_CHECKSUM_CLS["xxhash3"] = CrtXxhash3Checksum
640+
_CRT_CHECKSUM_CLS["xxhash128"] = CrtXxhash128Checksum
641+
568642
_CHECKSUM_CLS.update(_CRT_CHECKSUM_CLS)
569-
# Validate this list isn't out of sync with _CRT_CHECKSUM_CLS keys
643+
# Validate that every _CRT_CHECKSUM_CLS key is listed in _CRT_CHECKSUM_ALGORITHMS
570644
assert all(
571645
name in _CRT_CHECKSUM_ALGORITHMS for name in _CRT_CHECKSUM_CLS.keys()
572646
)
573647
_SUPPORTED_CHECKSUM_ALGORITHMS = list(_CHECKSUM_CLS.keys())
574-
_ALGORITHMS_PRIORITY_LIST = ['crc64nvme', 'crc32c', 'crc32', 'sha1', 'sha256']
648+
_ALGORITHMS_PRIORITY_LIST = [
649+
'xxhash128',
650+
'xxhash3',
651+
'crc64nvme',
652+
'xxhash64',
653+
'crc32c',
654+
'crc32',
655+
'sha1',
656+
'sha256',
657+
'sha512',
658+
]

botocore/useragent.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,11 @@
100100
'CLI_V1_TO_V2_MIGRATION_DEBUG_MODE': '-',
101101
'CREDENTIALS_PROFILE_LOGIN': 'AC',
102102
'CREDENTIALS_LOGIN': 'AD',
103+
'FLEXIBLE_CHECKSUMS_REQ_MD5': 'AE',
104+
'FLEXIBLE_CHECKSUMS_REQ_SHA512': 'AF',
105+
'FLEXIBLE_CHECKSUMS_REQ_XXHASH3': 'AG',
106+
'FLEXIBLE_CHECKSUMS_REQ_XXHASH64': 'AH',
107+
'FLEXIBLE_CHECKSUMS_REQ_XXHASH128': 'AI',
103108
}
104109

105110

botocore/utils.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3301,22 +3301,34 @@ def _is_s3express_request(params):
33013301
return endpoint_properties.get('backend') == 'S3Express'
33023302

33033303

3304-
def has_checksum_header(params):
3304+
def get_checksum_header_algorithms(params):
33053305
"""
3306-
Checks if a header starting with "x-amz-checksum-" is provided in a request.
3306+
Returns a list of algorithm names if headers starting with "x-amz-checksum-"
3307+
are provided in a request, otherwise returns an empty list.
33073308
33083309
This function is considered private and subject to abrupt breaking changes or
33093310
removal without prior announcement. Please do not use it directly.
33103311
"""
33113312
headers = params['headers']
3313+
checksum_headers = []
33123314

33133315
# If a header matching the x-amz-checksum-* pattern is present, we
3314-
# assume a checksum has already been provided by the user.
3316+
# extract its algorithm name and add it to the returned list.
33153317
for header in headers:
3316-
if CHECKSUM_HEADER_PATTERN.match(header):
3317-
return True
3318+
match = CHECKSUM_HEADER_PATTERN.match(header)
3319+
if match:
3320+
checksum_headers.append(match.group(1))
3321+
return checksum_headers
33183322

3319-
return False
3323+
3324+
def has_checksum_header(params):
3325+
"""
3326+
Checks if a header starting with "x-amz-checksum-" is provided in a request.
3327+
3328+
This function is considered private and subject to abrupt breaking changes or
3329+
removal without prior announcement. Please do not use it directly.
3330+
"""
3331+
return bool(get_checksum_header_algorithms(params))
33203332

33213333

33223334
def conditionally_calculate_checksum(params, **kwargs):

0 commit comments

Comments
 (0)