Skip to content

Commit 5bb017a

Browse files
[Storage] Add support for CRC64 content validation (#47001)
* [Storage] [STG 102] Create File with Data (#44901)
* [Storage][102] CRC64 content validation - part 1 (#45096)
* [Storage][102] CRC64 content validation - part 2 (#45567)
* [Storage][102] CRC64 content validation - part 3 (#45861)
* [Storage][102] CRC64 content validation - part 4 (#45949)
* [Storage] [STG 102] Added Support for IPv6 Accounts + Data Lake Tag Tests (#45766)
* Fixed test collection bug
* [Storage][102] CRC64 content validation - part 5 - datalake (#46034)
* [Storage][102] CRC64 content validation - part 6 - file-share (#46262)
* [Storage][102] CRC64 content validation - part 7 - record tests (#46461)
* [Storage] Simplify Encoder seek, fix SM streaming retry (#46564)
* [Storage] Cleanup and prepare content validation for merge to main (#46971)
* Fix changelogs after merge
* Tools black, Copilot feedback

---------

Co-authored-by: Peter Wu <162184229+weirongw23-msft@users.noreply.github.com>
Co-authored-by: Peter Wu <weirongwu@microsoft.com>
1 parent 2dc9ce7 commit 5bb017a

76 files changed

Lines changed: 8224 additions & 648 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

eng/tools/azure-sdk-tools/devtools_testutils/storage/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
from .api_version_policy import ApiVersionAssertPolicy
2+
from .decorators import GenericTestProxyParametrize1, GenericTestProxyParametrize2
23
from .service_versions import service_version_map, ServiceVersion, is_version_before
34
from .testcase import StorageRecordedTestCase, LogCaptured
45

56
__all__ = [
67
"ApiVersionAssertPolicy",
8+
"GenericTestProxyParametrize1",
9+
"GenericTestProxyParametrize2",
710
"service_version_map",
811
"StorageRecordedTestCase",
912
"ServiceVersion",
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from .asynctestcase import AsyncStorageRecordedTestCase
2+
from .asyncdecorators import GenericTestProxyParametrize1, GenericTestProxyParametrize2
23

3-
__all__ = ["AsyncStorageRecordedTestCase"]
4+
__all__ = ["AsyncStorageRecordedTestCase", "GenericTestProxyParametrize1", "GenericTestProxyParametrize2"]
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# -------------------------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# Licensed under the MIT License. See License.txt in the project root for
4+
# license information.
5+
# --------------------------------------------------------------------------
6+
7+
8+
class GenericTestProxyParametrize1:
    """Decorator that wraps an async test function taking one extra positional argument.

    Calling an instance with a coroutine function returns a new coroutine function
    with the explicit signature ``(test_class, a, **kwargs)`` that forwards its
    arguments unchanged to the wrapped function.
    """

    def __call__(self, fn):
        """Return an async wrapper around ``fn`` exposing an explicit ``a`` parameter.

        :param fn: Coroutine function invoked as ``fn(test_class, a, **kwargs)``.
        :return: The wrapping coroutine function.
        """

        # NOTE(review): functools.wraps is presumably omitted on purpose so that the
        # explicit parameter names stay the visible signature -- confirm before adding it.
        async def _wrapper(test_class, a, **kwargs):
            # Propagate the awaited result instead of discarding it.
            return await fn(test_class, a, **kwargs)

        return _wrapper
14+
15+
16+
class GenericTestProxyParametrize2:
    """Decorator that wraps an async test function taking two extra positional arguments.

    Calling an instance with a coroutine function returns a new coroutine function
    with the explicit signature ``(test_class, a, b, **kwargs)`` that forwards its
    arguments unchanged to the wrapped function.
    """

    def __call__(self, fn):
        """Return an async wrapper around ``fn`` exposing explicit ``a`` and ``b`` parameters.

        :param fn: Coroutine function invoked as ``fn(test_class, a, b, **kwargs)``.
        :return: The wrapping coroutine function.
        """

        # NOTE(review): functools.wraps is presumably omitted on purpose so that the
        # explicit parameter names stay the visible signature -- confirm before adding it.
        async def _wrapper(test_class, a, b, **kwargs):
            # Propagate the awaited result instead of discarding it.
            return await fn(test_class, a, b, **kwargs)

        return _wrapper
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# -------------------------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# Licensed under the MIT License. See License.txt in the project root for
4+
# license information.
5+
# --------------------------------------------------------------------------
6+
7+
8+
class GenericTestProxyParametrize1:
    """Decorator that wraps a test function taking one extra positional argument.

    Calling an instance with a function returns a new function with the explicit
    signature ``(test_class, a, **kwargs)`` that forwards its arguments unchanged
    to the wrapped function and returns its result.
    """

    def __call__(self, fn):
        """Return a wrapper around ``fn`` exposing an explicit ``a`` parameter.

        :param fn: Callable invoked as ``fn(test_class, a, **kwargs)``.
        :return: The wrapping function.
        """

        # NOTE(review): functools.wraps is presumably omitted on purpose so that the
        # explicit parameter names stay the visible signature -- confirm before adding it.
        def _wrapper(test_class, a, **kwargs):
            return fn(test_class, a, **kwargs)

        return _wrapper
14+
15+
16+
class GenericTestProxyParametrize2:
    """Decorator that wraps a test function taking two extra positional arguments.

    Calling an instance with a function returns a new function with the explicit
    signature ``(test_class, a, b, **kwargs)`` that forwards its arguments unchanged
    to the wrapped function and returns its result.
    """

    def __call__(self, fn):
        """Return a wrapper around ``fn`` exposing explicit ``a`` and ``b`` parameters.

        :param fn: Callable invoked as ``fn(test_class, a, b, **kwargs)``.
        :return: The wrapping function.
        """

        # NOTE(review): functools.wraps is presumably omitted on purpose so that the
        # explicit parameter names stay the visible signature -- confirm before adding it.
        def _wrapper(test_class, a, b, **kwargs):
            return fn(test_class, a, b, **kwargs)

        return _wrapper

sdk/storage/azure-storage-blob/assets.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
"AssetsRepo": "Azure/azure-sdk-assets",
33
"AssetsRepoPrefixPath": "python",
44
"TagPrefix": "python/storage/azure-storage-blob",
5-
"Tag": "python/storage/azure-storage-blob_28cfcca089"
5+
"Tag": "python/storage/azure-storage-blob_b09e37b521"
66
}

sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.py

Lines changed: 34 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
from ._quick_query_helper import BlobQueryReader
6666
from ._shared.base_client import parse_connection_str, StorageAccountHostsMixin, TransportWrapper
6767
from ._shared.response_handlers import process_storage_error, return_response_headers
68+
from ._shared.validation import is_crc64_validation, parse_validation_option
6869
from ._serialize import (
6970
get_access_conditions,
7071
get_api_version,
@@ -505,15 +506,11 @@ def upload_blob(
505506
:keyword ~azure.storage.blob.ContentSettings content_settings:
506507
ContentSettings object used to set blob properties. Used to set content type, encoding,
507508
language, disposition, md5, and cache control.
508-
:keyword bool validate_content:
509-
If true, calculates an MD5 hash for each chunk of the blob. The storage
510-
service checks the hash of the content that has arrived with the hash
511-
that was sent. This is primarily valuable for detecting bitflips on
512-
the wire if using http instead of https, as https (the default), will
513-
already validate. Note that this MD5 hash is not stored with the
514-
blob. Also note that if enabled, the memory-efficient upload algorithm
515-
will not be used because computing the MD5 hash requires buffering
516-
entire blocks, and doing so defeats the purpose of the memory-efficient algorithm.
509+
:keyword validate_content:
510+
Enables checksum validation for the transfer. Any checksum calculated is NOT stored with the blob.
511+
Choose "auto" (let the SDK choose the best algorithm), "crc64", or "md5". The use of bool is deprecated.
512+
NOTE: The use of "auto" or "crc64" requires the `azure-storage-extensions` package to be installed.
513+
:paramtype validate_content: Union[bool, Literal['auto', 'crc64', 'md5']]
517514
:keyword lease:
518515
Required if the blob has an active lease. If specified, upload_blob only succeeds if the
519516
blob's lease is active and matches this ID. Value can be a BlobLeaseClient object
@@ -616,6 +613,9 @@ def upload_blob(
616613
raise ValueError("Encryption required but no key was provided.")
617614
if kwargs.get('cpk') and self.scheme.lower() != 'https':
618615
raise ValueError("Customer provided encryption key must be used over HTTPS.")
616+
validate_content = parse_validation_option(kwargs.pop('validate_content', None))
617+
if is_crc64_validation(validate_content) and self.key_encryption_key:
618+
raise ValueError("Using encryption and content validation together is not currently supported.")
619619
options = _upload_blob_options(
620620
data=data,
621621
blob_type=blob_type,
@@ -627,6 +627,7 @@ def upload_blob(
627627
'key': self.key_encryption_key,
628628
'resolver': self.key_resolver_function
629629
},
630+
validate_content=validate_content,
630631
config=self._config,
631632
sdk_moniker=self._sdk_moniker,
632633
client=self._client,
@@ -683,15 +684,11 @@ def download_blob(
683684
684685
This keyword argument was introduced in API version '2019-12-12'.
685686
686-
:keyword bool validate_content:
687-
If true, calculates an MD5 hash for each chunk of the blob. The storage
688-
service checks the hash of the content that has arrived with the hash
689-
that was sent. This is primarily valuable for detecting bitflips on
690-
the wire if using http instead of https, as https (the default), will
691-
already validate. Note that this MD5 hash is not stored with the
692-
blob. Also note that if enabled, the memory-efficient upload algorithm
693-
will not be used because computing the MD5 hash requires buffering
694-
entire blocks, and doing so defeats the purpose of the memory-efficient algorithm.
687+
:keyword validate_content:
688+
Enables checksum validation for the transfer. Any checksum calculated is NOT stored with the blob.
689+
Choose "auto" (let the SDK choose the best algorithm), "crc64", or "md5". The use of bool is deprecated.
690+
NOTE: The use of "auto" or "crc64" requires the `azure-storage-extensions` package to be installed.
691+
:paramtype validate_content: Union[bool, Literal['auto', 'crc64', 'md5']]
695692
:keyword lease:
696693
Required if the blob has an active lease. If specified, download_blob only
697694
succeeds if the blob's lease is active and matches this ID. Value can be a
@@ -765,6 +762,9 @@ def download_blob(
765762
raise ValueError("Offset value must not be None if length is set.")
766763
if kwargs.get('cpk') and self.scheme.lower() != 'https':
767764
raise ValueError("Customer provided encryption key must be used over HTTPS.")
765+
validate_content = parse_validation_option(kwargs.pop('validate_content', None))
766+
if is_crc64_validation(validate_content) and self.key_encryption_key:
767+
raise ValueError("Using encryption and content validation together is not currently supported.")
768768
options = _download_blob_options(
769769
blob_name=self.blob_name,
770770
container_name=self.container_name,
@@ -778,6 +778,7 @@ def download_blob(
778778
'key': self.key_encryption_key,
779779
'resolver': self.key_resolver_function
780780
},
781+
validate_content=validate_content,
781782
config=self._config,
782783
sdk_moniker=self._sdk_moniker,
783784
client=self._client,
@@ -2009,15 +2010,11 @@ def stage_block(
20092010
:param int length:
20102011
Size of the block. Optional if the length of data can be determined. For Iterable and IO, if the
20112012
length is not provided and cannot be determined, all data will be read into memory.
2012-
:keyword bool validate_content:
2013-
If true, calculates an MD5 hash for each chunk of the blob. The storage
2014-
service checks the hash of the content that has arrived with the hash
2015-
that was sent. This is primarily valuable for detecting bitflips on
2016-
the wire if using http instead of https, as https (the default), will
2017-
already validate. Note that this MD5 hash is not stored with the
2018-
blob. Also note that if enabled, the memory-efficient upload algorithm
2019-
will not be used because computing the MD5 hash requires buffering
2020-
entire blocks, and doing so defeats the purpose of the memory-efficient algorithm.
2013+
:keyword validate_content:
2014+
Enables checksum validation for the transfer. Any checksum calculated is NOT stored with the blob.
2015+
Choose "auto" (let the SDK choose the best algorithm), "crc64", or "md5". The use of bool is deprecated.
2016+
NOTE: The use of "auto" or "crc64" requires the `azure-storage-extensions` package to be installed.
2017+
:paramtype validate_content: Union[bool, Literal['auto', 'crc64', 'md5']]
20212018
:keyword lease:
20222019
Required if the blob has an active lease. Value can be a BlobLeaseClient object
20232020
or the lease ID as a string.
@@ -2850,13 +2847,11 @@ def upload_page(
28502847
Required if the blob has an active lease. Value can be a BlobLeaseClient object
28512848
or the lease ID as a string.
28522849
:paramtype lease: ~azure.storage.blob.BlobLeaseClient or str
2853-
:keyword bool validate_content:
2854-
If true, calculates an MD5 hash of the page content. The storage
2855-
service checks the hash of the content that has arrived
2856-
with the hash that was sent. This is primarily valuable for detecting
2857-
bitflips on the wire if using http instead of https, as https (the default),
2858-
will already validate. Note that this MD5 hash is not stored with the
2859-
blob.
2850+
:keyword validate_content:
2851+
Enables checksum validation for the transfer. Any checksum calculated is NOT stored with the blob.
2852+
Choose "auto" (let the SDK choose the best algorithm), "crc64", or "md5". The use of bool is deprecated.
2853+
NOTE: The use of "auto" or "crc64" requires the `azure-storage-extensions` package to be installed.
2854+
:paramtype validate_content: Union[bool, Literal['auto', 'crc64', 'md5']]
28602855
:keyword int if_sequence_number_lte:
28612856
If the blob's sequence number is less than or equal to
28622857
the specified value, the request proceeds; otherwise it fails.
@@ -3157,13 +3152,11 @@ def append_block(
31573152
:param int length:
31583153
Size of the block. Optional if the length of data can be determined. For Iterable and IO, if the
31593154
length is not provided and cannot be determined, all data will be read into memory.
3160-
:keyword bool validate_content:
3161-
If true, calculates an MD5 hash of the block content. The storage
3162-
service checks the hash of the content that has arrived
3163-
with the hash that was sent. This is primarily valuable for detecting
3164-
bitflips on the wire if using http instead of https, as https (the default),
3165-
will already validate. Note that this MD5 hash is not stored with the
3166-
blob.
3155+
:keyword validate_content:
3156+
Enables checksum validation for the transfer. Any checksum calculated is NOT stored with the blob.
3157+
Choose "auto" (let the SDK choose the best algorithm), "crc64", or "md5". The use of bool is deprecated.
3158+
NOTE: The use of "auto" or "crc64" requires the `azure-storage-extensions` package to be installed.
3159+
:paramtype validate_content: Union[bool, Literal['auto', 'crc64', 'md5']]
31673160
:keyword int maxsize_condition:
31683161
Optional conditional header. The max length in bytes permitted for
31693162
the append blob. If the Append Block operation would cause the blob

sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.pyi

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ class BlobClient(StorageAccountHostsMixin, StorageEncryptionMixin):
173173
tags: Optional[Dict[str, str]] = None,
174174
overwrite: bool = False,
175175
content_settings: Optional[ContentSettings] = None,
176-
validate_content: bool = False,
176+
validate_content: Optional[Union[bool, Literal['auto', 'crc64', 'md5']]] = None,
177177
lease: Optional[BlobLeaseClient] = None,
178178
if_modified_since: Optional[datetime] = None,
179179
if_unmodified_since: Optional[datetime] = None,
@@ -200,7 +200,7 @@ class BlobClient(StorageAccountHostsMixin, StorageEncryptionMixin):
200200
length: Optional[int] = None,
201201
*,
202202
version_id: Optional[str] = None,
203-
validate_content: bool = False,
203+
validate_content: Optional[Union[bool, Literal['auto', 'crc64', 'md5']]] = None,
204204
lease: Optional[Union[BlobLeaseClient, str]] = None,
205205
if_modified_since: Optional[datetime] = None,
206206
if_unmodified_since: Optional[datetime] = None,
@@ -222,7 +222,7 @@ class BlobClient(StorageAccountHostsMixin, StorageEncryptionMixin):
222222
length: Optional[int] = None,
223223
*,
224224
version_id: Optional[str] = None,
225-
validate_content: bool = False,
225+
validate_content: Optional[Union[bool, Literal['auto', 'crc64', 'md5']]] = None,
226226
lease: Optional[Union[BlobLeaseClient, str]] = None,
227227
if_modified_since: Optional[datetime] = None,
228228
if_unmodified_since: Optional[datetime] = None,
@@ -244,7 +244,7 @@ class BlobClient(StorageAccountHostsMixin, StorageEncryptionMixin):
244244
length: Optional[int] = None,
245245
*,
246246
version_id: Optional[str] = None,
247-
validate_content: bool = False,
247+
validate_content: Optional[Union[bool, Literal['auto', 'crc64', 'md5']]] = None,
248248
lease: Optional[Union[BlobLeaseClient, str]] = None,
249249
if_modified_since: Optional[datetime] = None,
250250
if_unmodified_since: Optional[datetime] = None,
@@ -486,7 +486,7 @@ class BlobClient(StorageAccountHostsMixin, StorageEncryptionMixin):
486486
data: Union[bytes, Iterable[bytes], IO[bytes]],
487487
length: Optional[int] = None,
488488
*,
489-
validate_content: Optional[bool] = None,
489+
validate_content: Optional[Union[bool, Literal['auto', 'crc64', 'md5']]] = None,
490490
lease: Optional[Union[BlobLeaseClient, str]] = None,
491491
encoding: Optional[str] = None,
492492
cpk: Optional[CustomerProvidedEncryptionKey] = None,
@@ -671,7 +671,7 @@ class BlobClient(StorageAccountHostsMixin, StorageEncryptionMixin):
671671
length: int,
672672
*,
673673
lease: Optional[Union[BlobLeaseClient, str]] = None,
674-
validate_content: Optional[bool] = None,
674+
validate_content: Optional[Union[bool, Literal['auto', 'crc64', 'md5']]] = None,
675675
if_sequence_number_lte: Optional[int] = None,
676676
if_sequence_number_lt: Optional[int] = None,
677677
if_sequence_number_eq: Optional[int] = None,
@@ -741,7 +741,7 @@ class BlobClient(StorageAccountHostsMixin, StorageEncryptionMixin):
741741
data: Union[bytes, Iterable[bytes], IO[bytes]],
742742
length: Optional[int] = None,
743743
*,
744-
validate_content: Optional[bool] = None,
744+
validate_content: Optional[Union[bool, Literal['auto', 'crc64', 'md5']]] = None,
745745
maxsize_condition: Optional[int] = None,
746746
appendpos_condition: Optional[int] = None,
747747
lease: Optional[Union[BlobLeaseClient, str]] = None,

sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client_helpers.py

Lines changed: 6 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
from ._shared.response_handlers import return_headers_and_deserialized, return_response_headers
5959
from ._shared.uploads import IterStreamer
6060
from ._shared.uploads_async import AsyncIterStreamer
61+
from ._shared.validation import CV_TYPE_PARSED, parse_validation_option
6162
from ._upload_helpers import _any_conditions
6263

6364
if TYPE_CHECKING:
@@ -110,6 +111,7 @@ def _upload_blob_options( # pylint:disable=too-many-statements
110111
length: Optional[int],
111112
metadata: Optional[Dict[str, str]],
112113
encryption_options: Dict[str, Any],
114+
validate_content: CV_TYPE_PARSED,
113115
config: "StorageConfiguration",
114116
sdk_moniker: str,
115117
client: "AzureBlobStorage",
@@ -135,7 +137,6 @@ def _upload_blob_options( # pylint:disable=too-many-statements
135137
else:
136138
raise TypeError(f"Unsupported data type: {type(data)}")
137139

138-
validate_content = kwargs.pop('validate_content', False)
139140
content_settings = kwargs.pop('content_settings', None)
140141
overwrite = kwargs.pop('overwrite', False)
141142
max_concurrency = kwargs.pop('max_concurrency', None)
@@ -258,42 +259,16 @@ def _download_blob_options(
258259
length: Optional[int],
259260
encoding: Optional[str],
260261
encryption_options: Dict[str, Any],
262+
validate_content: CV_TYPE_PARSED,
261263
config: "StorageConfiguration",
262264
sdk_moniker: str,
263265
client: "AzureBlobStorage",
264266
**kwargs
265267
) -> Dict[str, Any]:
266-
"""Creates a dictionary containing the options for a download blob operation.
267-
268-
:param str blob_name:
269-
The name of the blob.
270-
:param str container_name:
271-
The name of the container.
272-
:param Optional[str] version_id:
273-
The version id parameter is a value that, when present, specifies the version of the blob to download.
274-
:param Optional[int] offset:
275-
Start of byte range to use for downloading a section of the blob. Must be set if length is provided.
276-
:param Optional[int] length:
277-
Number of bytes to read from the stream. This is optional, but should be supplied for optimal performance.
278-
:param Optional[str] encoding:
279-
Encoding to decode the downloaded bytes. Default is None, i.e. no decoding.
280-
:param Dict[str, Any] encryption_options:
281-
The options for encryption, if enabled.
282-
:param StorageConfiguration config:
283-
The Storage configuration options.
284-
:param str sdk_moniker:
285-
The string representing the SDK package version.
286-
:param AzureBlobStorage client:
287-
The generated Blob Storage client.
288-
:return: A dictionary containing the download blob options.
289-
:rtype: Dict[str, Any]
290-
"""
291268
if length is not None:
292269
if offset is None:
293270
raise ValueError("Offset must be provided if length is provided.")
294271
length = offset + length - 1 # Service actually uses an end-range inclusive index
295-
296-
validate_content = kwargs.pop('validate_content', False)
297272
access_conditions = get_access_conditions(kwargs.pop('lease', None))
298273
mod_conditions = get_modify_conditions(kwargs)
299274

@@ -721,7 +696,7 @@ def _stage_block_options(
721696
if isinstance(data, bytes):
722697
data = data[:length]
723698

724-
validate_content = kwargs.pop('validate_content', False)
699+
validate_content = parse_validation_option(kwargs.pop('validate_content', None))
725700
cpk_scope_info = get_cpk_scope_info(kwargs)
726701
cpk = kwargs.pop('cpk', None)
727702
cpk_info = None
@@ -1004,7 +979,7 @@ def _upload_page_options(
1004979
)
1005980
mod_conditions = get_modify_conditions(kwargs)
1006981
cpk_scope_info = get_cpk_scope_info(kwargs)
1007-
validate_content = kwargs.pop('validate_content', False)
982+
validate_content = parse_validation_option(kwargs.pop('validate_content', None))
1008983
cpk = kwargs.pop('cpk', None)
1009984
cpk_info = None
1010985
if cpk:
@@ -1149,7 +1124,7 @@ def _append_block_options(
11491124

11501125
appendpos_condition = kwargs.pop('appendpos_condition', None)
11511126
maxsize_condition = kwargs.pop('maxsize_condition', None)
1152-
validate_content = kwargs.pop('validate_content', False)
1127+
validate_content = parse_validation_option(kwargs.pop('validate_content', None))
11531128
append_conditions = None
11541129
if maxsize_condition or appendpos_condition is not None:
11551130
append_conditions = AppendPositionAccessConditions(

0 commit comments

Comments
 (0)