2727use Google \Cloud \Core \Upload \ResumableUploader ;
2828use Google \Cloud \Core \Upload \StreamableUploader ;
2929use Google \Cloud \Core \UriTrait ;
30+ use Google \Cloud \Storage \HashValidatingStream ;
3031use Google \Cloud \Storage \StorageClient ;
3132use GuzzleHttp \Exception \RequestException ;
3233use GuzzleHttp \Psr7 \MimeType ;
@@ -331,6 +332,7 @@ public function downloadObject(array $args = [])
331332 $ requestedBytes = $ this ->getRequestedBytes ($ args );
332333 $ resultStream = Utils::streamFor (null );
333334 $ transcodedObj = false ;
335+ $ hashHeader = null ;
334336
335337 $ args ['retryStrategy ' ] ??= $ this ->retryStrategy ;
336338
@@ -339,12 +341,17 @@ public function downloadObject(array $args = [])
339341 $ invocationId = Uuid::uuid4 ()->toString ();
340342 $ requestOptions ['retryHeaders ' ] = self ::getRetryHeaders ($ invocationId , 1 );
341343 $ requestOptions ['restRetryFunction ' ] = $ this ->getRestRetryFunction ('objects ' , 'get ' , $ args );
342- // We try to deduce if the object is a transcoded object when we receive the headers.
343- $ requestOptions ['restOptions ' ]['on_headers ' ] = function ($ response ) use (&$ transcodedObj ) {
344+ // We try to deduce if the object is a transcoded object
345+ // and capture the X-Goog-Hash when we receive the headers.
346+ $ requestOptions ['restOptions ' ]['on_headers ' ] = function ($ response ) use (&$ transcodedObj , &$ hashHeader ) {
344347 $ header = $ response ->getHeader (self ::TRANSCODED_OBJ_HEADER_KEY );
345348 if (is_array ($ header ) && in_array (self ::TRANSCODED_OBJ_HEADER_VAL , $ header )) {
346349 $ transcodedObj = true ;
347350 }
351+ $ hash = $ response ->getHeaderLine ('X-Goog-Hash ' );
352+ if ($ hash ) {
353+ $ hashHeader = $ hash ;
354+ }
348355 };
349356 $ attempt = null ;
350357 $ requestOptions ['restRetryListener ' ] = function (
@@ -383,30 +390,111 @@ public function downloadObject(array $args = [])
383390 }
384391 };
385392
386- $ fetchedStream = $ this ->requestWrapper ->send (
393+ $ response = $ this ->requestWrapper ->send (
387394 $ request ,
388395 $ requestOptions
389- )->getBody ();
396+ );
397+ $ fetchedStream = $ response ->getBody ();
390398
391399 // If no retry attempt was made, then we can return the stream as is.
392400 // This is important in the case where downloadObject is called to open
393401 // the file but not to read from it yet.
394402 if ($ attempt === null ) {
395- return $ fetchedStream ;
403+ return $ this ->maybeWrapWithHashValidatingStream (
404+ $ fetchedStream ,
405+ $ args ,
406+ $ response ,
407+ $ hashHeader ,
408+ $ transcodedObj
409+ );
396410 }
397411
398412 // If our object is a transcoded object, then Range headers are not honoured.
399413 // That means even if we had a partial download available, the final obj
400414 // that was fetched will contain the complete object. So, we don't need to copy
401415 // the partial stream, we can just return the stream we fetched.
402416 if ($ transcodedObj ) {
403- return $ fetchedStream ;
417+ return $ this ->maybeWrapWithHashValidatingStream (
418+ $ fetchedStream ,
419+ $ args ,
420+ $ response ,
421+ $ hashHeader ,
422+ $ transcodedObj
423+ );
404424 }
405425
406426 Utils::copyToStream ($ fetchedStream , $ resultStream );
407427
408428 $ resultStream ->seek (0 );
409- return $ resultStream ;
429+ return $ this ->maybeWrapWithHashValidatingStream (
430+ $ resultStream ,
431+ $ args ,
432+ $ response ,
433+ $ hashHeader ,
434+ $ transcodedObj
435+ );
436+ }
437+
438+ /**
439+ * Wrap the download stream in a HashValidatingStream if validation is enabled.
440+ */
441+ private function maybeWrapWithHashValidatingStream (
442+ StreamInterface $ stream ,
443+ array $ args ,
444+ ResponseInterface $ response ,
445+ $ hashHeader = null ,
446+ $ transcodedObj = false
447+ ) {
448+ $ validate = $ args ['validate ' ] ?? 'crc32 ' ;
449+ if ($ validate === false || $ validate === 'none ' ) {
450+ return $ stream ;
451+ }
452+
453+ // Skip validation if the user requested a subrange of the object
454+ $ requestedBytes = $ this ->getRequestedBytes ($ args );
455+ if ($ requestedBytes ['startByte ' ] > 0 || $ requestedBytes ['endByte ' ] !== '' ) {
456+ return $ stream ;
457+ }
458+
459+ // Skip validation if the object is a transcoded object (served decompressed, stored compressed)
460+ if ($ transcodedObj || $ response ->hasHeader (self ::TRANSCODED_OBJ_HEADER_KEY )) {
461+ return $ stream ;
462+ }
463+
464+ $ hashHeader = $ hashHeader ?: $ response ->getHeaderLine ('X-Goog-Hash ' );
465+ if (!$ hashHeader ) {
466+ return $ stream ;
467+ }
468+
469+ $ hashes = [];
470+ $ parts = explode (', ' , $ hashHeader );
471+ foreach ($ parts as $ part ) {
472+ $ kv = explode ('= ' , trim ($ part ), 2 );
473+ if (count ($ kv ) === 2 ) {
474+ $ hashes [$ kv [0 ]] = $ kv [1 ];
475+ }
476+ }
477+
478+ $ options = [];
479+ $ crc32cSupported = in_array ('crc32c ' , hash_algos ());
480+
481+ if ($ validate === 'md5 ' ) {
482+ if (isset ($ hashes ['md5 ' ])) {
483+ $ options ['expectedMd5 ' ] = $ hashes ['md5 ' ];
484+ }
485+ } elseif ($ validate === 'crc32 ' || $ validate === 'crc32c ' || $ validate === true ) {
486+ if ($ crc32cSupported && isset ($ hashes ['crc32c ' ])) {
487+ $ options ['expectedCrc32c ' ] = $ hashes ['crc32c ' ];
488+ } elseif (isset ($ hashes ['md5 ' ])) {
489+ $ options ['expectedMd5 ' ] = $ hashes ['md5 ' ];
490+ }
491+ }
492+
493+ if (empty ($ options )) {
494+ return $ stream ;
495+ }
496+
497+ return new HashValidatingStream ($ stream , $ options );
410498 }
411499
412500 /**
@@ -418,13 +506,34 @@ public function downloadObject(array $args = [])
418506 */
419507 public function downloadObjectAsync (array $ args = [])
420508 {
509+ $ transcodedObj = false ;
510+ $ hashHeader = null ;
421511 list ($ request , $ requestOptions ) = $ this ->buildDownloadObjectParams ($ args );
422512
513+ // We try to deduce if the object is a transcoded object
514+ // and capture the X-Goog-Hash when we receive the headers.
515+ $ requestOptions ['restOptions ' ]['on_headers ' ] = function ($ response ) use (&$ transcodedObj , &$ hashHeader ) {
516+ $ header = $ response ->getHeader (self ::TRANSCODED_OBJ_HEADER_KEY );
517+ if (is_array ($ header ) && in_array (self ::TRANSCODED_OBJ_HEADER_VAL , $ header )) {
518+ $ transcodedObj = true ;
519+ }
520+ $ hash = $ response ->getHeaderLine ('X-Goog-Hash ' );
521+ if ($ hash ) {
522+ $ hashHeader = $ hash ;
523+ }
524+ };
525+
423526 return $ this ->requestWrapper ->sendAsync (
424527 $ request ,
425528 $ requestOptions
426- )->then (function (ResponseInterface $ response ) {
427- return $ response ->getBody ();
529+ )->then (function (ResponseInterface $ response ) use ($ args , &$ hashHeader , &$ transcodedObj ) {
530+ return $ this ->maybeWrapWithHashValidatingStream (
531+ $ response ->getBody (),
532+ $ args ,
533+ $ response ,
534+ $ hashHeader ,
535+ $ transcodedObj
536+ );
428537 });
429538 }
430539
0 commit comments