From 551677280f748b5ffdb3b85d3dadf4df02c056c9 Mon Sep 17 00:00:00 2001 From: Nico Kemnitz Date: Thu, 25 Jun 2026 17:15:28 +0200 Subject: [PATCH] fix: accept GCS gzip responses without Content-Length (#774) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCS serves large objects stored with `Content-Encoding: gzip` using chunked transfer with no `Content-Length`; it does the same when it applies decompressive transcoding because the client does not accept gzip encoding. The GET path required `Content-Length` unconditionally and failed with `MissingContentLength`, even though a chunked body is a valid self-delimiting response (RFC 9112 §6.2 forbids `Content-Length` with `Transfer-Encoding: chunked`). Add `HeaderConfig::stored_size_header`: when `Content-Length` is absent the size falls back to this header. GCS sets it to `x-goog-stored-content-length` (always present); S3, Azure and HTTP leave it `None`, so a missing `Content-Length` remains an error for them. The fallback reports the stored (at-rest) size, so it is best-effort: for a decompressively transcoded body it need not equal the number of bytes actually delivered. This fixes the reported `MissingContentLength` failure. Some transcoded GCS responses also omit the ETag and still fail with `MissingEtag`; that is left for a follow-up. 
Co-Authored-By: Claude Opus 4.8 --- src/aws/client.rs | 1 + src/azure/client.rs | 1 + src/client/get.rs | 49 ++++++++++++++++++++++++++++++++++++++++++++ src/client/header.rs | 12 +++++++++++ src/gcp/client.rs | 4 ++++ src/http/client.rs | 1 + 6 files changed, 68 insertions(+) diff --git a/src/aws/client.rs b/src/aws/client.rs index 4ea3b888..65b6a467 100644 --- a/src/aws/client.rs +++ b/src/aws/client.rs @@ -922,6 +922,7 @@ impl GetClient for S3Client { const HEADER_CONFIG: HeaderConfig = HeaderConfig { etag_required: false, last_modified_required: false, + stored_size_header: None, version_header: Some(VERSION_HEADER), user_defined_metadata_prefix: Some(USER_DEFINED_METADATA_HEADER_PREFIX), }; diff --git a/src/azure/client.rs b/src/azure/client.rs index 1c21fa9e..8b96f4d6 100644 --- a/src/azure/client.rs +++ b/src/azure/client.rs @@ -1075,6 +1075,7 @@ impl GetClient for AzureClient { const HEADER_CONFIG: HeaderConfig = HeaderConfig { etag_required: true, last_modified_required: true, + stored_size_header: None, version_header: Some(VERSION_HEADER), user_defined_metadata_prefix: Some(USER_DEFINED_METADATA_HEADER_PREFIX), }; diff --git a/src/client/get.rs b/src/client/get.rs index c7ec36d7..730b4187 100644 --- a/src/client/get.rs +++ b/src/client/get.rs @@ -445,6 +445,7 @@ mod tests { const CFG: HeaderConfig = HeaderConfig { etag_required: false, last_modified_required: false, + stored_size_header: None, version_header: None, user_defined_metadata_prefix: Some("x-test-meta-"), }; @@ -507,6 +508,54 @@ mod tests { assert_eq!(err.to_string(), "Requested 2..6, got 2..4"); } + #[test] + fn test_get_missing_content_length() { + // Mirrors the GCS config: size falls back to x-goog-stored-content-length. 
+ const RELAXED: HeaderConfig = HeaderConfig { + stored_size_header: Some("x-goog-stored-content-length"), + ..CFG + }; + let path = Path::from("test"); + + let resp = |headers: &[(&str, &str)]| { + let mut builder = http::Response::builder().status(StatusCode::OK); + for (k, v) in headers { + builder = builder.header(*k, *v); + } + builder.body(()).unwrap().into_parts().0 + }; + + // No Content-Length, stored-size header present -> best-effort size from fallback. + let r = resp(&[("x-goog-stored-content-length", "355")]); + let (range, meta) = get_range_meta(RELAXED, &path, None, &r).unwrap(); + assert_eq!(meta.size, 355); + assert_eq!(range, 0..355); + + // No Content-Length and the stored-size header also absent -> still an error. + let r = resp(&[]); + let err = get_range_meta(RELAXED, &path, None, &r).unwrap_err(); + assert_eq!( + err.to_string(), + "Content-Length Header missing from response" + ); + + // A present Content-Length always wins over the fallback. + let r = resp(&[ + ("content-length", "10"), + ("x-goog-stored-content-length", "355"), + ]); + let (_, meta) = get_range_meta(RELAXED, &path, None, &r).unwrap(); + assert_eq!(meta.size, 10); + + // With the strict default (S3/Azure), a missing Content-Length is still fatal. + let r = resp(&[]); + let err = get_range_meta(CFG, &path, None, &r).unwrap_err(); + assert_eq!( + err.to_string(), + "Content-Length Header missing from response" + ); + } + #[test] fn test_get_attributes() { let resp = make_response( diff --git a/src/client/header.rs b/src/client/header.rs index 58cc3a8b..df0bcf37 100644 --- a/src/client/header.rs +++ b/src/client/header.rs @@ -36,6 +36,14 @@ pub(crate) struct HeaderConfig { /// Defaults to `true` pub last_modified_required: bool, + /// Header to read the object size from when `Content-Length` is absent. 
+ /// + /// GCS omits `Content-Length` on chunked `Content-Encoding: gzip` responses — large + /// bodies, or decompressive transcoding — but always sends `x-goog-stored-content-length`. + /// Stores that always send a `Content-Length` (S3, Azure) leave this `None`, so a missing + /// `Content-Length` stays an error for them. + pub stored_size_header: Option<&'static str>, + /// The version header name if any pub version_header: Option<&'static str>, @@ -139,8 +147,12 @@ pub(crate) fn header_meta( Err(e) => return Err(e), }; + // Prefer `Content-Length`, falling back to a store-provided size header: GCS omits + // `Content-Length` on chunked gzip responses (large bodies, or transcoding) but always + // sends `x-goog-stored-content-length`. Stores without such a header still require it. let content_length = headers .get(CONTENT_LENGTH) + .or_else(|| cfg.stored_size_header.and_then(|h| headers.get(h))) .ok_or(Error::MissingContentLength)?; let content_length = content_length diff --git a/src/gcp/client.rs b/src/gcp/client.rs index f41171d2..6924e588 100644 --- a/src/gcp/client.rs +++ b/src/gcp/client.rs @@ -52,6 +52,7 @@ const VERSION_HEADER: &str = "x-goog-generation"; const DEFAULT_CONTENT_TYPE: &str = "application/octet-stream"; const USER_DEFINED_METADATA_HEADER_PREFIX: &str = "x-goog-meta-"; const STORAGE_CLASS: &str = "x-goog-storage-class"; +const STORED_CONTENT_LENGTH_HEADER: &str = "x-goog-stored-content-length"; static VERSION_MATCH: HeaderName = HeaderName::from_static("x-goog-if-generation-match"); @@ -617,9 +618,12 @@ impl GoogleCloudStorageClient { #[async_trait] impl GetClient for GoogleCloudStorageClient { const STORE: &'static str = STORE; + // GCS omits Content-Length on chunked gzip responses (large bodies, or decompressive + // transcoding); the size is recovered from x-goog-stored-content-length instead. 
const HEADER_CONFIG: HeaderConfig = HeaderConfig { etag_required: true, last_modified_required: true, + stored_size_header: Some(STORED_CONTENT_LENGTH_HEADER), version_header: Some(VERSION_HEADER), user_defined_metadata_prefix: Some(USER_DEFINED_METADATA_HEADER_PREFIX), }; diff --git a/src/http/client.rs b/src/http/client.rs index 580b7258..e976bb03 100644 --- a/src/http/client.rs +++ b/src/http/client.rs @@ -359,6 +359,7 @@ impl GetClient for Client { const HEADER_CONFIG: HeaderConfig = HeaderConfig { etag_required: false, last_modified_required: false, + stored_size_header: None, version_header: None, user_defined_metadata_prefix: None, };