Skip to content

Commit abed7f6

Browse files
fix(decoder): use gzip_parser as fallback in GzipDecoder instead of inner_parser
When GzipDecoder is explicitly configured and the HTTP response lacks gzip-related headers (e.g., S3 pre-signed URLs returning binary/octet-stream), the fallback_parser should still decompress the data, since the user has explicitly declared that they expect gzip content.

Co-Authored-By: unknown <>
1 parent 15542de commit abed7f6

2 files changed

Lines changed: 21 additions & 1 deletion

File tree

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2642,7 +2642,7 @@ def create_gzip_decoder(
26422642
return CompositeRawDecoder.by_headers(
26432643
[({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)],
26442644
stream_response=True,
2645-
fallback_parser=gzip_parser.inner_parser,
2645+
fallback_parser=gzip_parser,
26462646
)
26472647

26482648
@staticmethod

unit_tests/sources/declarative/decoders/test_composite_decoder.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,26 @@ def test_composite_raw_decoder_csv_parser_without_mocked_response():
332332
thread.join(timeout=5) # ensure thread is cleaned up
333333

334334

335+
def test_gzip_decoder_by_headers_fallback_decompresses_when_no_gzip_header(requests_mock):
336+
requests_mock.register_uri(
337+
"GET",
338+
"https://airbyte.io/",
339+
content=generate_csv(should_compress=True),
340+
headers={"Content-Type": "binary/octet-stream"},
341+
)
342+
response = requests.get("https://airbyte.io/", stream=True)
343+
344+
gzip_parser = GzipParser(inner_parser=CsvParser())
345+
composite_raw_decoder = CompositeRawDecoder.by_headers(
346+
[({"Content-Encoding", "Content-Type"}, {"gzip", "application/gzip"}, gzip_parser)],
347+
stream_response=True,
348+
fallback_parser=gzip_parser,
349+
)
350+
parsed_records = list(composite_raw_decoder.decode(response))
351+
assert len(parsed_records) == 3
352+
assert parsed_records[0] == {"id": "1", "name": "John", "age": "28"}
353+
354+
335355
def test_given_response_already_consumed_when_decode_then_no_data_is_returned(requests_mock):
336356
requests_mock.register_uri(
337357
"GET", "https://airbyte.io/", content=json.dumps({"test": "test"}).encode()

0 commit comments

Comments
 (0)