Skip to content

Commit 670e1e3

Browse files
test: add parametrized tests for gzip auto-detection without Content-Encoding header
Co-Authored-By: syed.khadeer@airbyte.io <cloud-support@airbyte.io>
1 parent e622087 commit 670e1e3

1 file changed

Lines changed: 77 additions & 0 deletions

File tree

unit_tests/sources/declarative/decoders/test_composite_decoder.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,83 @@ def test_composite_raw_decoder_csv_parser_without_mocked_response():
332332
thread.join(timeout=5) # ensure thread is cleaned up
333333

334334

335+
@pytest.mark.parametrize(
336+
"content,inner_parser,stream,use_by_headers,expected_count",
337+
[
338+
pytest.param(
339+
generate_csv(encoding="utf-8", delimiter="\t", should_compress=True),
340+
CsvParser(encoding="utf-8", delimiter="\\t"),
341+
True,
342+
False,
343+
3,
344+
id="gzip_csv_no_header_streamed",
345+
),
346+
pytest.param(
347+
generate_csv(encoding="iso-8859-1", delimiter="\t", should_compress=True),
348+
CsvParser(encoding="iso-8859-1", delimiter="\\t"),
349+
True,
350+
False,
351+
3,
352+
id="gzip_csv_no_header_iso_encoding",
353+
),
354+
pytest.param(
355+
generate_compressed_jsonlines(),
356+
JsonLineParser(),
357+
True,
358+
True,
359+
3,
360+
id="gzip_jsonl_no_header_by_headers_fallback",
361+
),
362+
pytest.param(
363+
"".join(generate_jsonlines()).encode("utf-8"),
364+
JsonLineParser(),
365+
True,
366+
False,
367+
3,
368+
id="non_gzip_passthrough",
369+
),
370+
pytest.param(
371+
generate_compressed_jsonlines(),
372+
JsonLineParser(),
373+
False,
374+
False,
375+
3,
376+
id="gzip_no_header_non_streamed",
377+
),
378+
pytest.param(
379+
b"",
380+
JsonLineParser(),
381+
True,
382+
False,
383+
0,
384+
id="empty_data",
385+
),
386+
],
387+
)
388+
def test_gzip_parser_auto_detection(
389+
requests_mock, content, inner_parser, stream, use_by_headers, expected_count
390+
):
391+
requests_mock.register_uri(
392+
"GET",
393+
"https://airbyte.io/",
394+
content=content,
395+
)
396+
response = requests.get("https://airbyte.io/", stream=stream)
397+
398+
parser = GzipParser(inner_parser=inner_parser)
399+
if use_by_headers:
400+
composite_raw_decoder = CompositeRawDecoder.by_headers(
401+
[({"Content-Encoding"}, {"gzip"}, parser)],
402+
stream_response=stream,
403+
fallback_parser=parser,
404+
)
405+
else:
406+
composite_raw_decoder = CompositeRawDecoder(parser=parser, stream_response=stream)
407+
408+
parsed_records = list(composite_raw_decoder.decode(response))
409+
assert len(parsed_records) == expected_count
410+
411+
335412
def test_given_response_already_consumed_when_decode_then_no_data_is_returned(requests_mock):
336413
requests_mock.register_uri(
337414
"GET", "https://airbyte.io/", content=json.dumps({"test": "test"}).encode()

0 commit comments

Comments
 (0)