@@ -332,6 +332,83 @@ def test_composite_raw_decoder_csv_parser_without_mocked_response():
332332 thread .join (timeout = 5 ) # ensure thread is cleaned up
333333
334334
335+ @pytest .mark .parametrize (
336+ "content,inner_parser,stream,use_by_headers,expected_count" ,
337+ [
338+ pytest .param (
339+ generate_csv (encoding = "utf-8" , delimiter = "\t " , should_compress = True ),
340+ CsvParser (encoding = "utf-8" , delimiter = "\\ t" ),
341+ True ,
342+ False ,
343+ 3 ,
344+ id = "gzip_csv_no_header_streamed" ,
345+ ),
346+ pytest .param (
347+ generate_csv (encoding = "iso-8859-1" , delimiter = "\t " , should_compress = True ),
348+ CsvParser (encoding = "iso-8859-1" , delimiter = "\\ t" ),
349+ True ,
350+ False ,
351+ 3 ,
352+ id = "gzip_csv_no_header_iso_encoding" ,
353+ ),
354+ pytest .param (
355+ generate_compressed_jsonlines (),
356+ JsonLineParser (),
357+ True ,
358+ True ,
359+ 3 ,
360+ id = "gzip_jsonl_no_header_by_headers_fallback" ,
361+ ),
362+ pytest .param (
363+ "" .join (generate_jsonlines ()).encode ("utf-8" ),
364+ JsonLineParser (),
365+ True ,
366+ False ,
367+ 3 ,
368+ id = "non_gzip_passthrough" ,
369+ ),
370+ pytest .param (
371+ generate_compressed_jsonlines (),
372+ JsonLineParser (),
373+ False ,
374+ False ,
375+ 3 ,
376+ id = "gzip_no_header_non_streamed" ,
377+ ),
378+ pytest .param (
379+ b"" ,
380+ JsonLineParser (),
381+ True ,
382+ False ,
383+ 0 ,
384+ id = "empty_data" ,
385+ ),
386+ ],
387+ )
388+ def test_gzip_parser_auto_detection (
389+ requests_mock , content , inner_parser , stream , use_by_headers , expected_count
390+ ):
391+ requests_mock .register_uri (
392+ "GET" ,
393+ "https://airbyte.io/" ,
394+ content = content ,
395+ )
396+ response = requests .get ("https://airbyte.io/" , stream = stream )
397+
398+ parser = GzipParser (inner_parser = inner_parser )
399+ if use_by_headers :
400+ composite_raw_decoder = CompositeRawDecoder .by_headers (
401+ [({"Content-Encoding" }, {"gzip" }, parser )],
402+ stream_response = stream ,
403+ fallback_parser = parser ,
404+ )
405+ else :
406+ composite_raw_decoder = CompositeRawDecoder (parser = parser , stream_response = stream )
407+
408+ parsed_records = list (composite_raw_decoder .decode (response ))
409+ assert len (parsed_records ) == expected_count
410+
411+
335412def test_given_response_already_consumed_when_decode_then_no_data_is_returned (requests_mock ):
336413 requests_mock .register_uri (
337414 "GET" , "https://airbyte.io/" , content = json .dumps ({"test" : "test" }).encode ()
0 commit comments