Skip to content

Commit daa44ee

Browse files
authored
Remove CSV reader warnings emitted in unit tests (rapidsai#21794)
The CSV reader has an option to deduce the compression type from the file extension (the default compression option, `AUTO`). However, many tests pass the default compression type when reading from a host buffer. In that case the reader cannot determine the compression type, assumes the input is uncompressed, and emits a warning. We don't want the noise of these warnings in the unit tests, so this PR specifies the compression type explicitly when not reading from a file. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Paul Mattione (https://github.com/pmattione-nvidia) - David Wendt (https://github.com/davidwendt) URL: rapidsai#21794
1 parent 5d5fb51 commit daa44ee

1 file changed

Lines changed: 38 additions & 4 deletions

File tree

cpp/tests/io/csv_test.cpp

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ struct CsvFixedPointReaderTest : public CsvReaderTest {
9494
cudf::io::csv_reader_options::builder(
9595
cudf::io::source_info{cudf::host_span<std::byte const>{
9696
reinterpret_cast<std::byte const*>(buffer.c_str()), buffer.size()}})
97+
.compression(cudf::io::compression_type::NONE)
9798
.dtypes({data_type{type_to_id<DecimalType>(), scale}})
9899
.header(-1);
99100

@@ -1099,6 +1100,7 @@ TEST_F(CsvReaderTest, ByteRangeStrings)
10991100
cudf::io::csv_reader_options::builder(
11001101
cudf::io::source_info{cudf::host_span<std::byte const>{
11011102
reinterpret_cast<std::byte const*>(input.c_str()), input.size()}})
1103+
.compression(cudf::io::compression_type::NONE)
11021104
.names({"A"})
11031105
.dtypes({dtype<cudf::string_view>()})
11041106
.header(-1)
@@ -1214,6 +1216,7 @@ TEST_F(CsvReaderTest, StringInference)
12141216
cudf::io::csv_reader_options::builder(
12151217
cudf::io::source_info{cudf::host_span<std::byte const>{
12161218
reinterpret_cast<std::byte const*>(buffer.c_str()), buffer.size()}})
1219+
.compression(cudf::io::compression_type::NONE)
12171220
.header(-1);
12181221
auto const result = cudf::io::read_csv(in_opts);
12191222

@@ -1228,6 +1231,7 @@ TEST_F(CsvReaderTest, TypeInferenceThousands)
12281231
cudf::io::csv_reader_options::builder(
12291232
cudf::io::source_info{cudf::host_span<std::byte const>{
12301233
reinterpret_cast<std::byte const*>(buffer.c_str()), buffer.size()}})
1234+
.compression(cudf::io::compression_type::NONE)
12311235
.header(-1)
12321236
.thousands('`');
12331237
auto const result = cudf::io::read_csv(in_opts);
@@ -1257,6 +1261,7 @@ TEST_F(CsvReaderTest, TypeInferenceWithDecimal)
12571261
cudf::io::csv_reader_options::builder(
12581262
cudf::io::source_info{cudf::host_span<std::byte const>{
12591263
reinterpret_cast<std::byte const*>(buffer.c_str()), buffer.size()}})
1264+
.compression(cudf::io::compression_type::NONE)
12601265
.header(-1)
12611266
.thousands('`')
12621267
.decimal(';');
@@ -1284,6 +1289,7 @@ TEST_F(CsvReaderTest, SkipRowsXorSkipFooter)
12841289
cudf::io::csv_reader_options::builder(
12851290
cudf::io::source_info{cudf::host_span<std::byte const>{
12861291
reinterpret_cast<std::byte const*>(buffer.c_str()), buffer.size()}})
1292+
.compression(cudf::io::compression_type::NONE)
12871293
.header(-1)
12881294
.skiprows(1);
12891295
EXPECT_NO_THROW(cudf::io::read_csv(skiprows_options));
@@ -1292,6 +1298,7 @@ TEST_F(CsvReaderTest, SkipRowsXorSkipFooter)
12921298
cudf::io::csv_reader_options::builder(
12931299
cudf::io::source_info{cudf::host_span<std::byte const>{
12941300
reinterpret_cast<std::byte const*>(buffer.c_str()), buffer.size()}})
1301+
.compression(cudf::io::compression_type::NONE)
12951302
.header(-1)
12961303
.skipfooter(1);
12971304
EXPECT_NO_THROW(cudf::io::read_csv(skipfooter_options));
@@ -1991,6 +1998,7 @@ TEST_F(CsvReaderTest, UserImplementedSource)
19911998
TestSource source{csv_data.str()};
19921999
cudf::io::csv_reader_options in_opts =
19932000
cudf::io::csv_reader_options::builder(cudf::io::source_info{&source})
2001+
.compression(cudf::io::compression_type::NONE)
19942002
.dtypes({dtype<int8_t>(), dtype<int16_t>(), dtype<int32_t>()})
19952003
.header(-1);
19962004
auto result = cudf::io::read_csv(in_opts);
@@ -2252,6 +2260,7 @@ TEST_F(CsvReaderTest, DtypesMap)
22522260
cudf::io::csv_reader_options::builder(
22532261
cudf::io::source_info{cudf::host_span<std::byte const>{
22542262
reinterpret_cast<std::byte const*>(csv_in.c_str()), csv_in.size()}})
2263+
.compression(cudf::io::compression_type::NONE)
22552264
.names({"A", "B"})
22562265
.dtypes({{"B", dtype<int16_t>()}, {"A", dtype<int32_t>()}})
22572266
.header(-1);
@@ -2270,6 +2279,7 @@ TEST_F(CsvReaderTest, DtypesMapPartial)
22702279
cudf::io::csv_reader_options in_opts =
22712280
cudf::io::csv_reader_options::builder(
22722281
cudf::io::source_info{cudf::host_span<std::byte const>{nullptr, 0}})
2282+
.compression(cudf::io::compression_type::NONE)
22732283
.names({"A", "B"})
22742284
.dtypes({{"A", dtype<int16_t>()}});
22752285
{
@@ -2296,6 +2306,7 @@ TEST_F(CsvReaderTest, DtypesArrayInvalid)
22962306
cudf::io::csv_reader_options in_opts =
22972307
cudf::io::csv_reader_options::builder(
22982308
cudf::io::source_info{cudf::host_span<std::byte const>{nullptr, 0}})
2309+
.compression(cudf::io::compression_type::NONE)
22992310
.names({"A", "B", "C"})
23002311
.dtypes(std::vector<cudf::data_type>{dtype<int16_t>(), dtype<int8_t>()});
23012312

@@ -2338,6 +2349,7 @@ TEST_F(CsvReaderTest, UseColsValidation)
23382349
cudf::io::csv_reader_options::builder(
23392350
cudf::io::source_info{cudf::host_span<std::byte const>{
23402351
reinterpret_cast<std::byte const*>(buffer.c_str()), buffer.size()}})
2352+
.compression(cudf::io::compression_type::NONE)
23412353
.names({"a", "b"})
23422354
.use_cols_indexes({0});
23432355
EXPECT_THROW(cudf::io::read_csv(idx_cnt_options), cudf::logic_error);
@@ -2346,6 +2358,7 @@ TEST_F(CsvReaderTest, UseColsValidation)
23462358
cudf::io::csv_reader_options::builder(
23472359
cudf::io::source_info{cudf::host_span<std::byte const>{
23482360
reinterpret_cast<std::byte const*>(buffer.c_str()), buffer.size()}})
2361+
.compression(cudf::io::compression_type::NONE)
23492362
.names({"a", "b"})
23502363
.use_cols_indexes({0, 0});
23512364
EXPECT_THROW(cudf::io::read_csv(unique_idx_cnt_options), cudf::logic_error);
@@ -2354,6 +2367,7 @@ TEST_F(CsvReaderTest, UseColsValidation)
23542367
cudf::io::csv_reader_options::builder(
23552368
cudf::io::source_info{cudf::host_span<std::byte const>{
23562369
reinterpret_cast<std::byte const*>(buffer.c_str()), buffer.size()}})
2370+
.compression(cudf::io::compression_type::NONE)
23572371
.names({"a", "b", "c"})
23582372
.use_cols_names({"nonexistent_name"});
23592373
EXPECT_THROW(cudf::io::read_csv(bad_name_options), cudf::logic_error);
@@ -2367,6 +2381,7 @@ TEST_F(CsvReaderTest, CropColumns)
23672381
cudf::io::csv_reader_options::builder(
23682382
cudf::io::source_info{cudf::host_span<std::byte const>{
23692383
reinterpret_cast<std::byte const*>(csv_in.c_str()), csv_in.size()}})
2384+
.compression(cudf::io::compression_type::NONE)
23702385
.dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<float>()})
23712386
.names({"a", "b"})
23722387
.header(-1);
@@ -2388,6 +2403,7 @@ TEST_F(CsvReaderTest, CropColumnsUseColsNames)
23882403
cudf::io::csv_reader_options::builder(
23892404
cudf::io::source_info{cudf::host_span<std::byte const>{
23902405
reinterpret_cast<std::byte const*>(csv_in.c_str()), csv_in.size()}})
2406+
.compression(cudf::io::compression_type::NONE)
23912407
.dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<float>()})
23922408
.names({"a", "b"})
23932409
.use_cols_names({"b"})
@@ -2408,6 +2424,7 @@ TEST_F(CsvReaderTest, ExtraColumns)
24082424
cudf::io::csv_reader_options::builder(
24092425
cudf::io::source_info{cudf::host_span<std::byte const>{
24102426
reinterpret_cast<std::byte const*>(csv_in.c_str()), csv_in.size()}})
2427+
.compression(cudf::io::compression_type::NONE)
24112428
.names({"a", "b", "c", "d"})
24122429
.header(-1);
24132430
auto result = cudf::io::read_csv(opts);
@@ -2422,6 +2439,7 @@ TEST_F(CsvReaderTest, ExtraColumns)
24222439
cudf::io::csv_reader_options::builder(
24232440
cudf::io::source_info{cudf::host_span<std::byte const>{
24242441
reinterpret_cast<std::byte const*>(csv_in.c_str()), csv_in.size()}})
2442+
.compression(cudf::io::compression_type::NONE)
24252443
.names({"a", "b", "c", "d"})
24262444
.dtypes({dtype<int32_t>(), dtype<int32_t>(), dtype<int32_t>(), dtype<float>()})
24272445
.header(-1);
@@ -2443,6 +2461,7 @@ TEST_F(CsvReaderTest, ExtraColumnsUseCols)
24432461
cudf::io::csv_reader_options::builder(
24442462
cudf::io::source_info{cudf::host_span<std::byte const>{
24452463
reinterpret_cast<std::byte const*>(csv_in.c_str()), csv_in.size()}})
2464+
.compression(cudf::io::compression_type::NONE)
24462465
.names({"a", "b", "c", "d"})
24472466
.use_cols_names({"b", "d"})
24482467
.header(-1);
@@ -2458,6 +2477,7 @@ TEST_F(CsvReaderTest, ExtraColumnsUseCols)
24582477
cudf::io::csv_reader_options::builder(
24592478
cudf::io::source_info{cudf::host_span<std::byte const>{
24602479
reinterpret_cast<std::byte const*>(csv_in.c_str()), csv_in.size()}})
2480+
.compression(cudf::io::compression_type::NONE)
24612481
.names({"a", "b", "c", "d"})
24622482
.use_cols_names({"b", "d"})
24632483
.dtypes({dtype<int32_t>(), dtype<int32_t>(), dtype<int32_t>(), dtype<cudf::string_view>()})
@@ -2480,6 +2500,7 @@ TEST_F(CsvReaderTest, EmptyColumns)
24802500
cudf::io::csv_reader_options::builder(
24812501
cudf::io::source_info{cudf::host_span<std::byte const>{
24822502
reinterpret_cast<std::byte const*>(csv_in.c_str()), csv_in.size()}})
2503+
.compression(cudf::io::compression_type::NONE)
24832504
.names({"a", "b", "c", "d"})
24842505
.header(-1);
24852506
// More elements in `names` than in the file; additional columns are filled with nulls
@@ -2503,6 +2524,7 @@ TEST_F(CsvReaderTest, BlankLineAfterFirstRow)
25032524
cudf::io::csv_reader_options::builder(
25042525
cudf::io::source_info{cudf::host_span<std::byte const>{
25052526
reinterpret_cast<std::byte const*>(csv_in.c_str()), csv_in.size()}})
2527+
.compression(cudf::io::compression_type::NONE)
25062528
.header(-1);
25072529
// No header, getting column names/count from first row
25082530
auto result = cudf::io::read_csv(no_header_opts);
@@ -2512,8 +2534,10 @@ TEST_F(CsvReaderTest, BlankLineAfterFirstRow)
25122534
}
25132535
{
25142536
cudf::io::csv_reader_options header_opts =
2515-
cudf::io::csv_reader_options::builder(cudf::io::source_info{cudf::host_span<std::byte const>{
2516-
reinterpret_cast<std::byte const*>(csv_in.c_str()), csv_in.size()}});
2537+
cudf::io::csv_reader_options::builder(
2538+
cudf::io::source_info{cudf::host_span<std::byte const>{
2539+
reinterpret_cast<std::byte const*>(csv_in.c_str()), csv_in.size()}})
2540+
.compression(cudf::io::compression_type::NONE);
25172541
// Getting column names/count from header
25182542
auto result = cudf::io::read_csv(header_opts);
25192543

@@ -2529,6 +2553,7 @@ TEST_F(CsvReaderTest, NullCount)
25292553
cudf::io::csv_reader_options::builder(
25302554
cudf::io::source_info{cudf::host_span<std::byte const>{
25312555
reinterpret_cast<std::byte const*>(buffer.c_str()), buffer.size()}})
2556+
.compression(cudf::io::compression_type::NONE)
25322557
.header(-1);
25332558
auto const result = cudf::io::read_csv(in_opts);
25342559
auto const result_view = result.tbl->view();
@@ -2543,8 +2568,10 @@ TEST_F(CsvReaderTest, UTF8BOM)
25432568
{
25442569
std::string buffer = "\xEF\xBB\xBFMonth,Day,Year\nJune,6,2023\nAugust,25,1990\nMay,1,2000\n";
25452570
cudf::io::csv_reader_options in_opts =
2546-
cudf::io::csv_reader_options::builder(cudf::io::source_info{cudf::host_span<std::byte const>{
2547-
reinterpret_cast<std::byte const*>(buffer.c_str()), buffer.size()}});
2571+
cudf::io::csv_reader_options::builder(
2572+
cudf::io::source_info{cudf::host_span<std::byte const>{
2573+
reinterpret_cast<std::byte const*>(buffer.c_str()), buffer.size()}})
2574+
.compression(cudf::io::compression_type::NONE);
25482575
auto const result = cudf::io::read_csv(in_opts);
25492576
auto const result_view = result.tbl->view();
25502577
EXPECT_EQ(result_view.num_rows(), 3);
@@ -2625,6 +2652,7 @@ TEST_F(CsvReaderTest, DoubleQuotesOddCount)
26252652
cudf::io::csv_reader_options::builder(
26262653
cudf::io::source_info{cudf::host_span<std::byte const>{
26272654
reinterpret_cast<std::byte const*>(buffer.data()), buffer.size()}})
2655+
.compression(cudf::io::compression_type::NONE)
26282656
.header(-1)
26292657
.dtypes({dtype<cudf::string_view>()});
26302658
auto const result = cudf::io::read_csv(in_opts);
@@ -2647,6 +2675,7 @@ end
26472675
cudf::io::csv_reader_options::builder(
26482676
cudf::io::source_info{cudf::host_span<std::byte const>{
26492677
reinterpret_cast<std::byte const*>(buffer.data()), buffer.size()}})
2678+
.compression(cudf::io::compression_type::NONE)
26502679
.header(-1)
26512680
.dtypes({dtype<cudf::string_view>()})
26522681
.delimiter('\t');
@@ -2671,6 +2700,7 @@ TEST_F(CsvReaderTest, QuotedFieldWithTrailingDelimiter)
26712700
cudf::io::csv_reader_options::builder(
26722701
cudf::io::source_info{cudf::host_span<std::byte const>{
26732702
reinterpret_cast<std::byte const*>(buffer.data()), buffer.size()}})
2703+
.compression(cudf::io::compression_type::NONE)
26742704
.dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<cudf::string_view>()});
26752705
auto const result = cudf::io::read_csv(in_opts);
26762706

@@ -2695,6 +2725,7 @@ TEST_F(CsvReaderTest, EscapedQuotesWithSemicolonDelimiter)
26952725
cudf::io::csv_reader_options::builder(
26962726
cudf::io::source_info{cudf::host_span<std::byte const>{
26972727
reinterpret_cast<std::byte const*>(buffer.data()), buffer.size()}})
2728+
.compression(cudf::io::compression_type::NONE)
26982729
.delimiter(';')
26992730
.dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<cudf::string_view>()});
27002731
auto const result = cudf::io::read_csv(in_opts);
@@ -2720,6 +2751,7 @@ TEST_F(CsvReaderTest, EscapedQuoteBeforeDelimiterInQuotedField)
27202751
cudf::io::csv_reader_options::builder(
27212752
cudf::io::source_info{cudf::host_span<std::byte const>{
27222753
reinterpret_cast<std::byte const*>(buffer.data()), buffer.size()}})
2754+
.compression(cudf::io::compression_type::NONE)
27232755
.delimiter(';')
27242756
.dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<cudf::string_view>()});
27252757
auto const result = cudf::io::read_csv(in_opts);
@@ -2741,6 +2773,7 @@ TEST_F(CsvReaderTest, CommentLines)
27412773
cudf::io::csv_reader_options::builder(
27422774
cudf::io::source_info{cudf::host_span<std::byte const>{
27432775
reinterpret_cast<std::byte const*>(buffer.data()), buffer.size()}})
2776+
.compression(cudf::io::compression_type::NONE)
27442777
.comment('#')
27452778
.dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<int32_t>(), dtype<int32_t>()});
27462779
auto const result = cudf::io::read_csv(in_opts);
@@ -2769,6 +2802,7 @@ TEST_F(CsvReaderTest, CommentLinesWithQuotedStrings)
27692802
cudf::io::csv_reader_options::builder(
27702803
cudf::io::source_info{cudf::host_span<std::byte const>{
27712804
reinterpret_cast<std::byte const*>(buffer.data()), buffer.size()}})
2805+
.compression(cudf::io::compression_type::NONE)
27722806
.comment('#')
27732807
.dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<cudf::string_view>()});
27742808
auto const result = cudf::io::read_csv(in_opts);

0 commit comments

Comments
 (0)