@@ -2302,6 +2302,123 @@ TEST_F(ParquetReaderTest, RepeatedNoAnnotations)
23022302 CUDF_TEST_EXPECT_TABLES_EQUAL (result.tbl ->view (), expected);
23032303}
23042304
2305+ // Regression test for https://github.com/rapidsai/cudf/issues/22541.
2306+ // Schema (single-field inner repeated group must decode as list<struct<someId>>):
2307+ // required group root {
2308+ // optional int32 primitive;
2309+ // repeated group myComplex {
2310+ // optional int32 id;
2311+ // repeated group repeatedMessage {
2312+ // optional int32 someId;
2313+ // }
2314+ // }
2315+ // }
2316+ // Regenerate `nested_array_bytes` from the parquet-mr source file with:
2317+ // xxd -i nested-array-struct.parquet > /tmp/blob.cpp
2318+ // (source: spark/sql/core/src/test/resources/test-data/nested-array-struct.parquet)
2319+ TEST_F (ParquetReaderTest, RepeatedNoAnnotationsSingleFieldNested)
2320+ {
2321+ constexpr std::array<unsigned char , 775 > nested_array_bytes{
2322+ 0x50 , 0x41 , 0x52 , 0x31 , 0x15 , 0x00 , 0x15 , 0x24 , 0x15 , 0x24 , 0x2c , 0x15 , 0x06 , 0x15 , 0x00 , 0x15 ,
2323+ 0x06 , 0x15 , 0x08 , 0x00 , 0x00 , 0x02 , 0x00 , 0x00 , 0x00 , 0x03 , 0x07 , 0x02 , 0x00 , 0x00 , 0x00 , 0x05 ,
2324+ 0x00 , 0x00 , 0x00 , 0x08 , 0x00 , 0x00 , 0x00 , 0x15 , 0x00 , 0x15 , 0x32 , 0x15 , 0x32 , 0x2c , 0x15 , 0x06 ,
2325+ 0x15 , 0x00 , 0x15 , 0x06 , 0x15 , 0x06 , 0x00 , 0x00 , 0x02 , 0x00 , 0x00 , 0x00 , 0x03 , 0x00 , 0x03 , 0x00 ,
2326+ 0x00 , 0x00 , 0x03 , 0x2a , 0x00 , 0x01 , 0x00 , 0x00 , 0x00 , 0x04 , 0x00 , 0x00 , 0x00 , 0x07 , 0x00 , 0x00 ,
2327+ 0x00 , 0x15 , 0x00 , 0x15 , 0x34 , 0x15 , 0x34 , 0x2c , 0x15 , 0x06 , 0x15 , 0x00 , 0x15 , 0x06 , 0x15 , 0x06 ,
2328+ 0x00 , 0x00 , 0x03 , 0x00 , 0x00 , 0x00 , 0x03 , 0x00 , 0x00 , 0x03 , 0x00 , 0x00 , 0x00 , 0x03 , 0x3f , 0x00 ,
2329+ 0x03 , 0x00 , 0x00 , 0x00 , 0x06 , 0x00 , 0x00 , 0x00 , 0x09 , 0x00 , 0x00 , 0x00 , 0x15 , 0x02 , 0x19 , 0x6c ,
2330+ 0x48 , 0x29 , 0x54 , 0x65 , 0x73 , 0x74 , 0x50 , 0x72 , 0x6f , 0x74 , 0x6f , 0x62 , 0x75 , 0x66 , 0x2e , 0x41 ,
2331+ 0x72 , 0x72 , 0x61 , 0x79 , 0x57 , 0x69 , 0x74 , 0x68 , 0x4e , 0x65 , 0x73 , 0x74 , 0x65 , 0x64 , 0x47 , 0x72 ,
2332+ 0x6f , 0x75 , 0x70 , 0x41 , 0x6e , 0x64 , 0x41 , 0x72 , 0x72 , 0x61 , 0x79 , 0x15 , 0x04 , 0x00 , 0x15 , 0x02 ,
2333+ 0x25 , 0x02 , 0x18 , 0x09 , 0x70 , 0x72 , 0x69 , 0x6d , 0x69 , 0x74 , 0x69 , 0x76 , 0x65 , 0x00 , 0x35 , 0x04 ,
2334+ 0x18 , 0x09 , 0x6d , 0x79 , 0x43 , 0x6f , 0x6d , 0x70 , 0x6c , 0x65 , 0x78 , 0x15 , 0x04 , 0x00 , 0x15 , 0x02 ,
2335+ 0x25 , 0x02 , 0x18 , 0x02 , 0x69 , 0x64 , 0x00 , 0x35 , 0x04 , 0x18 , 0x0f , 0x72 , 0x65 , 0x70 , 0x65 , 0x61 ,
2336+ 0x74 , 0x65 , 0x64 , 0x4d , 0x65 , 0x73 , 0x73 , 0x61 , 0x67 , 0x65 , 0x15 , 0x02 , 0x00 , 0x15 , 0x02 , 0x25 ,
2337+ 0x02 , 0x18 , 0x06 , 0x73 , 0x6f , 0x6d , 0x65 , 0x49 , 0x64 , 0x00 , 0x16 , 0x06 , 0x19 , 0x1c , 0x19 , 0x3c ,
2338+ 0x26 , 0x08 , 0x1c , 0x15 , 0x02 , 0x19 , 0x35 , 0x08 , 0x06 , 0x00 , 0x19 , 0x18 , 0x09 , 0x70 , 0x72 , 0x69 ,
2339+ 0x6d , 0x69 , 0x74 , 0x69 , 0x76 , 0x65 , 0x15 , 0x00 , 0x16 , 0x06 , 0x16 , 0x46 , 0x16 , 0x46 , 0x26 , 0x08 ,
2340+ 0x00 , 0x00 , 0x26 , 0x4e , 0x1c , 0x15 , 0x02 , 0x19 , 0x25 , 0x06 , 0x00 , 0x19 , 0x28 , 0x09 , 0x6d , 0x79 ,
2341+ 0x43 , 0x6f , 0x6d , 0x70 , 0x6c , 0x65 , 0x78 , 0x02 , 0x69 , 0x64 , 0x15 , 0x00 , 0x16 , 0x06 , 0x16 , 0x54 ,
2342+ 0x16 , 0x54 , 0x26 , 0x4e , 0x00 , 0x00 , 0x26 , 0xa2 , 0x01 , 0x1c , 0x15 , 0x02 , 0x19 , 0x25 , 0x06 , 0x00 ,
2343+ 0x19 , 0x38 , 0x09 , 0x6d , 0x79 , 0x43 , 0x6f , 0x6d , 0x70 , 0x6c , 0x65 , 0x78 , 0x0f , 0x72 , 0x65 , 0x70 ,
2344+ 0x65 , 0x61 , 0x74 , 0x65 , 0x64 , 0x4d , 0x65 , 0x73 , 0x73 , 0x61 , 0x67 , 0x65 , 0x06 , 0x73 , 0x6f , 0x6d ,
2345+ 0x65 , 0x49 , 0x64 , 0x15 , 0x00 , 0x16 , 0x06 , 0x16 , 0x56 , 0x16 , 0x56 , 0x26 , 0xa2 , 0x01 , 0x00 , 0x00 ,
2346+ 0x16 , 0xf0 , 0x01 , 0x16 , 0x06 , 0x00 , 0x19 , 0x2c , 0x18 , 0x18 , 0x70 , 0x61 , 0x72 , 0x71 , 0x75 , 0x65 ,
2347+ 0x74 , 0x2e , 0x70 , 0x72 , 0x6f , 0x74 , 0x6f , 0x2e , 0x64 , 0x65 , 0x73 , 0x63 , 0x72 , 0x69 , 0x70 , 0x74 ,
2348+ 0x6f , 0x72 , 0x18 , 0xf8 , 0x01 , 0x6e , 0x61 , 0x6d , 0x65 , 0x3a , 0x20 , 0x22 , 0x41 , 0x72 , 0x72 , 0x61 ,
2349+ 0x79 , 0x57 , 0x69 , 0x74 , 0x68 , 0x4e , 0x65 , 0x73 , 0x74 , 0x65 , 0x64 , 0x47 , 0x72 , 0x6f , 0x75 , 0x70 ,
2350+ 0x41 , 0x6e , 0x64 , 0x41 , 0x72 , 0x72 , 0x61 , 0x79 , 0x22 , 0x0a , 0x66 , 0x69 , 0x65 , 0x6c , 0x64 , 0x20 ,
2351+ 0x7b , 0x0a , 0x20 , 0x20 , 0x6e , 0x61 , 0x6d , 0x65 , 0x3a , 0x20 , 0x22 , 0x70 , 0x72 , 0x69 , 0x6d , 0x69 ,
2352+ 0x74 , 0x69 , 0x76 , 0x65 , 0x22 , 0x0a , 0x20 , 0x20 , 0x6e , 0x75 , 0x6d , 0x62 , 0x65 , 0x72 , 0x3a , 0x20 ,
2353+ 0x31 , 0x0a , 0x20 , 0x20 , 0x6c , 0x61 , 0x62 , 0x65 , 0x6c , 0x3a , 0x20 , 0x4c , 0x41 , 0x42 , 0x45 , 0x4c ,
2354+ 0x5f , 0x4f , 0x50 , 0x54 , 0x49 , 0x4f , 0x4e , 0x41 , 0x4c , 0x0a , 0x20 , 0x20 , 0x74 , 0x79 , 0x70 , 0x65 ,
2355+ 0x3a , 0x20 , 0x54 , 0x59 , 0x50 , 0x45 , 0x5f , 0x49 , 0x4e , 0x54 , 0x33 , 0x32 , 0x0a , 0x7d , 0x0a , 0x66 ,
2356+ 0x69 , 0x65 , 0x6c , 0x64 , 0x20 , 0x7b , 0x0a , 0x20 , 0x20 , 0x6e , 0x61 , 0x6d , 0x65 , 0x3a , 0x20 , 0x22 ,
2357+ 0x6d , 0x79 , 0x43 , 0x6f , 0x6d , 0x70 , 0x6c , 0x65 , 0x78 , 0x22 , 0x0a , 0x20 , 0x20 , 0x6e , 0x75 , 0x6d ,
2358+ 0x62 , 0x65 , 0x72 , 0x3a , 0x20 , 0x32 , 0x0a , 0x20 , 0x20 , 0x6c , 0x61 , 0x62 , 0x65 , 0x6c , 0x3a , 0x20 ,
2359+ 0x4c , 0x41 , 0x42 , 0x45 , 0x4c , 0x5f , 0x52 , 0x45 , 0x50 , 0x45 , 0x41 , 0x54 , 0x45 , 0x44 , 0x0a , 0x20 ,
2360+ 0x20 , 0x74 , 0x79 , 0x70 , 0x65 , 0x3a , 0x20 , 0x54 , 0x59 , 0x50 , 0x45 , 0x5f , 0x4d , 0x45 , 0x53 , 0x53 ,
2361+ 0x41 , 0x47 , 0x45 , 0x0a , 0x20 , 0x20 , 0x74 , 0x79 , 0x70 , 0x65 , 0x5f , 0x6e , 0x61 , 0x6d , 0x65 , 0x3a ,
2362+ 0x20 , 0x22 , 0x2e , 0x54 , 0x65 , 0x73 , 0x74 , 0x50 , 0x72 , 0x6f , 0x74 , 0x6f , 0x62 , 0x75 , 0x66 , 0x2e ,
2363+ 0x4d , 0x79 , 0x43 , 0x6f , 0x6d , 0x70 , 0x6c , 0x65 , 0x78 , 0x22 , 0x0a , 0x7d , 0x0a , 0x00 , 0x18 , 0x13 ,
2364+ 0x70 , 0x61 , 0x72 , 0x71 , 0x75 , 0x65 , 0x74 , 0x2e , 0x70 , 0x72 , 0x6f , 0x74 , 0x6f , 0x2e , 0x63 , 0x6c ,
2365+ 0x61 , 0x73 , 0x73 , 0x18 , 0x3c , 0x70 , 0x61 , 0x72 , 0x71 , 0x75 , 0x65 , 0x74 , 0x2e , 0x70 , 0x72 , 0x6f ,
2366+ 0x74 , 0x6f , 0x2e , 0x74 , 0x65 , 0x73 , 0x74 , 0x2e , 0x54 , 0x65 , 0x73 , 0x74 , 0x50 , 0x72 , 0x6f , 0x74 ,
2367+ 0x6f , 0x62 , 0x75 , 0x66 , 0x24 , 0x41 , 0x72 , 0x72 , 0x61 , 0x79 , 0x57 , 0x69 , 0x74 , 0x68 , 0x4e , 0x65 ,
2368+ 0x73 , 0x74 , 0x65 , 0x64 , 0x47 , 0x72 , 0x6f , 0x75 , 0x70 , 0x41 , 0x6e , 0x64 , 0x41 , 0x72 , 0x72 , 0x61 ,
2369+ 0x79 , 0x00 , 0x18 , 0x0a , 0x70 , 0x61 , 0x72 , 0x71 , 0x75 , 0x65 , 0x74 , 0x2d , 0x6d , 0x72 , 0x00 , 0x83 ,
2370+ 0x02 , 0x00 , 0x00 , 0x50 , 0x41 , 0x52 , 0x31 };
2371+
2372+ auto read_opts = cudf::io::parquet_reader_options::builder (
2373+ cudf::io::source_info{cudf::host_span<std::byte const >{
2374+ reinterpret_cast <std::byte const *>(nested_array_bytes.data ()), nested_array_bytes.size ()}});
2375+ auto result = cudf::io::read_parquet (read_opts);
2376+
2377+ // 3 rows: (2, [{1, [{3}]}]), (5, [{4, [{6}]}]), (8, [{7, [{9}]}])
2378+ auto const tv = result.tbl ->view ();
2379+ ASSERT_EQ (tv.num_columns (), 2 );
2380+ EXPECT_EQ (tv.column (0 ).size (), 3 );
2381+
2382+ // Column 0: primitive int32 = [2, 5, 8]
2383+ column_wrapper<int32_t > col0{2 , 5 , 8 };
2384+
2385+ // Column 1: list<struct<id:int32, repeatedMessage:list<struct<someId:int32>>>>
2386+ auto const & outer_list = tv.column (1 );
2387+ ASSERT_EQ (outer_list.type ().id (), cudf::type_id::LIST );
2388+ auto const outer_child = outer_list.child (cudf::lists_column_view::child_column_index);
2389+ ASSERT_EQ (outer_child.type ().id (), cudf::type_id::STRUCT );
2390+ ASSERT_EQ (outer_child.num_children (), 2 );
2391+ EXPECT_EQ (outer_child.child (0 ).type ().id (), cudf::type_id::INT32 );
2392+ ASSERT_EQ (outer_child.child (1 ).type ().id (), cudf::type_id::LIST );
2393+ auto const inner_child = outer_child.child (1 ).child (cudf::lists_column_view::child_column_index);
2394+ // Inner single-field group must be a STRUCT, not collapsed to a primitive (issue #22541).
2395+ ASSERT_EQ (inner_child.type ().id (), cudf::type_id::STRUCT );
2396+ ASSERT_EQ (inner_child.num_children (), 1 );
2397+ EXPECT_EQ (inner_child.child (0 ).type ().id (), cudf::type_id::INT32 );
2398+
2399+ column_wrapper<int32_t > inner_someid{3 , 6 , 9 };
2400+ auto inner_struct = cudf::test::structs_column_wrapper{{inner_someid}};
2401+ auto inner_list_offsets =
2402+ cudf::test::fixed_width_column_wrapper<cudf::size_type>{0 , 1 , 2 , 3 }.release ();
2403+ auto inner_list = cudf::make_lists_column (
2404+ 3 , std::move (inner_list_offsets), inner_struct.release (), 0 , rmm::device_buffer{});
2405+
2406+ column_wrapper<int32_t > outer_id{1 , 4 , 7 };
2407+ std::vector<std::unique_ptr<cudf::column>> outer_struct_children;
2408+ outer_struct_children.push_back (outer_id.release ());
2409+ outer_struct_children.push_back (std::move (inner_list));
2410+ auto outer_struct_col = cudf::test::structs_column_wrapper{{std::move (outer_struct_children)}};
2411+
2412+ auto outer_list_offsets =
2413+ cudf::test::fixed_width_column_wrapper<cudf::size_type>{0 , 1 , 2 , 3 }.release ();
2414+ auto outer_list_col = cudf::make_lists_column (
2415+ 3 , std::move (outer_list_offsets), outer_struct_col.release (), 0 , rmm::device_buffer{});
2416+
2417+ // Testing for equivalence here because we only care about the outermost validity buffers.
2418+ table_view expected{{col0, *outer_list_col}};
2419+ CUDF_TEST_EXPECT_TABLES_EQUIVALENT (result.tbl ->view (), expected);
2420+ }
2421+
23052422TEST_F (ParquetReaderTest, DeltaSkipRowsWithNulls)
23062423{
23072424 using cudf::io::column_encoding;
0 commit comments