@@ -156,13 +156,316 @@ fn benchmark_struct_conversion(c: &mut Criterion) {
156156 group. finish ( ) ;
157157}
158158
159+ /// Create a schema with nested structs: Struct<Struct<int64 fields>>
160+ fn make_nested_struct_schema ( num_fields : usize ) -> DataType {
161+ let inner_fields: Vec < Field > = ( 0 ..num_fields)
162+ . map ( |i| Field :: new ( format ! ( "inner_f{}" , i) , DataType :: Int64 , true ) )
163+ . collect ( ) ;
164+ let inner_struct = DataType :: Struct ( Fields :: from ( inner_fields) ) ;
165+ let outer_fields = vec ! [ Field :: new( "nested" , inner_struct, true ) ] ;
166+ DataType :: Struct ( Fields :: from ( outer_fields) )
167+ }
168+
169+ /// Create a schema with deeply nested structs (3 levels): Struct<Struct<Struct<int64 fields>>>
170+ fn make_deeply_nested_struct_schema ( num_fields : usize ) -> DataType {
171+ let inner_fields: Vec < Field > = ( 0 ..num_fields)
172+ . map ( |i| Field :: new ( format ! ( "inner_f{}" , i) , DataType :: Int64 , true ) )
173+ . collect ( ) ;
174+ let inner_struct = DataType :: Struct ( Fields :: from ( inner_fields) ) ;
175+ let middle_fields = vec ! [ Field :: new( "level2" , inner_struct, true ) ] ;
176+ let middle_struct = DataType :: Struct ( Fields :: from ( middle_fields) ) ;
177+ let outer_fields = vec ! [ Field :: new( "level1" , middle_struct, true ) ] ;
178+ DataType :: Struct ( Fields :: from ( outer_fields) )
179+ }
180+
181+ /// Calculate row size for nested struct: Struct<Struct<int64 fields>>
182+ fn get_nested_row_size ( num_inner_fields : usize ) -> usize {
183+ // Top-level row has 1 column (the outer struct)
184+ let top_level_bitset_width = SparkUnsafeRow :: get_row_bitset_width ( 1 ) ;
185+ let struct_pointer_size = 8 ;
186+
187+ // Outer struct has 1 field (the inner struct)
188+ let outer_bitset_width = SparkUnsafeRow :: get_row_bitset_width ( 1 ) ;
189+ let outer_struct_size = outer_bitset_width + 8 ; // pointer to inner struct
190+
191+ // Inner struct has num_inner_fields int64 fields
192+ let inner_bitset_width = SparkUnsafeRow :: get_row_bitset_width ( num_inner_fields) ;
193+ let inner_data_size = num_inner_fields * 8 ;
194+ let inner_struct_size = inner_bitset_width + inner_data_size;
195+
196+ top_level_bitset_width + struct_pointer_size + outer_struct_size + inner_struct_size
197+ }
198+
199+ /// Calculate row size for deeply nested struct: Struct<Struct<Struct<int64 fields>>>
200+ fn get_deeply_nested_row_size ( num_inner_fields : usize ) -> usize {
201+ // Top-level row has 1 column (the level1 struct)
202+ let top_level_bitset_width = SparkUnsafeRow :: get_row_bitset_width ( 1 ) ;
203+ let struct_pointer_size = 8 ;
204+
205+ // Level 1 struct has 1 field (the level2 struct)
206+ let level1_bitset_width = SparkUnsafeRow :: get_row_bitset_width ( 1 ) ;
207+ let level1_struct_size = level1_bitset_width + 8 ;
208+
209+ // Level 2 struct has 1 field (the inner struct)
210+ let level2_bitset_width = SparkUnsafeRow :: get_row_bitset_width ( 1 ) ;
211+ let level2_struct_size = level2_bitset_width + 8 ;
212+
213+ // Inner struct has num_inner_fields int64 fields
214+ let inner_bitset_width = SparkUnsafeRow :: get_row_bitset_width ( num_inner_fields) ;
215+ let inner_data_size = num_inner_fields * 8 ;
216+ let inner_struct_size = inner_bitset_width + inner_data_size;
217+
218+ top_level_bitset_width
219+ + struct_pointer_size
220+ + level1_struct_size
221+ + level2_struct_size
222+ + inner_struct_size
223+ }
224+
225+ struct NestedRowData {
226+ data : Vec < u8 > ,
227+ }
228+
229+ impl NestedRowData {
230+ fn new ( num_inner_fields : usize ) -> Self {
231+ let row_size = get_nested_row_size ( num_inner_fields) ;
232+ let mut data = vec ! [ 0u8 ; row_size] ;
233+
234+ let top_level_bitset_width = SparkUnsafeRow :: get_row_bitset_width ( 1 ) ;
235+ let outer_bitset_width = SparkUnsafeRow :: get_row_bitset_width ( 1 ) ;
236+ let inner_bitset_width = SparkUnsafeRow :: get_row_bitset_width ( num_inner_fields) ;
237+
238+ // Calculate offsets
239+ let outer_struct_start = top_level_bitset_width + 8 ;
240+ let outer_struct_size = outer_bitset_width + 8 ;
241+ let inner_struct_start = outer_struct_start + outer_struct_size;
242+ let inner_struct_size = inner_bitset_width + num_inner_fields * 8 ;
243+
244+ // Write top-level struct pointer (points to outer struct)
245+ let outer_offset_and_size =
246+ ( ( outer_struct_start as i64 ) << 32 ) | ( outer_struct_size as i64 ) ;
247+ data[ top_level_bitset_width..top_level_bitset_width + 8 ]
248+ . copy_from_slice ( & outer_offset_and_size. to_le_bytes ( ) ) ;
249+
250+ // Write outer struct pointer (points to inner struct)
251+ // Offset is relative to outer struct start
252+ let inner_relative_offset = inner_struct_start - outer_struct_start;
253+ let inner_offset_and_size =
254+ ( ( inner_relative_offset as i64 ) << 32 ) | ( inner_struct_size as i64 ) ;
255+ data[ outer_struct_start + outer_bitset_width..outer_struct_start + outer_bitset_width + 8 ]
256+ . copy_from_slice ( & inner_offset_and_size. to_le_bytes ( ) ) ;
257+
258+ // Fill inner struct with some data
259+ for i in 0 ..num_inner_fields {
260+ let value_offset = inner_struct_start + inner_bitset_width + i * 8 ;
261+ let value = ( i as i64 ) * 100 ;
262+ data[ value_offset..value_offset + 8 ] . copy_from_slice ( & value. to_le_bytes ( ) ) ;
263+ }
264+
265+ NestedRowData { data }
266+ }
267+
268+ fn to_spark_row ( & self , spark_row : & mut SparkUnsafeRow ) {
269+ spark_row. point_to_slice ( & self . data ) ;
270+ }
271+ }
272+
273+ struct DeeplyNestedRowData {
274+ data : Vec < u8 > ,
275+ }
276+
277+ impl DeeplyNestedRowData {
278+ fn new ( num_inner_fields : usize ) -> Self {
279+ let row_size = get_deeply_nested_row_size ( num_inner_fields) ;
280+ let mut data = vec ! [ 0u8 ; row_size] ;
281+
282+ let top_level_bitset_width = SparkUnsafeRow :: get_row_bitset_width ( 1 ) ;
283+ let level1_bitset_width = SparkUnsafeRow :: get_row_bitset_width ( 1 ) ;
284+ let level2_bitset_width = SparkUnsafeRow :: get_row_bitset_width ( 1 ) ;
285+ let inner_bitset_width = SparkUnsafeRow :: get_row_bitset_width ( num_inner_fields) ;
286+
287+ // Calculate offsets
288+ let level1_struct_start = top_level_bitset_width + 8 ;
289+ let level1_struct_size = level1_bitset_width + 8 ;
290+ let level2_struct_start = level1_struct_start + level1_struct_size;
291+ let level2_struct_size = level2_bitset_width + 8 ;
292+ let inner_struct_start = level2_struct_start + level2_struct_size;
293+ let inner_struct_size = inner_bitset_width + num_inner_fields * 8 ;
294+
295+ // Write top-level struct pointer (points to level1 struct)
296+ let level1_offset_and_size =
297+ ( ( level1_struct_start as i64 ) << 32 ) | ( level1_struct_size as i64 ) ;
298+ data[ top_level_bitset_width..top_level_bitset_width + 8 ]
299+ . copy_from_slice ( & level1_offset_and_size. to_le_bytes ( ) ) ;
300+
301+ // Write level1 struct pointer (points to level2 struct)
302+ let level2_relative_offset = level2_struct_start - level1_struct_start;
303+ let level2_offset_and_size =
304+ ( ( level2_relative_offset as i64 ) << 32 ) | ( level2_struct_size as i64 ) ;
305+ data[ level1_struct_start + level1_bitset_width
306+ ..level1_struct_start + level1_bitset_width + 8 ]
307+ . copy_from_slice ( & level2_offset_and_size. to_le_bytes ( ) ) ;
308+
309+ // Write level2 struct pointer (points to inner struct)
310+ let inner_relative_offset = inner_struct_start - level2_struct_start;
311+ let inner_offset_and_size =
312+ ( ( inner_relative_offset as i64 ) << 32 ) | ( inner_struct_size as i64 ) ;
313+ data[ level2_struct_start + level2_bitset_width
314+ ..level2_struct_start + level2_bitset_width + 8 ]
315+ . copy_from_slice ( & inner_offset_and_size. to_le_bytes ( ) ) ;
316+
317+ // Fill inner struct with some data
318+ for i in 0 ..num_inner_fields {
319+ let value_offset = inner_struct_start + inner_bitset_width + i * 8 ;
320+ let value = ( i as i64 ) * 100 ;
321+ data[ value_offset..value_offset + 8 ] . copy_from_slice ( & value. to_le_bytes ( ) ) ;
322+ }
323+
324+ DeeplyNestedRowData { data }
325+ }
326+
327+ fn to_spark_row ( & self , spark_row : & mut SparkUnsafeRow ) {
328+ spark_row. point_to_slice ( & self . data ) ;
329+ }
330+ }
331+
332+ fn benchmark_nested_struct_conversion ( c : & mut Criterion ) {
333+ let mut group = c. benchmark_group ( "nested_struct_conversion" ) ;
334+
335+ // Test nested structs with different inner field counts
336+ for num_fields in [ 5 , 10 , 20 ] {
337+ for num_rows in [ 1000 , 10000 ] {
338+ let schema = vec ! [ make_nested_struct_schema( num_fields) ] ;
339+
340+ // Create row data
341+ let rows: Vec < NestedRowData > = ( 0 ..num_rows)
342+ . map ( |_| NestedRowData :: new ( num_fields) )
343+ . collect ( ) ;
344+
345+ let spark_rows: Vec < SparkUnsafeRow > = rows
346+ . iter ( )
347+ . map ( |row_data| {
348+ let mut spark_row = SparkUnsafeRow :: new_with_num_fields ( 1 ) ;
349+ row_data. to_spark_row ( & mut spark_row) ;
350+ spark_row. set_not_null_at ( 0 ) ;
351+ spark_row
352+ } )
353+ . collect ( ) ;
354+
355+ let mut row_addresses: Vec < i64 > =
356+ spark_rows. iter ( ) . map ( |row| row. get_row_addr ( ) ) . collect ( ) ;
357+ let mut row_sizes: Vec < i32 > = spark_rows. iter ( ) . map ( |row| row. get_row_size ( ) ) . collect ( ) ;
358+
359+ let row_address_ptr = row_addresses. as_mut_ptr ( ) ;
360+ let row_size_ptr = row_sizes. as_mut_ptr ( ) ;
361+
362+ group. bench_with_input (
363+ BenchmarkId :: new (
364+ format ! ( "inner_fields_{}" , num_fields) ,
365+ format ! ( "rows_{}" , num_rows) ,
366+ ) ,
367+ & ( num_rows, & schema) ,
368+ |b, ( num_rows, schema) | {
369+ b. iter ( || {
370+ let tempfile = Builder :: new ( ) . tempfile ( ) . unwrap ( ) ;
371+
372+ process_sorted_row_partition (
373+ * num_rows,
374+ BATCH_SIZE ,
375+ row_address_ptr,
376+ row_size_ptr,
377+ schema,
378+ tempfile. path ( ) . to_str ( ) . unwrap ( ) . to_string ( ) ,
379+ 1.0 ,
380+ false ,
381+ 0 ,
382+ None ,
383+ & CompressionCodec :: Zstd ( 1 ) ,
384+ )
385+ . unwrap ( ) ;
386+ } ) ;
387+ } ,
388+ ) ;
389+
390+ std:: mem:: drop ( spark_rows) ;
391+ }
392+ }
393+
394+ group. finish ( ) ;
395+ }
396+
397+ fn benchmark_deeply_nested_struct_conversion ( c : & mut Criterion ) {
398+ let mut group = c. benchmark_group ( "deeply_nested_struct_conversion" ) ;
399+
400+ // Test deeply nested structs (3 levels) with different inner field counts
401+ for num_fields in [ 5 , 10 , 20 ] {
402+ for num_rows in [ 1000 , 10000 ] {
403+ let schema = vec ! [ make_deeply_nested_struct_schema( num_fields) ] ;
404+
405+ // Create row data
406+ let rows: Vec < DeeplyNestedRowData > = ( 0 ..num_rows)
407+ . map ( |_| DeeplyNestedRowData :: new ( num_fields) )
408+ . collect ( ) ;
409+
410+ let spark_rows: Vec < SparkUnsafeRow > = rows
411+ . iter ( )
412+ . map ( |row_data| {
413+ let mut spark_row = SparkUnsafeRow :: new_with_num_fields ( 1 ) ;
414+ row_data. to_spark_row ( & mut spark_row) ;
415+ spark_row. set_not_null_at ( 0 ) ;
416+ spark_row
417+ } )
418+ . collect ( ) ;
419+
420+ let mut row_addresses: Vec < i64 > =
421+ spark_rows. iter ( ) . map ( |row| row. get_row_addr ( ) ) . collect ( ) ;
422+ let mut row_sizes: Vec < i32 > = spark_rows. iter ( ) . map ( |row| row. get_row_size ( ) ) . collect ( ) ;
423+
424+ let row_address_ptr = row_addresses. as_mut_ptr ( ) ;
425+ let row_size_ptr = row_sizes. as_mut_ptr ( ) ;
426+
427+ group. bench_with_input (
428+ BenchmarkId :: new (
429+ format ! ( "inner_fields_{}" , num_fields) ,
430+ format ! ( "rows_{}" , num_rows) ,
431+ ) ,
432+ & ( num_rows, & schema) ,
433+ |b, ( num_rows, schema) | {
434+ b. iter ( || {
435+ let tempfile = Builder :: new ( ) . tempfile ( ) . unwrap ( ) ;
436+
437+ process_sorted_row_partition (
438+ * num_rows,
439+ BATCH_SIZE ,
440+ row_address_ptr,
441+ row_size_ptr,
442+ schema,
443+ tempfile. path ( ) . to_str ( ) . unwrap ( ) . to_string ( ) ,
444+ 1.0 ,
445+ false ,
446+ 0 ,
447+ None ,
448+ & CompressionCodec :: Zstd ( 1 ) ,
449+ )
450+ . unwrap ( ) ;
451+ } ) ;
452+ } ,
453+ ) ;
454+
455+ std:: mem:: drop ( spark_rows) ;
456+ }
457+ }
458+
459+ group. finish ( ) ;
460+ }
461+
159462fn config ( ) -> Criterion {
160463 Criterion :: default ( )
161464}
162465
163466criterion_group ! {
164467 name = benches;
165468 config = config( ) ;
166- targets = benchmark_struct_conversion
469+ targets = benchmark_struct_conversion, benchmark_nested_struct_conversion , benchmark_deeply_nested_struct_conversion
167470}
168471criterion_main ! ( benches) ;
0 commit comments