2626
2727use std:: sync:: Arc ;
2828
29- use datafusion:: arrow:: array:: UInt64Array ;
29+ use datafusion:: arrow:: array:: UInt64Builder ;
3030use datafusion:: arrow:: compute:: BatchCoalescer ;
3131use datafusion:: arrow:: datatypes:: SchemaRef ;
3232use datafusion:: arrow:: record_batch:: RecordBatch ;
@@ -142,12 +142,12 @@ impl ScratchSpace {
142142/// Materializes a partition's data from `(batch_idx, row_idx)` pairs into
143143/// coalesced `RecordBatch`es using `BatchCoalescer::push_batch_with_indices`.
144144///
145- /// Uses `scratch_indices ` as a reusable buffer to avoid allocations.
145+ /// Row indices are staged in the caller-supplied `scratch_builder`, which is drained with `finish()` each time an index array is built.
146146pub fn materialize_partition (
147147 partition_indices : & [ ( u32 , u32 ) ] ,
148148 input_batches : & InputBatchStore ,
149149 target_batch_size : usize ,
150- scratch_indices : & mut Vec < u64 > ,
150+ scratch_builder : & mut UInt64Builder ,
151151) -> Result < Vec < RecordBatch > > {
152152 if partition_indices. is_empty ( ) {
153153 return Ok ( Vec :: new ( ) ) ;
@@ -170,14 +170,11 @@ pub fn materialize_partition(
170170
171171 let batch = input_batches. get_batch ( current_batch_idx) ;
172172
173- // Reuse scratch buffer to reduce allocations
174- scratch_indices. clear ( ) ;
175- scratch_indices. extend (
176- partition_indices[ start..end]
177- . iter ( )
178- . map ( |( _, r) | * r as u64 ) ,
179- ) ;
180- let idx_array = UInt64Array :: from ( scratch_indices. clone ( ) ) ;
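173+ // Append the row indices directly into the builder; `finish()` then yields the index array (and, per the Arrow docs, resets the builder - confirm whether capacity is retained)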
174+ for ( _, r) in & partition_indices[ start..end] {
175+ scratch_builder. append_value ( * r as u64 ) ;
176+ }
177+ let idx_array = scratch_builder. finish ( ) ;
181178
182179 coalescer. push_batch_with_indices ( batch. clone ( ) , & idx_array) ?;
183180 while let Some ( completed) = coalescer. next_completed_batch ( ) {
@@ -322,7 +319,7 @@ mod tests {
322319 fn test_materialize_partition_empty ( ) {
323320 let schema = create_test_schema ( ) ;
324321 let store = InputBatchStore :: new ( schema) ;
325- let mut scratch = Vec :: new ( ) ;
322+ let mut scratch = UInt64Builder :: new ( ) ;
326323 let result = materialize_partition ( & [ ] , & store, 8192 , & mut scratch) . unwrap ( ) ;
327324 assert ! ( result. is_empty( ) ) ;
328325 }
@@ -335,7 +332,7 @@ mod tests {
335332
336333 // Select rows 0, 2, 4 from batch 0
337334 let indices = vec ! [ ( 0u32 , 0u32 ) , ( 0 , 2 ) , ( 0 , 4 ) ] ;
338- let mut scratch = Vec :: new ( ) ;
335+ let mut scratch = UInt64Builder :: new ( ) ;
339336 let result = materialize_partition ( & indices, & store, 8192 , & mut scratch) . unwrap ( ) ;
340337
341338 let total_rows: usize = result. iter ( ) . map ( |b| b. num_rows ( ) ) . sum ( ) ;
@@ -361,7 +358,7 @@ mod tests {
361358
362359 // Select row 1 from batch 0, rows 0 and 2 from batch 1
363360 let indices = vec ! [ ( 0u32 , 1u32 ) , ( 1 , 0 ) , ( 1 , 2 ) ] ;
364- let mut scratch = Vec :: new ( ) ;
361+ let mut scratch = UInt64Builder :: new ( ) ;
365362 let result = materialize_partition ( & indices, & store, 8192 , & mut scratch) . unwrap ( ) ;
366363
367364 let total_rows: usize = result. iter ( ) . map ( |b| b. num_rows ( ) ) . sum ( ) ;
@@ -381,7 +378,7 @@ mod tests {
381378 let indices: Vec < ( u32 , u32 ) > = ( 0 ..20000 ) . map ( |i| ( 0u32 , i as u32 ) ) . collect ( ) ;
382379
383380 // Use target_batch_size of 8192
384- let mut scratch = Vec :: new ( ) ;
381+ let mut scratch = UInt64Builder :: new ( ) ;
385382 let result = materialize_partition ( & indices, & store, 8192 , & mut scratch) . unwrap ( ) ;
386383
387384 // Should produce multiple batches
@@ -449,7 +446,7 @@ mod tests {
449446 }
450447
451448 // Use target_batch_size of 8192
452- let mut scratch = Vec :: new ( ) ;
449+ let mut scratch = UInt64Builder :: new ( ) ;
453450 let result = materialize_partition ( & indices, & store, 8192 , & mut scratch) . unwrap ( ) ;
454451
455452 // Should coalesce into far fewer output batches than input batches
0 commit comments