@@ -395,6 +395,137 @@ pub fn longest_consecutive_prefix<T: Borrow<usize>>(
395395 count
396396}
397397
/// Splits `vec` at index `n`, returning the first `n` elements and leaving the
/// remaining `vec.len() - n` elements in `vec`.
///
/// The side that receives a fresh allocation is whichever is smaller, so the
/// new allocation holds `min(n, vec.len() - n)` elements rather than always
/// `n` (as `vec.drain(0..n).collect()` would). This matters when the split
/// emits a prefix under memory pressure, where `n` can be close to
/// `vec.len()`.
///
/// When the prefix is the larger side it is returned in `vec`'s original
/// backing allocation (spare capacity included), and `vec` is replaced by the
/// newly allocated tail.
///
/// # Panics
///
/// Panics if `n > vec.len()`.
pub fn split_vec_min_alloc<T>(vec: &mut Vec<T>, n: usize) -> Vec<T> {
    // `n <= len / 2` selects exactly the same inputs as `n * 2 <= len` for
    // unsigned integers, but cannot overflow when `n` is very large.
    if n <= vec.len() / 2 {
        // The prefix is the smaller (or equal) side: copy it into a new Vec
        // and let `drain` shift the tail down inside the existing allocation.
        vec.drain(0..n).collect()
    } else {
        // The tail is the smaller side: allocate only for the tail, then swap
        // it into `vec` and hand the original allocation back as the prefix.
        let remaining = vec.split_off(n);
        std::mem::replace(vec, remaining)
    }
}
413+
#[cfg(test)]
mod split_vec_min_alloc_tests {
    use super::split_vec_min_alloc;

    /// Prefix is less than half of the input, so the prefix is the side that
    /// gets copied into a new allocation.
    #[test]
    fn drain_branch() {
        let mut rest: Vec<i32> = (1..=6).collect();
        let head = split_vec_min_alloc(&mut rest, 2);
        assert_eq!(head, [1, 2]);
        assert_eq!(rest, [3, 4, 5, 6]);
    }

    /// Prefix is more than half of the input, so the tail is the side that
    /// gets copied into a new allocation.
    #[test]
    fn split_off_branch() {
        let mut rest: Vec<i32> = (1..=6).collect();
        let head = split_vec_min_alloc(&mut rest, 4);
        assert_eq!(head, [1, 2, 3, 4]);
        assert_eq!(rest, [5, 6]);
    }

    /// Boundary case: the prefix is exactly half of the input.
    #[test]
    fn exactly_half() {
        let mut rest: Vec<i32> = (1..=4).collect();
        let head = split_vec_min_alloc(&mut rest, 2);
        assert_eq!(head, [1, 2]);
        assert_eq!(rest, [3, 4]);
    }

    /// Taking every element leaves the source empty.
    #[test]
    fn take_all() {
        let mut rest: Vec<i32> = (1..=3).collect();
        let head = split_vec_min_alloc(&mut rest, 3);
        assert_eq!(head, [1, 2, 3]);
        assert!(rest.is_empty());
    }

    /// Taking nothing leaves the source untouched.
    #[test]
    fn take_none() {
        let mut rest: Vec<i32> = (1..=3).collect();
        let head = split_vec_min_alloc(&mut rest, 0);
        assert!(head.is_empty());
        assert_eq!(rest, [1, 2, 3]);
    }

    /// Documents why the split-off branch must NOT call `shrink_to_fit` on
    /// the emitted prefix.
    ///
    /// Downstream callers (e.g. `multi_group_by/bytes.rs`, which does
    /// `first_n_offsets.push(offset_n)` right after the split) push onto the
    /// prefix immediately. Because the split-off branch returns the prefix in
    /// the original backing allocation, that push finds spare capacity
    /// waiting. A prefix shrunk to fit would have to reallocate on the same
    /// push and would end up with more capacity than its own length needed,
    /// defeating the purpose of shrinking.
    #[test]
    fn emitted_prefix_does_not_realloc_on_push() {
        // Ten elements in a deliberately oversized buffer; asking for eight
        // of them takes the split-off branch (the prefix is the larger side).
        let mut source = Vec::<u32>::with_capacity(64);
        source.extend(0..10);
        let capacity_before = source.capacity();
        assert!(capacity_before >= 64);

        let mut head = split_vec_min_alloc(&mut source, 8);
        assert_eq!(head, (0..8).collect::<Vec<u32>>());

        // The prefix now owns the original buffer, so nothing was shrunk.
        assert_eq!(
            head.capacity(),
            capacity_before,
            "split-off branch must hand the original allocation to the prefix"
        );

        // Mimic the caller: append a single element straight away.
        head.push(99);

        // With the capacity preserved, the push fits in place.
        assert_eq!(
            head.capacity(),
            capacity_before,
            "push must reuse the preserved allocation (no realloc)"
        );
        assert!(head.capacity() <= capacity_before);

        // Counter-example: an eight-element prefix trimmed to its length has
        // no room left, so the same push must grow the buffer past eight.
        let mut tight: Vec<u32> = Vec::from(&head[..8]);
        tight.shrink_to_fit();
        tight.push(99);
        let capacity_after_push = tight.capacity();
        assert!(
            capacity_after_push > 8,
            "shrink-to-fit then push reallocates to a larger capacity"
        );
    }
}
528+
398529/// Creates single element [`ListArray`], [`LargeListArray`] and
399530/// [`FixedSizeListArray`] from other arrays
400531///
0 commit comments