@@ -4719,6 +4719,14 @@ impl ScalarValue {
47194719 /// This can be relevant when `self` is a list or contains a list as a nested value, as
47204720 /// a single list holds an Arc to its entire original array buffer.
47214721 pub fn compact ( & mut self ) {
4722+ // copy_array_data + compact_view_buffers + downcast back, all in one step.
4723+ macro_rules! compact_array {
4724+ ( $arr: expr, $from_type: ty, $( $as_method: tt) +) => {
4725+ * Arc :: make_mut( $arr) = ScalarValue :: compact_view_buffers(
4726+ Arc :: new( <$from_type>:: from( copy_array_data( & $arr. to_data( ) ) ) ) as ArrayRef ,
4727+ ) . $( $as_method) +. clone( )
4728+ } ;
4729+ }
47224730 match self {
47234731 ScalarValue :: Null
47244732 | ScalarValue :: Boolean ( _)
@@ -4762,33 +4770,20 @@ impl ScalarValue {
47624770 | ScalarValue :: LargeBinary ( _)
47634771 | ScalarValue :: BinaryView ( _) => ( ) ,
47644772 ScalarValue :: FixedSizeList ( arr) => {
4765- let array = copy_array_data ( & arr. to_data ( ) ) ;
4766- * Arc :: make_mut ( arr) = FixedSizeListArray :: from ( array) ;
4767- }
4768- ScalarValue :: List ( arr) => {
4769- let array = copy_array_data ( & arr. to_data ( ) ) ;
4770- * Arc :: make_mut ( arr) = ListArray :: from ( array) ;
4773+ compact_array ! ( arr, FixedSizeListArray , as_fixed_size_list( ) )
47714774 }
4775+ ScalarValue :: List ( arr) => compact_array ! ( arr, ListArray , as_list:: <i32 >( ) ) ,
47724776 ScalarValue :: LargeList ( arr) => {
4773- let array = copy_array_data ( & arr. to_data ( ) ) ;
4774- * Arc :: make_mut ( arr) = LargeListArray :: from ( array)
4777+ compact_array ! ( arr, LargeListArray , as_list:: <i64 >( ) )
47754778 }
47764779 ScalarValue :: ListView ( arr) => {
4777- let array = copy_array_data ( & arr. to_data ( ) ) ;
4778- * Arc :: make_mut ( arr) = ListViewArray :: from ( array) ;
4780+ compact_array ! ( arr, ListViewArray , as_list_view:: <i32 >( ) )
47794781 }
47804782 ScalarValue :: LargeListView ( arr) => {
4781- let array = copy_array_data ( & arr. to_data ( ) ) ;
4782- * Arc :: make_mut ( arr) = LargeListViewArray :: from ( array)
4783- }
4784- ScalarValue :: Struct ( arr) => {
4785- let array = copy_array_data ( & arr. to_data ( ) ) ;
4786- * Arc :: make_mut ( arr) = StructArray :: from ( array) ;
4787- }
4788- ScalarValue :: Map ( arr) => {
4789- let array = copy_array_data ( & arr. to_data ( ) ) ;
4790- * Arc :: make_mut ( arr) = MapArray :: from ( array) ;
4783+ compact_array ! ( arr, LargeListViewArray , as_list_view:: <i64 >( ) )
47914784 }
4785+ ScalarValue :: Struct ( arr) => compact_array ! ( arr, StructArray , as_struct( ) ) ,
4786+ ScalarValue :: Map ( arr) => compact_array ! ( arr, MapArray , as_map( ) ) ,
47924787 ScalarValue :: Union ( val, _, _) => {
47934788 if let Some ( ( _, value) ) = val. as_mut ( ) {
47944789 value. compact ( ) ;
@@ -4809,6 +4804,95 @@ impl ScalarValue {
48094804 self
48104805 }
48114806
4807+ /// Recursively compacts the backing buffers of any [`StringViewArray`] or
4808+ /// [`BinaryViewArray`] nested within `array`.
4809+ ///
4810+ /// View-typed arrays keep an `Arc` reference to their original backing
4811+ /// buffers, so a single scalar extracted from a large batch still retains
4812+ /// the entire buffer. Calling [`.gc()`][StringViewArray::gc] copies only
4813+ /// the bytes that are actually referenced by the surviving views, releasing
4814+ /// the rest.
4815+ ///
4816+ /// Container types (`List`, `LargeList`, `FixedSizeList`, `ListView`,
4817+ /// `LargeListView`, `Struct`, `Map`) are handled by recursing into their
4818+ /// child / values arrays and reconstructing the parent with the compacted
4819+ /// children. All other types are returned unchanged.
4820+ fn compact_view_buffers ( array : ArrayRef ) -> ArrayRef {
4821+ // Macro for the i32/i64-offset list pair (List / LargeList).
4822+ macro_rules! gc_list {
4823+ ( $field: expr, $offset_type: ty, $array_type: ty) => { {
4824+ let list = array. as_list:: <$offset_type>( ) ;
4825+ Arc :: new( <$array_type>:: new(
4826+ Arc :: clone( $field) ,
4827+ list. offsets( ) . clone( ) ,
4828+ ScalarValue :: compact_view_buffers( Arc :: clone( list. values( ) ) ) ,
4829+ list. nulls( ) . cloned( ) ,
4830+ ) ) as ArrayRef
4831+ } } ;
4832+ }
4833+ // Macro for the i32/i64-offset list-view pair (ListView / LargeListView).
4834+ macro_rules! gc_list_view {
4835+ ( $field: expr, $offset_type: ty, $array_type: ty) => { {
4836+ let list = array. as_list_view:: <$offset_type>( ) ;
4837+ Arc :: new( <$array_type>:: new(
4838+ Arc :: clone( $field) ,
4839+ list. offsets( ) . clone( ) ,
4840+ list. sizes( ) . clone( ) ,
4841+ ScalarValue :: compact_view_buffers( Arc :: clone( list. values( ) ) ) ,
4842+ list. nulls( ) . cloned( ) ,
4843+ ) ) as ArrayRef
4844+ } } ;
4845+ }
4846+
4847+ match array. data_type ( ) {
4848+ DataType :: Utf8View => Arc :: new ( array. as_string_view ( ) . gc ( ) ) ,
4849+ DataType :: BinaryView => Arc :: new ( array. as_binary_view ( ) . gc ( ) ) ,
4850+ DataType :: Struct ( _) => {
4851+ let s = array. as_struct ( ) ;
4852+ let columns = s
4853+ . columns ( )
4854+ . iter ( )
4855+ . map ( |c| ScalarValue :: compact_view_buffers ( Arc :: clone ( c) ) )
4856+ . collect ( ) ;
4857+ Arc :: new ( StructArray :: new (
4858+ s. fields ( ) . clone ( ) ,
4859+ columns,
4860+ s. nulls ( ) . cloned ( ) ,
4861+ ) )
4862+ }
4863+ DataType :: List ( field) => gc_list ! ( field, i32 , ListArray ) ,
4864+ DataType :: LargeList ( field) => gc_list ! ( field, i64 , LargeListArray ) ,
4865+ DataType :: FixedSizeList ( field, size) => {
4866+ let list = array. as_fixed_size_list ( ) ;
4867+ Arc :: new ( FixedSizeListArray :: new (
4868+ Arc :: clone ( field) ,
4869+ * size,
4870+ ScalarValue :: compact_view_buffers ( Arc :: clone ( list. values ( ) ) ) ,
4871+ list. nulls ( ) . cloned ( ) ,
4872+ ) )
4873+ }
4874+ DataType :: ListView ( field) => gc_list_view ! ( field, i32 , ListViewArray ) ,
4875+ DataType :: LargeListView ( field) => {
4876+ gc_list_view ! ( field, i64 , LargeListViewArray )
4877+ }
4878+ DataType :: Map ( field, ordered) => {
4879+ let map = array. as_map ( ) ;
4880+ let entries = ScalarValue :: compact_view_buffers ( Arc :: new (
4881+ map. entries ( ) . clone ( ) ,
4882+ )
4883+ as ArrayRef ) ;
4884+ Arc :: new ( MapArray :: new (
4885+ Arc :: clone ( field) ,
4886+ map. offsets ( ) . clone ( ) ,
4887+ entries. as_struct ( ) . clone ( ) ,
4888+ map. nulls ( ) . cloned ( ) ,
4889+ * ordered,
4890+ ) )
4891+ }
4892+ _ => array,
4893+ }
4894+ }
4895+
48124896 /// Returns the minimum value for the given numeric `DataType`.
48134897 ///
48144898 /// This function returns the smallest representable value for numeric
@@ -10708,4 +10792,200 @@ mod tests {
1070810792 ]
1070910793 ) ;
1071010794 }
10795+
10796+ // ── compact / compact_view_buffers ───────────────────────────────────────
10797+
10798+ /// Builds a `StringViewArray` with `n` strings that are all longer than
10799+ /// 12 bytes so they are stored in backing buffers rather than inline.
10800+ fn make_long_strings ( n : usize ) -> StringViewArray {
10801+ let mut b = StringViewBuilder :: new ( ) ;
10802+ for i in 0 ..n {
10803+ b. append_value ( format ! ( "long_string_value_pad_{i:04}" ) ) ;
10804+ }
10805+ b. finish ( )
10806+ }
10807+
10808+ /// Total bytes across all backing buffers of a `StringViewArray`.
10809+ fn utf8view_buffer_bytes ( a : & StringViewArray ) -> usize {
10810+ a. data_buffers ( ) . iter ( ) . map ( |b| b. len ( ) ) . sum ( )
10811+ }
10812+
10813+ #[ test]
10814+ fn test_compact_list_utf8view ( ) {
10815+ const N : usize = 50 ;
10816+ let strings = make_long_strings ( N ) ;
10817+ let one_len = strings. value ( 0 ) . len ( ) ;
10818+ assert ! ( utf8view_buffer_bytes( & strings) >= N * one_len) ;
10819+
10820+ let single_row_list_array =
10821+ SingleRowListArrayBuilder :: new ( Arc :: new ( strings. slice ( 0 , 1 ) ) as ArrayRef )
10822+ . build_list_array ( ) ;
10823+ let mut scalar = ScalarValue :: List ( Arc :: new ( single_row_list_array) ) ;
10824+ scalar. compact ( ) ;
10825+
10826+ let ScalarValue :: List ( arr) = & scalar else {
10827+ panic ! ( "expected List" )
10828+ } ;
10829+ assert_eq ! (
10830+ utf8view_buffer_bytes( arr. values( ) . as_string_view( ) ) ,
10831+ one_len
10832+ ) ;
10833+ assert_eq ! ( arr. values( ) . as_string_view( ) . value( 0 ) , strings. value( 0 ) ) ;
10834+ }
10835+
10836+ #[ test]
10837+ fn test_compact_large_list_utf8view ( ) {
10838+ const N : usize = 50 ;
10839+ let strings = make_long_strings ( N ) ;
10840+ let one_len = strings. value ( 0 ) . len ( ) ;
10841+ assert ! ( utf8view_buffer_bytes( & strings) >= N * one_len) ;
10842+
10843+ let single_row_list_array =
10844+ SingleRowListArrayBuilder :: new ( Arc :: new ( strings. slice ( 0 , 1 ) ) as ArrayRef )
10845+ . build_large_list_array ( ) ;
10846+ let mut scalar = ScalarValue :: LargeList ( Arc :: new ( single_row_list_array) ) ;
10847+ scalar. compact ( ) ;
10848+
10849+ let ScalarValue :: LargeList ( arr) = & scalar else {
10850+ panic ! ( "expected LargeList" )
10851+ } ;
10852+ assert_eq ! (
10853+ utf8view_buffer_bytes( arr. values( ) . as_string_view( ) ) ,
10854+ one_len
10855+ ) ;
10856+ assert_eq ! ( arr. values( ) . as_string_view( ) . value( 0 ) , strings. value( 0 ) ) ;
10857+ }
10858+
10859+ #[ test]
10860+ fn test_compact_fixed_size_list_utf8view ( ) {
10861+ const N : usize = 50 ;
10862+ let strings = make_long_strings ( N ) ;
10863+ let one_len = strings. value ( 0 ) . len ( ) ;
10864+ assert ! ( utf8view_buffer_bytes( & strings) >= N * one_len) ;
10865+
10866+ let single_row_list_array =
10867+ SingleRowListArrayBuilder :: new ( Arc :: new ( strings. slice ( 0 , 1 ) ) as ArrayRef )
10868+ . build_fixed_size_list_array ( 1 ) ;
10869+ let mut scalar = ScalarValue :: FixedSizeList ( Arc :: new ( single_row_list_array) ) ;
10870+ scalar. compact ( ) ;
10871+
10872+ let ScalarValue :: FixedSizeList ( arr) = & scalar else {
10873+ panic ! ( "expected FixedSizeList" )
10874+ } ;
10875+ assert_eq ! (
10876+ utf8view_buffer_bytes( arr. values( ) . as_string_view( ) ) ,
10877+ one_len
10878+ ) ;
10879+ assert_eq ! ( arr. values( ) . as_string_view( ) . value( 0 ) , strings. value( 0 ) ) ;
10880+ }
10881+
10882+ #[ test]
10883+ fn test_compact_list_view_utf8view ( ) {
10884+ const N : usize = 50 ;
10885+ let strings = make_long_strings ( N ) ;
10886+ let one_len = strings. value ( 0 ) . len ( ) ;
10887+ assert ! ( utf8view_buffer_bytes( & strings) >= N * one_len) ;
10888+
10889+ let single_row_list_array =
10890+ SingleRowListArrayBuilder :: new ( Arc :: new ( strings. slice ( 0 , 1 ) ) as ArrayRef )
10891+ . build_list_view_array ( ) ;
10892+ let mut scalar = ScalarValue :: ListView ( Arc :: new ( single_row_list_array) ) ;
10893+ scalar. compact ( ) ;
10894+
10895+ let ScalarValue :: ListView ( arr) = & scalar else {
10896+ panic ! ( "expected ListView" )
10897+ } ;
10898+ assert_eq ! (
10899+ utf8view_buffer_bytes( arr. values( ) . as_string_view( ) ) ,
10900+ one_len
10901+ ) ;
10902+ assert_eq ! ( arr. values( ) . as_string_view( ) . value( 0 ) , strings. value( 0 ) ) ;
10903+ }
10904+
10905+ #[ test]
10906+ fn test_compact_large_list_view_utf8view ( ) {
10907+ const N : usize = 50 ;
10908+ let strings = make_long_strings ( N ) ;
10909+ let one_len = strings. value ( 0 ) . len ( ) ;
10910+ assert ! ( utf8view_buffer_bytes( & strings) >= N * one_len) ;
10911+
10912+ let single_row_list_array =
10913+ SingleRowListArrayBuilder :: new ( Arc :: new ( strings. slice ( 0 , 1 ) ) as ArrayRef )
10914+ . build_large_list_view_array ( ) ;
10915+ let mut scalar = ScalarValue :: LargeListView ( Arc :: new ( single_row_list_array) ) ;
10916+ scalar. compact ( ) ;
10917+
10918+ let ScalarValue :: LargeListView ( arr) = & scalar else {
10919+ panic ! ( "expected LargeListView" )
10920+ } ;
10921+ assert_eq ! (
10922+ utf8view_buffer_bytes( arr. values( ) . as_string_view( ) ) ,
10923+ one_len
10924+ ) ;
10925+ assert_eq ! ( arr. values( ) . as_string_view( ) . value( 0 ) , strings. value( 0 ) ) ;
10926+ }
10927+
10928+ #[ test]
10929+ fn test_compact_struct_utf8view ( ) {
10930+ const N : usize = 50 ;
10931+ let strings = make_long_strings ( N ) ;
10932+ let one_len = strings. value ( 0 ) . len ( ) ;
10933+
10934+ let field = Arc :: new ( Field :: new ( "name" , DataType :: Utf8View , true ) ) ;
10935+ let struct_arr = StructArray :: new (
10936+ Fields :: from ( vec ! [ Arc :: clone( & field) ] ) ,
10937+ vec ! [ Arc :: new( strings. slice( 0 , 1 ) ) as ArrayRef ] ,
10938+ None ,
10939+ ) ;
10940+
10941+ let mut scalar = ScalarValue :: Struct ( Arc :: new ( struct_arr) ) ;
10942+ scalar. compact ( ) ;
10943+
10944+ let ScalarValue :: Struct ( arr) = & scalar else {
10945+ panic ! ( "expected Struct" )
10946+ } ;
10947+ let col = arr. column ( 0 ) . as_string_view ( ) ;
10948+ assert_eq ! ( utf8view_buffer_bytes( col) , one_len) ;
10949+ assert_eq ! ( col. value( 0 ) , strings. value( 0 ) ) ;
10950+ }
10951+
10952+ #[ test]
10953+ fn test_compact_map_utf8view ( ) {
10954+ const N : usize = 50 ;
10955+ let strings = make_long_strings ( N ) ;
10956+ let one_len = strings. value ( 0 ) . len ( ) ;
10957+
10958+ let key_field = Arc :: new ( Field :: new ( "key" , DataType :: Utf8View , false ) ) ;
10959+ let val_field = Arc :: new ( Field :: new ( "value" , DataType :: Int32 , true ) ) ;
10960+ let entries = StructArray :: new (
10961+ Fields :: from ( vec ! [ Arc :: clone( & key_field) , Arc :: clone( & val_field) ] ) ,
10962+ vec ! [
10963+ Arc :: new( strings. slice( 0 , 1 ) ) as ArrayRef ,
10964+ Arc :: new( Int32Array :: from( vec![ 1i32 ] ) ) as ArrayRef ,
10965+ ] ,
10966+ None ,
10967+ ) ;
10968+ let entries_field = Arc :: new ( Field :: new (
10969+ "entries" ,
10970+ DataType :: Struct ( Fields :: from ( vec ! [ key_field, val_field] ) ) ,
10971+ false ,
10972+ ) ) ;
10973+ let map = MapArray :: new (
10974+ entries_field,
10975+ OffsetBuffer :: new ( vec ! [ 0i32 , 1 ] . into ( ) ) ,
10976+ entries,
10977+ None ,
10978+ false ,
10979+ ) ;
10980+
10981+ let mut scalar = ScalarValue :: Map ( Arc :: new ( map) ) ;
10982+ scalar. compact ( ) ;
10983+
10984+ let ScalarValue :: Map ( arr) = & scalar else {
10985+ panic ! ( "expected Map" )
10986+ } ;
10987+ let keys = arr. entries ( ) . column ( 0 ) . as_string_view ( ) ;
10988+ assert_eq ! ( utf8view_buffer_bytes( keys) , one_len) ;
10989+ assert_eq ! ( keys. value( 0 ) , strings. value( 0 ) ) ;
10990+ }
1071110991}
0 commit comments