11// SPDX-License-Identifier: Apache-2.0
22// SPDX-FileCopyrightText: Copyright the Vortex contributors
33
4- use vortex_buffer:: BufferMut ;
4+ use vortex_buffer:: Buffer ;
55use vortex_error:: VortexExpect ;
66use vortex_error:: VortexResult ;
77
@@ -19,12 +19,13 @@ use crate::arrays::chunked::ChunkedArrayExt;
1919use crate :: arrays:: listview:: ListViewArrayExt ;
2020use crate :: arrays:: listview:: ListViewRebuildMode ;
2121use crate :: arrays:: struct_:: StructArrayExt ;
22- use crate :: builders:: builder_with_capacity ;
22+ use crate :: builders:: builder_with_capacity_in ;
2323use crate :: builtins:: ArrayBuiltins ;
2424use crate :: dtype:: DType ;
2525use crate :: dtype:: Nullability ;
2626use crate :: dtype:: PType ;
2727use crate :: dtype:: StructFields ;
28+ use crate :: memory:: HostAllocatorExt ;
2829use crate :: validity:: Validity ;
2930
3031pub ( super ) fn _canonicalize (
@@ -56,7 +57,7 @@ pub(super) fn _canonicalize(
5657 ctx,
5758 ) ?) ,
5859 _ => {
59- let mut builder = builder_with_capacity ( array. dtype ( ) , array. len ( ) ) ;
60+ let mut builder = builder_with_capacity_in ( ctx . allocator ( ) , array. dtype ( ) , array. len ( ) ) ;
6061 array. array ( ) . append_to_builder ( builder. as_mut ( ) , ctx) ?;
6162 builder. finish_into_canonical ( )
6263 }
@@ -131,8 +132,12 @@ fn swizzle_list_chunks(
131132 // this much more complicated.
132133 // We (somewhat arbitrarily) choose `u64` for our offsets and sizes here. These can always be
133134 // narrowed later by the compressor.
134- let mut offsets = BufferMut :: < u64 > :: with_capacity ( len) ;
135- let mut sizes = BufferMut :: < u64 > :: with_capacity ( len) ;
135+ let allocator = ctx. allocator ( ) ;
136+ let mut offsets = allocator. allocate_typed :: < u64 > ( len) ?;
137+ let mut sizes = allocator. allocate_typed :: < u64 > ( len) ?;
138+ let offsets_out: & mut [ u64 ] = offsets. as_mut_slice_typed :: < u64 > ( ) ?;
139+ let sizes_slice_out: & mut [ u64 ] = sizes. as_mut_slice_typed :: < u64 > ( ) ?;
140+ let mut next_list = 0usize ;
136141
137142 for chunk in chunks {
138143 let chunk_array = chunk. clone ( ) . execute :: < ListViewArray > ( ctx) ?;
@@ -162,19 +167,31 @@ fn swizzle_list_chunks(
162167 let sizes_slice = sizes_arr. as_slice :: < u64 > ( ) ;
163168
164169 // Append offsets and sizes, adjusting offsets to point into the combined array.
165- offsets. extend ( offsets_slice. iter ( ) . map ( |o| o + num_elements) ) ;
166- sizes. extend ( sizes_slice) ;
170+ for ( & offset, & size) in offsets_slice. iter ( ) . zip ( sizes_slice. iter ( ) ) {
171+ offsets_out[ next_list] = offset + num_elements;
172+ sizes_slice_out[ next_list] = size;
173+ next_list += 1 ;
174+ }
167175
168176 num_elements += chunk_array. elements ( ) . len ( ) as u64 ;
169177 }
178+ debug_assert_eq ! ( next_list, len) ;
170179
171180 // SAFETY: elements are sliced from valid `ListViewArray`s (from `to_listview()`).
172181 let chunked_elements =
173182 unsafe { ChunkedArray :: new_unchecked ( list_elements_chunks, elem_dtype. clone ( ) ) }
174183 . into_array ( ) ;
175184
176- let offsets = PrimitiveArray :: new ( offsets. freeze ( ) , Validity :: NonNullable ) . into_array ( ) ;
177- let sizes = PrimitiveArray :: new ( sizes. freeze ( ) , Validity :: NonNullable ) . into_array ( ) ;
185+ let offsets = PrimitiveArray :: new (
186+ Buffer :: < u64 > :: from_byte_buffer ( offsets. freeze ( ) ) ,
187+ Validity :: NonNullable ,
188+ )
189+ . into_array ( ) ;
190+ let sizes = PrimitiveArray :: new (
191+ Buffer :: < u64 > :: from_byte_buffer ( sizes. freeze ( ) ) ,
192+ Validity :: NonNullable ,
193+ )
194+ . into_array ( ) ;
178195
179196 // SAFETY:
180197 // - `offsets` and `sizes` are non-nullable u64 arrays of the same length
@@ -192,9 +209,15 @@ fn swizzle_list_chunks(
192209#[ cfg( test) ]
193210mod tests {
194211 use std:: sync:: Arc ;
212+ use std:: sync:: atomic:: AtomicUsize ;
213+ use std:: sync:: atomic:: Ordering ;
195214
196215 use vortex_buffer:: buffer;
216+ use vortex_error:: VortexResult ;
217+ use vortex_session:: VortexSession ;
197218
219+ use crate :: Canonical ;
220+ use crate :: ExecutionCtx ;
198221 use crate :: IntoArray ;
199222 use crate :: ToCanonical ;
200223 use crate :: accessor:: ArrayAccessor ;
@@ -207,8 +230,28 @@ mod tests {
207230 use crate :: dtype:: DType :: Primitive ;
208231 use crate :: dtype:: Nullability :: NonNullable ;
209232 use crate :: dtype:: PType :: I32 ;
233+ use crate :: memory:: DefaultHostAllocator ;
234+ use crate :: memory:: HostAllocator ;
235+ use crate :: memory:: MemorySessionExt ;
236+ use crate :: memory:: WritableHostBuffer ;
210237 use crate :: validity:: Validity ;
211238
/// Test-only allocator that records how many allocations were requested.
///
/// The counter lives behind an [`Arc`] so a test can keep a handle to it and read the total
/// after the allocator itself has been handed over to a session.
#[derive(Debug)]
struct CountingAllocator {
    /// Shared counter, bumped once per `allocate` call.
    allocations: Arc<AtomicUsize>,
}
243+
244+ impl HostAllocator for CountingAllocator {
245+ fn allocate (
246+ & self ,
247+ len : usize ,
248+ alignment : vortex_buffer:: Alignment ,
249+ ) -> VortexResult < WritableHostBuffer > {
250+ self . allocations . fetch_add ( 1 , Ordering :: Relaxed ) ;
251+ DefaultHostAllocator . allocate ( len, alignment)
252+ }
253+ }
254+
212255 #[ test]
213256 pub fn pack_nested_structs ( ) {
214257 let struct_array = StructArray :: try_new (
@@ -265,4 +308,42 @@ mod tests {
265308 assert_eq ! ( l1. scalar_at( 0 ) . unwrap( ) , canon_values. scalar_at( 0 ) . unwrap( ) ) ;
266309 assert_eq ! ( l2. scalar_at( 0 ) . unwrap( ) , canon_values. scalar_at( 1 ) . unwrap( ) ) ;
267310 }
311+
/// Canonicalizing a chunked list array must allocate through the allocator installed on the
/// session rather than through a hard-wired default.
#[test]
fn list_canonicalize_uses_memory_session_allocator() {
    // Install a counting allocator on the session before building the execution context.
    let allocations = Arc::new(AtomicUsize::new(0));
    let session = VortexSession::empty();
    session
        .memory_mut()
        .set_allocator(Arc::new(CountingAllocator {
            allocations: Arc::clone(&allocations),
        }));
    let mut ctx = ExecutionCtx::new(session);

    // Two non-nullable list chunks, each holding a single list.
    let l1 = ListArray::try_new(
        buffer![1, 2, 3, 4].into_array(),
        buffer![0, 3].into_array(),
        Validity::NonNullable,
    )
    .unwrap();
    let l2 = ListArray::try_new(
        buffer![5, 6].into_array(),
        buffer![0, 2].into_array(),
        Validity::NonNullable,
    )
    .unwrap();

    let chunked_list = ChunkedArray::try_new(
        vec![l1.into_array(), l2.into_array()],
        List(Arc::new(Primitive(I32, NonNullable)), NonNullable),
    )
    .unwrap()
    .into_array();

    // Only the allocation side effect matters, so the canonical result is dropped right away.
    drop(chunked_list.execute::<Canonical>(&mut ctx).unwrap());

    // The bound is a minimum: one allocation each for the combined offsets and sizes, and any
    // other allocation made through the same allocator is counted as well.
    assert!(
        allocations.load(Ordering::Relaxed) >= 2,
        "expected offset+size allocations through MemorySession"
    );
}
268349}
0 commit comments