@@ -773,9 +773,8 @@ pub(super) async fn export_arrow_validity_buffer(
773773 arrow_offset : usize ,
774774 ctx : & mut CudaExecutionCtx ,
775775) -> VortexResult < ( Option < BufferHandle > , i64 ) > {
776- // Container export paths reach here without going through `execute_cuda`, so decode the
777- // validity on the GPU here to turn any lazy/compressed bool array (e.g. dict-encoded, or from
778- // take/scan) into a device-resident canonical bool before export.
776+ // Validity is exported separately from the array data. Decode it here so Arrow
777+ // gets a device-resident validity buffer alongside the array it belongs to.
779778 let validity = execute_validity_cuda ( validity, len, ctx) . await ?;
780779 match validity {
781780 Validity :: NonNullable | Validity :: AllValid => Ok ( ( None , 0 ) ) ,
@@ -795,14 +794,17 @@ pub(super) async fn export_arrow_validity_buffer(
795794 } ) ?;
796795 let BoolDataParts { bits, meta } = array. into_data ( ) . into_parts ( len) ;
797796 let bitmap = ctx. ensure_on_device ( bits) . await ?;
798- // Repack only when a bit-level offset can't be expressed by Arrow's byte-addressed
799- // validity buffer plus array offset.
800- let bitmap = if arrow_offset == 0 && meta. offset ( ) == 0 {
797+ // ArrowDeviceArray uses ArrowArray layout with its buffers being device pointers.
798+ //
799+ // Validity is one bit per row, addressed via the Arrow array offset. Reuse the bitmap
800+ // when Vortex's validity offset already matches Arrow's; otherwise repack on the GPU
801+ // so row i is at Arrow bit `arrow_offset + i`.
802+ let bitmap = if meta. offset ( ) == arrow_offset {
801803 bitmap
802804 } else {
803805 repack_arrow_validity_buffer ( & bitmap, meta. offset ( ) , len, arrow_offset, ctx) ?
804806 } ;
805- Ok ( ( Some ( bitmap) , UNKNOWN_NULL_COUNT ) )
807+ Ok ( ( Some ( bitmap) , ARROW_UNKNOWN_NULL_COUNT ) )
806808 }
807809 }
808810}
@@ -816,7 +818,7 @@ fn validity_bitmap_byte_len(len: usize, arrow_offset: usize) -> VortexResult<usi
816818}
817819
818820/// Arrow C Data uses -1 when the null count has not been computed.
819- const UNKNOWN_NULL_COUNT : i64 = -1 ;
821+ const ARROW_UNKNOWN_NULL_COUNT : i64 = -1 ;
820822
821823/// Allocate a zeroed device buffer with cuDF-safe padding for Arrow validity masks.
822824fn device_zeroed_byte_buffer (
@@ -1663,7 +1665,10 @@ mod tests {
16631665 )
16641666 ) ;
16651667 assert_eq ! ( exported. array. array. length, 4 ) ;
1666- assert_eq ! ( exported. array. array. null_count, super :: UNKNOWN_NULL_COUNT ) ;
1668+ assert_eq ! (
1669+ exported. array. array. null_count,
1670+ super :: ARROW_UNKNOWN_NULL_COUNT
1671+ ) ;
16671672 assert_eq ! ( exported. array. array. n_buffers, 2 ) ;
16681673 assert_eq ! ( exported. array. array. n_children, 0 ) ;
16691674 assert ! ( !exported. array. array. dictionary. is_null( ) ) ;
@@ -1721,7 +1726,7 @@ mod tests {
17211726 let children = unsafe { std:: slice:: from_raw_parts ( exported. array . array . children , 1 ) } ;
17221727 let dict_child = unsafe { & * children[ 0 ] } ;
17231728 assert ! ( !dict_child. dictionary. is_null( ) ) ;
1724- assert_eq ! ( dict_child. null_count, super :: UNKNOWN_NULL_COUNT ) ;
1729+ assert_eq ! ( dict_child. null_count, super :: ARROW_UNKNOWN_NULL_COUNT ) ;
17251730
17261731 unsafe { release_exported_array ( & raw mut exported. array . array ) } ;
17271732 Ok ( ( ) )
@@ -1754,7 +1759,7 @@ mod tests {
17541759 [ 0 , 1 , 0 ]
17551760 ) ;
17561761 let dictionary = unsafe { & * exported. array . array . dictionary } ;
1757- assert_eq ! ( dictionary. null_count, super :: UNKNOWN_NULL_COUNT ) ;
1762+ assert_eq ! ( dictionary. null_count, super :: ARROW_UNKNOWN_NULL_COUNT ) ;
17581763 assert_eq ! ( private_data_buffer_i32_values( dictionary, 1 ) ?. len( ) , 2 ) ;
17591764
17601765 unsafe { release_exported_array ( & raw mut exported. array . array ) } ;
@@ -2031,7 +2036,7 @@ mod tests {
20312036 assert_binary_layout (
20322037 & exported. array . array ,
20332038 5 ,
2034- super :: UNKNOWN_NULL_COUNT ,
2039+ super :: ARROW_UNKNOWN_NULL_COUNT ,
20352040 & [ 0 , 0 , 3 , 3 , 8 , i32:: try_from ( 8 + out_of_line. len ( ) ) ?] ,
20362041 & [ b"\x00 \xff \xfe " . as_slice ( ) , b"short" , out_of_line] . concat ( ) ,
20372042 ) ?;
@@ -2545,7 +2550,7 @@ mod tests {
25452550 [ 0 , 1 , 0 , 1 , 2 ]
25462551 ) ;
25472552 let dictionary = unsafe { & * elements. dictionary } ;
2548- assert_eq ! ( dictionary. null_count, super :: UNKNOWN_NULL_COUNT ) ;
2553+ assert_eq ! ( dictionary. null_count, super :: ARROW_UNKNOWN_NULL_COUNT ) ;
25492554
25502555 unsafe { release_exported_array ( & raw mut exported. array . array ) } ;
25512556 Ok ( ( ) )
@@ -2791,7 +2796,7 @@ mod tests {
27912796 let mut exported = utf8. export_device_array_with_schema ( & mut ctx) . await ?;
27922797 let field = Field :: try_from ( & exported. schema ) ?;
27932798 assert_eq ! ( field, Field :: new( "" , DataType :: Utf8View , true ) ) ;
2794- assert_varbinview_shape ( & exported. array . array , 3 , super :: UNKNOWN_NULL_COUNT ) ?;
2799+ assert_varbinview_shape ( & exported. array . array , 3 , super :: ARROW_UNKNOWN_NULL_COUNT ) ?;
27952800 assert_eq ! ( exported. array. device_type, ARROW_DEVICE_CUDA ) ;
27962801
27972802 let private_data = unsafe { & * exported. array . array . private_data . cast :: < PrivateData > ( ) } ;
@@ -2819,7 +2824,7 @@ mod tests {
28192824 assert_binary_layout (
28202825 & exported. array . array ,
28212826 3 ,
2822- super :: UNKNOWN_NULL_COUNT ,
2827+ super :: ARROW_UNKNOWN_NULL_COUNT ,
28232828 & [ 0 , 0 , 2 , i32:: try_from ( 2 + sliced_out_of_line. len ( ) ) ?] ,
28242829 & [ b"\x00 \xff " . as_slice ( ) , sliced_out_of_line] . concat ( ) ,
28252830 ) ?;
@@ -2937,7 +2942,7 @@ mod tests {
29372942 let mut primitive = assert_nullable_export (
29382943 PrimitiveArray :: from_option_iter ( [ Some ( 1i32 ) , None , Some ( 3 ) ] ) . into_array ( ) ,
29392944 2 ,
2940- super :: UNKNOWN_NULL_COUNT ,
2945+ super :: ARROW_UNKNOWN_NULL_COUNT ,
29412946 & mut ctx,
29422947 )
29432948 . await ?;
@@ -2966,7 +2971,7 @@ mod tests {
29662971 . into_array ( )
29672972 . slice ( 1 ..4 ) ?,
29682973 2 ,
2969- super :: UNKNOWN_NULL_COUNT ,
2974+ super :: ARROW_UNKNOWN_NULL_COUNT ,
29702975 & mut ctx,
29712976 )
29722977 . await ?;
@@ -3015,7 +3020,7 @@ mod tests {
30153020 )
30163021 . into_array ( ) ,
30173022 2 ,
3018- super :: UNKNOWN_NULL_COUNT ,
3023+ super :: ARROW_UNKNOWN_NULL_COUNT ,
30193024 & mut ctx,
30203025 )
30213026 . await ?;
@@ -3045,7 +3050,7 @@ mod tests {
30453050 )
30463051 . into_array ( ) ,
30473052 2 ,
3048- super :: UNKNOWN_NULL_COUNT ,
3053+ super :: ARROW_UNKNOWN_NULL_COUNT ,
30493054 & mut ctx,
30503055 )
30513056 . await ?;
@@ -3068,7 +3073,7 @@ mod tests {
30683073 ] )
30693074 . into_array ( ) ,
30703075 4 ,
3071- super :: UNKNOWN_NULL_COUNT ,
3076+ super :: ARROW_UNKNOWN_NULL_COUNT ,
30723077 & mut ctx,
30733078 )
30743079 . await ?;
@@ -3092,7 +3097,7 @@ mod tests {
30923097 ) ?
30933098 . into_array ( ) ,
30943099 1 ,
3095- super :: UNKNOWN_NULL_COUNT ,
3100+ super :: ARROW_UNKNOWN_NULL_COUNT ,
30963101 & mut ctx,
30973102 )
30983103 . await ?;
@@ -3126,7 +3131,7 @@ mod tests {
31263131 ) ?
31273132 . into_array ( ) ,
31283133 1 ,
3129- super :: UNKNOWN_NULL_COUNT ,
3134+ super :: ARROW_UNKNOWN_NULL_COUNT ,
31303135 & mut ctx,
31313136 )
31323137 . await ?;
0 commit comments