@@ -27,8 +27,9 @@ use arrow::{downcast_dictionary_array, downcast_primitive_array};
2727#[ cfg( not( feature = "force_hash_collisions" ) ) ]
2828use crate :: cast:: {
2929 as_binary_view_array, as_boolean_array, as_fixed_size_list_array,
30- as_generic_binary_array, as_large_list_array, as_list_array, as_map_array,
31- as_string_array, as_string_view_array, as_struct_array, as_union_array,
30+ as_generic_binary_array, as_large_list_array, as_large_list_view_array,
31+ as_list_array, as_list_view_array, as_map_array, as_string_array,
32+ as_string_view_array, as_struct_array, as_union_array,
3233} ;
3334use crate :: error:: Result ;
3435use crate :: error:: { _internal_datafusion_err, _internal_err} ;
@@ -538,6 +539,45 @@ where
538539 Ok ( ( ) )
539540}
540541
542+ #[ cfg( not( feature = "force_hash_collisions" ) ) ]
543+ fn hash_list_view_array < OffsetSize > (
544+ array : & GenericListViewArray < OffsetSize > ,
545+ random_state : & RandomState ,
546+ hashes_buffer : & mut [ u64 ] ,
547+ ) -> Result < ( ) >
548+ where
549+ OffsetSize : OffsetSizeTrait ,
550+ {
551+ let values = array. values ( ) ;
552+ let offsets = array. value_offsets ( ) ;
553+ let sizes = array. value_sizes ( ) ;
554+ let nulls = array. nulls ( ) ;
555+ let mut values_hashes = vec ! [ 0u64 ; values. len( ) ] ;
556+ create_hashes ( [ values] , random_state, & mut values_hashes) ?;
557+ if let Some ( nulls) = nulls {
558+ for ( i, ( offset, size) ) in offsets. iter ( ) . zip ( sizes. iter ( ) ) . enumerate ( ) {
559+ if nulls. is_valid ( i) {
560+ let hash = & mut hashes_buffer[ i] ;
561+ let start = offset. as_usize ( ) ;
562+ let end = start + size. as_usize ( ) ;
563+ for values_hash in & values_hashes[ start..end] {
564+ * hash = combine_hashes ( * hash, * values_hash) ;
565+ }
566+ }
567+ }
568+ } else {
569+ for ( i, ( offset, size) ) in offsets. iter ( ) . zip ( sizes. iter ( ) ) . enumerate ( ) {
570+ let hash = & mut hashes_buffer[ i] ;
571+ let start = offset. as_usize ( ) ;
572+ let end = start + size. as_usize ( ) ;
573+ for values_hash in & values_hashes[ start..end] {
574+ * hash = combine_hashes ( * hash, * values_hash) ;
575+ }
576+ }
577+ }
578+ Ok ( ( ) )
579+ }
580+
541581#[ cfg( not( feature = "force_hash_collisions" ) ) ]
542582fn hash_union_array (
543583 array : & UnionArray ,
@@ -714,6 +754,14 @@ fn hash_single_array(
714754 let array = as_large_list_array( array) ?;
715755 hash_list_array( array, random_state, hashes_buffer) ?;
716756 }
757+ DataType :: ListView ( _) => {
758+ let array = as_list_view_array( array) ?;
759+ hash_list_view_array( array, random_state, hashes_buffer) ?;
760+ }
761+ DataType :: LargeListView ( _) => {
762+ let array = as_large_list_view_array( array) ?;
763+ hash_list_view_array( array, random_state, hashes_buffer) ?;
764+ }
717765 DataType :: Map ( _, _) => {
718766 let array = as_map_array( array) ?;
719767 hash_map_array( array, random_state, hashes_buffer) ?;
@@ -1128,6 +1176,106 @@ mod tests {
11281176 assert_eq ! ( hashes[ 1 ] , hashes[ 6 ] ) ; // null vs empty list
11291177 }
11301178
#[test]
// Tests actual values of hashes, which are different if forcing collisions
#[cfg(not(feature = "force_hash_collisions"))]
fn create_hashes_for_list_view_arrays() {
    use arrow::buffer::{NullBuffer, ScalarBuffer};

    // Child values shared by all rows: [0, 1, 2, 3, null, 5]
    let child: ArrayRef = Arc::new(Int32Array::from(vec![
        Some(0),
        Some(1),
        Some(2),
        Some(3),
        None,
        Some(5),
    ]));
    let item_field = Arc::new(Field::new("item", DataType::Int32, true));

    // Logical layout of the ListView (row -> offset/size into the child):
    //   0: [0, 1, 2]      offset=0, size=3
    //   1: null           validity = false
    //   2: [3, null, 5]   offset=3, size=3
    //   3: [3, null, 5]   offset=3, size=3 (shares the view of row 2)
    //   4: null           validity = false
    //   5: [0, 1, 2]      offset=0, size=3 (shares the view of row 0)
    //   6: []             offset=0, size=0
    let row_offsets = ScalarBuffer::from(vec![0i32, 0, 3, 3, 0, 0, 0]);
    let row_sizes = ScalarBuffer::from(vec![3i32, 0, 3, 3, 0, 3, 0]);
    let validity = NullBuffer::from(vec![true, false, true, true, false, true, true]);

    let array: ArrayRef = Arc::new(ListViewArray::new(
        item_field,
        row_offsets,
        row_sizes,
        child,
        Some(validity),
    ));

    let mut hashes = vec![0; array.len()];
    let random_state = RandomState::with_seeds(0, 0, 0, 0);
    create_hashes(&[array], &random_state, &mut hashes).unwrap();

    // Rows with equal logical content must hash identically.
    assert_eq!(hashes[0], hashes[5]); // [0, 1, 2] twice
    assert_eq!(hashes[1], hashes[4]); // null twice
    assert_eq!(hashes[2], hashes[3]); // [3, null, 5] twice
    assert_eq!(hashes[1], hashes[6]); // a null row hashes like an empty list

    // Rows with different content are expected to hash differently.
    assert_ne!(hashes[0], hashes[2]); // [0, 1, 2] vs [3, null, 5]
    assert_ne!(hashes[0], hashes[6]); // [0, 1, 2] vs []
    assert_ne!(hashes[2], hashes[6]); // [3, null, 5] vs []
}

1228+
#[test]
// Tests actual values of hashes, which are different if forcing collisions
#[cfg(not(feature = "force_hash_collisions"))]
fn create_hashes_for_large_list_view_arrays() {
    use arrow::buffer::{NullBuffer, ScalarBuffer};

    // Child values shared by all rows: [0, 1, 2, 3, null, 5]
    let child: ArrayRef = Arc::new(Int32Array::from(vec![
        Some(0),
        Some(1),
        Some(2),
        Some(3),
        None,
        Some(5),
    ]));
    let item_field = Arc::new(Field::new("item", DataType::Int32, true));

    // Logical layout of the LargeListView (row -> offset/size into the child),
    // using 64-bit offsets and sizes:
    //   0: [0, 1, 2]      offset=0, size=3
    //   1: null           validity = false
    //   2: [3, null, 5]   offset=3, size=3
    //   3: [3, null, 5]   offset=3, size=3 (shares the view of row 2)
    //   4: null           validity = false
    //   5: [0, 1, 2]      offset=0, size=3 (shares the view of row 0)
    //   6: []             offset=0, size=0
    let row_offsets = ScalarBuffer::from(vec![0i64, 0, 3, 3, 0, 0, 0]);
    let row_sizes = ScalarBuffer::from(vec![3i64, 0, 3, 3, 0, 3, 0]);
    let validity = NullBuffer::from(vec![true, false, true, true, false, true, true]);

    let array: ArrayRef = Arc::new(LargeListViewArray::new(
        item_field,
        row_offsets,
        row_sizes,
        child,
        Some(validity),
    ));

    let mut hashes = vec![0; array.len()];
    let random_state = RandomState::with_seeds(0, 0, 0, 0);
    create_hashes(&[array], &random_state, &mut hashes).unwrap();

    // Rows with equal logical content must hash identically.
    assert_eq!(hashes[0], hashes[5]); // [0, 1, 2] twice
    assert_eq!(hashes[1], hashes[4]); // null twice
    assert_eq!(hashes[2], hashes[3]); // [3, null, 5] twice
    assert_eq!(hashes[1], hashes[6]); // a null row hashes like an empty list

    // Rows with different content are expected to hash differently.
    assert_ne!(hashes[0], hashes[2]); // [0, 1, 2] vs [3, null, 5]
    assert_ne!(hashes[0], hashes[6]); // [0, 1, 2] vs []
    assert_ne!(hashes[2], hashes[6]); // [3, null, 5] vs []
}

1278+
11311279 #[ test]
11321280 // Tests actual values of hashes, which are different if forcing collisions
11331281 #[ cfg( not( feature = "force_hash_collisions" ) ) ]
0 commit comments