1515// specific language governing permissions and limitations
1616// under the License.
1717
18- use arrow:: array:: { Array , ArrayRef , MapArray , StructArray } ;
19- use arrow:: compute:: { concat , sort_to_indices, take, SortOptions } ;
18+ use arrow:: array:: { Array , ArrayRef , MapArray , StructArray , UInt32Array } ;
19+ use arrow:: compute:: { sort_to_indices, take, SortOptions } ;
2020use arrow:: datatypes:: DataType ;
2121use datafusion:: common:: { exec_err, DataFusionError } ;
2222use datafusion:: physical_plan:: ColumnarValue ;
@@ -31,59 +31,62 @@ pub fn spark_map_sort(args: &[ColumnarValue]) -> Result<ColumnarValue, DataFusio
3131 }
3232
3333 let arr_arg: ArrayRef = match & args[ 0 ] {
34- ColumnarValue :: Array ( array) => Arc :: < dyn Array > :: clone ( array) ,
34+ ColumnarValue :: Array ( array) => Arc :: clone ( array) ,
3535 ColumnarValue :: Scalar ( scalar) => scalar. to_array_of_size ( 1 ) ?,
3636 } ;
3737
3838 let ( maps_arg, map_field, is_sorted) = match arr_arg. data_type ( ) {
3939 DataType :: Map ( map_field, is_sorted) => {
40- let maps_arg = arr_arg. as_any ( ) . downcast_ref :: < MapArray > ( ) . unwrap ( ) ;
40+ let maps_arg = arr_arg
41+ . as_any ( )
42+ . downcast_ref :: < MapArray > ( )
43+ . expect ( "invariant: array data type is Map but downcast to MapArray failed" ) ;
4144 ( maps_arg, map_field, is_sorted)
4245 }
4346 _ => return exec_err ! ( "spark_map_sort expects Map type as argument" ) ,
4447 } ;
4548
49+ // Fast paths: nothing to sort, all maps null, or input already declared sorted.
50+ if maps_arg. is_empty ( ) || maps_arg. null_count ( ) == maps_arg. len ( ) || * is_sorted {
51+ return Ok ( ColumnarValue :: Array ( arr_arg) ) ;
52+ }
53+
4654 let maps_arg_entries = maps_arg. entries ( ) ;
4755 let maps_arg_offsets = maps_arg. offsets ( ) ;
4856
49- let mut sorted_map_entries_vec: Vec < ArrayRef > = Vec :: with_capacity ( maps_arg. len ( ) ) ;
57+ let sort_options = SortOptions {
58+ descending : false ,
59+ nulls_first : true ,
60+ } ;
61+
62+ // Build one global permutation over the full entries struct, respecting per-map boundaries,
63+ // then issue a single `take`. This avoids per-map struct copies and a final `concat`.
64+ let mut global_indices: Vec < u32 > = Vec :: with_capacity ( maps_arg_entries. len ( ) ) ;
5065
5166 for idx in 0 ..maps_arg. len ( ) {
5267 let map_start = maps_arg_offsets[ idx] as usize ;
5368 let map_end = maps_arg_offsets[ idx + 1 ] as usize ;
54- let map_len = map_end - map_start;
55-
56- let map_entries = maps_arg_entries. slice ( map_start, map_len) ;
57-
58- if map_len == 0 {
59- sorted_map_entries_vec. push ( Arc :: new ( map_entries) ) ;
69+ if map_end == map_start {
6070 continue ;
6171 }
6272
63- let map_keys = map_entries. column ( 0 ) ;
64- let sort_options = SortOptions {
65- descending : false ,
66- nulls_first : true ,
67- } ;
68- let sorted_indices = sort_to_indices ( & map_keys, Some ( sort_options) , None ) ?;
69-
70- let sorted_map_entries = take ( & map_entries, & sorted_indices, None ) ?;
71- sorted_map_entries_vec. push ( sorted_map_entries) ;
73+ let map_keys = maps_arg_entries
74+ . column ( 0 )
75+ . slice ( map_start, map_end - map_start) ;
76+ let local_indices = sort_to_indices ( & map_keys, Some ( sort_options) , None ) ?;
77+ global_indices. extend ( local_indices. values ( ) . iter ( ) . map ( |i| map_start as u32 + * i) ) ;
7278 }
7379
74- let sorted_map_entries_arr: Vec < & dyn Array > = sorted_map_entries_vec
75- . iter ( )
76- . map ( |arr| arr. as_ref ( ) )
77- . collect ( ) ;
78- let combined_sorted_map_entries = concat ( & sorted_map_entries_arr) ?;
79- let sorted_map_struct = combined_sorted_map_entries
80+ let indices = UInt32Array :: from ( global_indices) ;
81+ let sorted_entries = take ( maps_arg_entries, & indices, None ) ?;
82+ let sorted_map_struct = sorted_entries
8083 . as_any ( )
8184 . downcast_ref :: < StructArray > ( )
82- . unwrap ( ) ;
85+ . expect ( "invariant: take on StructArray must return StructArray" ) ;
8386
8487 // Preserve the original is_sorted flag to keep schema consistent
8588 let sorted_map_arr = Arc :: new ( MapArray :: try_new (
86- Arc :: < arrow :: datatypes :: Field > :: clone ( map_field) ,
89+ Arc :: clone ( map_field) ,
8790 maps_arg. offsets ( ) . clone ( ) ,
8891 sorted_map_struct. clone ( ) ,
8992 maps_arg. nulls ( ) . cloned ( ) ,
0 commit comments