apache
diff --git a/docs/source/user-guide/latest/compatibility/expressions/map.md b/docs/source/user-guide/latest/compatibility/expressions/map.md
Lines changed: 11 additions & 0 deletions
diff --git a/native/spark-expr/Cargo.toml b/native/spark-expr/Cargo.toml
Lines changed: 4 additions & 0 deletions
diff --git a/native/spark-expr/benches/map_sort.rs b/native/spark-expr/benches/map_sort.rs
Lines changed: 107 additions & 0 deletions
diff --git a/native/spark-expr/src/comet_scalar_funcs.rs b/native/spark-expr/src/comet_scalar_funcs.rs
Lines changed: 5 additions & 0 deletions
diff --git a/native/spark-expr/src/lib.rs b/native/spark-expr/src/lib.rs
Lines changed: 2 additions & 0 deletions
@@ -19,5 +19,16 @@ under the License.
 
 # Map Expressions
 
+## MapSort (Spark 4.0+)
+
+Spark 4.0 inserts `MapSort` to normalize map-typed values used in shuffle hash partitioning
+keys, in `try_element_at`, and in other contexts where map ordering must be deterministic. Comet
+runs `MapSort` natively, so shuffles and group-bys on maps stay on Comet under Spark 4.0.
+
+When `spark.comet.exec.strictFloatingPoint=true`, `MapSort` falls back to Spark for maps whose
+keys contain `Float` or `Double` (consistent with `SortOrder` and `SortArray`). Arrow's sort uses
+IEEE total ordering for floating-point, which differs from Spark's `Double.compare` semantics for
+`NaN` and `-0.0`.
+
 <!--BEGIN:EXPR_COMPAT[map]-->
 <!--END:EXPR_COMPAT-->
@@ -112,3 +112,7 @@ harness = false
 [[bench]]
 name = "cast_non_int_numeric_timestamp"
 harness = false
+
+[[bench]]
+name = "map_sort"
+harness = false
@@ -0,0 +1,107 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Benchmarks for spark_map_sort.
+
+use arrow::array::builder::{Int32Builder, MapBuilder, StringBuilder};
+use arrow::array::{ArrayRef, MapArray, MapFieldNames};
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use datafusion::physical_plan::ColumnarValue;
+use datafusion_comet_spark_expr::spark_map_sort;
+use std::hint::black_box;
+use std::sync::Arc;
+
+const BATCH_SIZE: usize = 8192;
+
+/// Names for the entry, key and value fields shared by every map built in this benchmark.
+fn map_field_names() -> MapFieldNames {
+    let entry = "entries".into();
+    let key = "key".into();
+    let value = "value".into();
+    MapFieldNames { entry, key, value }
+}
+
+/// Produces a `BATCH_SIZE`-row MapArray of `i32` keys and values, `entries_per_map` per row.
+/// Keys descend within each row, so no map reaches the sort already ordered.
+fn build_int_key_map(entries_per_map: usize) -> MapArray {
+    let key_builder = Int32Builder::new();
+    let value_builder = Int32Builder::new();
+    let mut maps = MapBuilder::new(Some(map_field_names()), key_builder, value_builder);
+    for row in 0..BATCH_SIZE {
+        // Row-dependent shift so that consecutive maps do not all hold the same keys.
+        let shift = (row % 7) as i32;
+        // `rank` counts down from `entries_per_map` to 1 while `position` counts up from 0,
+        // which yields strictly descending keys paired with ascending values.
+        for (position, rank) in (1..=entries_per_map).rev().enumerate() {
+            maps.keys().append_value(rank as i32 + shift);
+            maps.values().append_value(position as i32);
+        }
+        // Seal the entries appended above into one map row.
+        maps.append(true).unwrap();
+    }
+    maps.finish()
+}
+
+/// String-keyed counterpart of `build_int_key_map`: same row count, entry count and values.
+fn build_string_key_map(entries_per_map: usize) -> MapArray {
+    let key_builder = StringBuilder::new();
+    let value_builder = Int32Builder::new();
+    let mut maps = MapBuilder::new(Some(map_field_names()), key_builder, value_builder);
+    for row in 0..BATCH_SIZE {
+        let shift = row % 7;
+        // Keys are "key_" plus a number zero-padded to four digits; `rank` counts down from
+        // `entries_per_map` to 1, so the numeric suffix descends within each row.
+        for (position, rank) in (1..=entries_per_map).rev().enumerate() {
+            let key = format!("key_{:04}", rank + shift);
+            maps.keys().append_value(&key);
+            maps.values().append_value(position as i32);
+        }
+        maps.append(true).unwrap();
+    }
+    maps.finish()
+}
+
+/// Benchmarks `spark_map_sort` for int and string keys at 4, 16 and 64 entries per map.
+fn bench_map_sort(c: &mut Criterion) {
+    // Benchmark label paired with the generator that builds its input batch.
+    let cases: [(&str, fn(usize) -> MapArray); 2] = [
+        ("int_keys", build_int_key_map),
+        ("string_keys", build_string_key_map),
+    ];
+    let mut group = c.benchmark_group("spark_map_sort");
+
+    for entries in [4usize, 16, 64] {
+        for (label, build) in cases {
+            // The input batch is built here, outside the closure handed to criterion.
+            let input: ArrayRef = Arc::new(build(entries));
+            let id = BenchmarkId::new(label, entries);
+            group.bench_with_input(id, &input, |bencher, array| {
+                // The argument list is assembled outside the timed `iter` loop.
+                let args = vec![ColumnarValue::Array(Arc::clone(array))];
+                bencher.iter(|| {
+                    let sorted = spark_map_sort(black_box(&args)).unwrap();
+                    black_box(sorted)
+                });
+            });
+        }
+    }
+
+    group.finish();
+}
+
+criterion_group!(benches, bench_map_sort);
+criterion_main!(benches);
@@ -16,6 +16,7 @@
 // under the License.
 
 use crate::hash_funcs::*;
+use crate::map_funcs::spark_map_sort;
 use crate::math_funcs::abs::abs;
 use crate::math_funcs::checked_arithmetic::{checked_add, checked_div, checked_mul, checked_sub};
 use crate::math_funcs::log::spark_log;
@@ -191,6 +192,10 @@ pub fn create_comet_physical_fun_with_eval_mode(
             let func = Arc::new(crate::string_funcs::spark_get_json_object);
             make_comet_scalar_udf!("get_json_object", func, without data_type)
         }
+        "map_sort" => {
+            let func = Arc::new(spark_map_sort);
+            make_comet_scalar_udf!("spark_map_sort", func, without data_type)
+        }
         _ => registry.udf(fun_name).map_err(|e| {
             DataFusionError::Execution(format!(
                 "Function {fun_name} not found in the registry: {e}",
 
@@ -57,6 +57,8 @@ pub use bloom_filter::{BloomFilterAgg, BloomFilterMightContain};
 
 mod conditional_funcs;
 mod conversion_funcs;
+mod map_funcs;
+pub use map_funcs::spark_map_sort;
 mod math_funcs;
 mod nondetermenistic_funcs;