Skip to content

Commit ae6237f

Browse files
andygrove and claude committed
feat: add MapSort expression support for Spark 4.0
Add a native map_sort scalar function that sorts map entries by key in ascending order, and wire it up via the Spark 4.0 CometExprShim so that MapSort expressions are accelerated instead of falling back to Spark.

Re-enable all CometColumnarShuffleSuite map tests that were skipped for Spark 4.0.

Closes #1941

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 5076f63 commit ae6237f

6 files changed

Lines changed: 742 additions & 60 deletions

File tree

native/spark-expr/src/comet_scalar_funcs.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
// under the License.
1717

1818
use crate::hash_funcs::*;
19+
use crate::map_funcs::spark_map_sort;
1920
use crate::math_funcs::abs::abs;
2021
use crate::math_funcs::checked_arithmetic::{checked_add, checked_div, checked_mul, checked_sub};
2122
use crate::math_funcs::log::spark_log;
@@ -191,6 +192,10 @@ pub fn create_comet_physical_fun_with_eval_mode(
191192
let func = Arc::new(crate::string_funcs::spark_get_json_object);
192193
make_comet_scalar_udf!("get_json_object", func, without data_type)
193194
}
195+
"map_sort" => {
196+
let func = Arc::new(spark_map_sort);
197+
make_comet_scalar_udf!("spark_map_sort", func, without data_type)
198+
}
194199
_ => registry.udf(fun_name).map_err(|e| {
195200
DataFusionError::Execution(format!(
196201
"Function {fun_name} not found in the registry: {e}",

native/spark-expr/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,7 @@ pub use bloom_filter::{BloomFilterAgg, BloomFilterMightContain};
5757

5858
mod conditional_funcs;
5959
mod conversion_funcs;
60+
mod map_funcs;
6061
mod math_funcs;
6162
mod nondetermenistic_funcs;
6263

0 commit comments

Comments
 (0)