@@ -93,6 +93,57 @@ fn array_cat(exprs: Vec<PyExpr>) -> PyExpr {
9393 array_concat ( exprs)
9494}
9595
96+ #[ pyfunction]
97+ fn array_distance ( array1 : PyExpr , array2 : PyExpr ) -> PyExpr {
98+ let args = vec ! [ array1. into( ) , array2. into( ) ] ;
99+ Expr :: ScalarFunction ( datafusion:: logical_expr:: expr:: ScalarFunction :: new_udf (
100+ datafusion:: functions_nested:: distance:: array_distance_udf ( ) ,
101+ args,
102+ ) )
103+ . into ( )
104+ }
105+
106+ #[ pyfunction]
107+ fn arrays_zip ( exprs : Vec < PyExpr > ) -> PyExpr {
108+ let exprs = exprs. into_iter ( ) . map ( |x| x. into ( ) ) . collect ( ) ;
109+ datafusion:: functions_nested:: expr_fn:: arrays_zip ( exprs) . into ( )
110+ }
111+
112+ #[ pyfunction]
113+ #[ pyo3( signature = ( string, delimiter, null_string=None ) ) ]
114+ fn string_to_array ( string : PyExpr , delimiter : PyExpr , null_string : Option < PyExpr > ) -> PyExpr {
115+ let mut args = vec ! [ string. into( ) , delimiter. into( ) ] ;
116+ if let Some ( null_string) = null_string {
117+ args. push ( null_string. into ( ) ) ;
118+ }
119+ Expr :: ScalarFunction ( datafusion:: logical_expr:: expr:: ScalarFunction :: new_udf (
120+ datafusion:: functions_nested:: string:: string_to_array_udf ( ) ,
121+ args,
122+ ) )
123+ . into ( )
124+ }
125+
126+ #[ pyfunction]
127+ #[ pyo3( signature = ( start, stop, step=None ) ) ]
128+ fn gen_series ( start : PyExpr , stop : PyExpr , step : Option < PyExpr > ) -> PyExpr {
129+ let mut args = vec ! [ start. into( ) , stop. into( ) ] ;
130+ if let Some ( step) = step {
131+ args. push ( step. into ( ) ) ;
132+ }
133+ Expr :: ScalarFunction ( datafusion:: logical_expr:: expr:: ScalarFunction :: new_udf (
134+ datafusion:: functions_nested:: range:: gen_series_udf ( ) ,
135+ args,
136+ ) )
137+ . into ( )
138+ }
139+
140+ #[ pyfunction]
141+ fn make_map ( keys : Vec < PyExpr > , values : Vec < PyExpr > ) -> PyExpr {
142+ let keys = keys. into_iter ( ) . map ( |x| x. into ( ) ) . collect ( ) ;
143+ let values = values. into_iter ( ) . map ( |x| x. into ( ) ) . collect ( ) ;
144+ datafusion:: functions_nested:: map:: map ( keys, values) . into ( )
145+ }
146+
96147#[ pyfunction]
97148#[ pyo3( signature = ( array, element, index=None ) ) ]
98149fn array_position ( array : PyExpr , element : PyExpr , index : Option < i64 > ) -> PyExpr {
@@ -644,8 +695,29 @@ expr_fn_vec!(named_struct);
644695expr_fn ! ( from_unixtime, unixtime) ;
645696expr_fn ! ( arrow_typeof, arg_1) ;
646697expr_fn ! ( arrow_cast, arg_1 datatype) ;
698+ expr_fn_vec ! ( arrow_metadata) ;
699+ expr_fn ! ( union_tag, arg1) ;
647700expr_fn ! ( random) ;
648701
702+ #[ pyfunction]
703+ fn get_field ( expr : PyExpr , name : PyExpr ) -> PyExpr {
704+ functions:: core:: get_field ( )
705+ . call ( vec ! [ expr. into( ) , name. into( ) ] )
706+ . into ( )
707+ }
708+
709+ #[ pyfunction]
710+ fn union_extract ( union_expr : PyExpr , field_name : PyExpr ) -> PyExpr {
711+ functions:: core:: union_extract ( )
712+ . call ( vec ! [ union_expr. into( ) , field_name. into( ) ] )
713+ . into ( )
714+ }
715+
716+ #[ pyfunction]
717+ fn version ( ) -> PyExpr {
718+ functions:: core:: version ( ) . call ( vec ! [ ] ) . into ( )
719+ }
720+
649721// Array Functions
650722array_fn ! ( array_append, array element) ;
651723array_fn ! ( array_to_string, array delimiter) ;
@@ -674,10 +746,20 @@ array_fn!(array_intersect, first_array second_array);
674746array_fn ! ( array_union, array1 array2) ;
675747array_fn ! ( array_except, first_array second_array) ;
676748array_fn ! ( array_resize, array size value) ;
749+ array_fn ! ( array_any_value, array) ;
750+ array_fn ! ( array_max, array) ;
751+ array_fn ! ( array_min, array) ;
752+ array_fn ! ( array_reverse, array) ;
677753array_fn ! ( cardinality, array) ;
678754array_fn ! ( flatten, array) ;
679755array_fn ! ( range, start stop step) ;
680756
757+ // Map Functions
758+ array_fn ! ( map_keys, map) ;
759+ array_fn ! ( map_values, map) ;
760+ array_fn ! ( map_extract, map key) ;
761+ array_fn ! ( map_entries, map) ;
762+
681763aggregate_function ! ( array_agg) ;
682764aggregate_function ! ( max) ;
683765aggregate_function ! ( min) ;
@@ -709,9 +791,10 @@ aggregate_function!(var_pop);
709791aggregate_function ! ( approx_distinct) ;
710792aggregate_function ! ( approx_median) ;
711793
712- // Code is commented out since grouping is not yet implemented
713- // https://github.com/apache/datafusion-python/issues/861
714- // aggregate_function!(grouping);
794+ // The grouping function's physical plan is not implemented, but the
795+ // ResolveGroupingFunction analyzer rule rewrites it before the physical
796+ // planner sees it, so it works correctly at runtime.
797+ aggregate_function ! ( grouping) ;
715798
716799#[ pyfunction]
717800#[ pyo3( signature = ( sort_expression, percentile, num_centroids=None , filter=None ) ) ]
@@ -749,6 +832,19 @@ pub fn approx_percentile_cont_with_weight(
749832 add_builder_fns_to_aggregate ( agg_fn, None , filter, None , None )
750833}
751834
835+ #[ pyfunction]
836+ #[ pyo3( signature = ( sort_expression, percentile, filter=None ) ) ]
837+ pub fn percentile_cont (
838+ sort_expression : PySortExpr ,
839+ percentile : f64 ,
840+ filter : Option < PyExpr > ,
841+ ) -> PyDataFusionResult < PyExpr > {
842+ let agg_fn =
843+ functions_aggregate:: expr_fn:: percentile_cont ( sort_expression. sort , lit ( percentile) ) ;
844+
845+ add_builder_fns_to_aggregate ( agg_fn, None , filter, None , None )
846+ }
847+
752848// We handle last_value explicitly because the signature expects an order_by
753849// https://github.com/apache/datafusion/issues/12376
754850#[ pyfunction]
@@ -949,10 +1045,12 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
9491045 m. add_wrapped ( wrap_pyfunction ! ( approx_median) ) ?;
9501046 m. add_wrapped ( wrap_pyfunction ! ( approx_percentile_cont) ) ?;
9511047 m. add_wrapped ( wrap_pyfunction ! ( approx_percentile_cont_with_weight) ) ?;
1048+ m. add_wrapped ( wrap_pyfunction ! ( percentile_cont) ) ?;
9521049 m. add_wrapped ( wrap_pyfunction ! ( range) ) ?;
9531050 m. add_wrapped ( wrap_pyfunction ! ( array_agg) ) ?;
9541051 m. add_wrapped ( wrap_pyfunction ! ( arrow_typeof) ) ?;
9551052 m. add_wrapped ( wrap_pyfunction ! ( arrow_cast) ) ?;
1053+ m. add_wrapped ( wrap_pyfunction ! ( arrow_metadata) ) ?;
9561054 m. add_wrapped ( wrap_pyfunction ! ( ascii) ) ?;
9571055 m. add_wrapped ( wrap_pyfunction ! ( asin) ) ?;
9581056 m. add_wrapped ( wrap_pyfunction ! ( asinh) ) ?;
@@ -997,7 +1095,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
9971095 m. add_wrapped ( wrap_pyfunction ! ( from_unixtime) ) ?;
9981096 m. add_wrapped ( wrap_pyfunction ! ( gcd) ) ?;
9991097 m. add_wrapped ( wrap_pyfunction ! ( greatest) ) ?;
1000- // m.add_wrapped(wrap_pyfunction!(grouping))?;
1098+ m. add_wrapped ( wrap_pyfunction ! ( grouping) ) ?;
10011099 m. add_wrapped ( wrap_pyfunction ! ( in_list) ) ?;
10021100 m. add_wrapped ( wrap_pyfunction ! ( initcap) ) ?;
10031101 m. add_wrapped ( wrap_pyfunction ! ( isnan) ) ?;
@@ -1081,6 +1179,10 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
10811179 m. add_wrapped ( wrap_pyfunction ! ( trim) ) ?;
10821180 m. add_wrapped ( wrap_pyfunction ! ( trunc) ) ?;
10831181 m. add_wrapped ( wrap_pyfunction ! ( upper) ) ?;
1182+ m. add_wrapped ( wrap_pyfunction ! ( get_field) ) ?;
1183+ m. add_wrapped ( wrap_pyfunction ! ( union_extract) ) ?;
1184+ m. add_wrapped ( wrap_pyfunction ! ( union_tag) ) ?;
1185+ m. add_wrapped ( wrap_pyfunction ! ( version) ) ?;
10841186 m. add_wrapped ( wrap_pyfunction ! ( self :: uuid) ) ?; // Use self to avoid name collision
10851187 m. add_wrapped ( wrap_pyfunction ! ( var_pop) ) ?;
10861188 m. add_wrapped ( wrap_pyfunction ! ( var_sample) ) ?;
@@ -1139,9 +1241,24 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
11391241 m. add_wrapped ( wrap_pyfunction ! ( array_replace_all) ) ?;
11401242 m. add_wrapped ( wrap_pyfunction ! ( array_sort) ) ?;
11411243 m. add_wrapped ( wrap_pyfunction ! ( array_slice) ) ?;
1244+ m. add_wrapped ( wrap_pyfunction ! ( array_any_value) ) ?;
1245+ m. add_wrapped ( wrap_pyfunction ! ( array_distance) ) ?;
1246+ m. add_wrapped ( wrap_pyfunction ! ( array_max) ) ?;
1247+ m. add_wrapped ( wrap_pyfunction ! ( array_min) ) ?;
1248+ m. add_wrapped ( wrap_pyfunction ! ( array_reverse) ) ?;
1249+ m. add_wrapped ( wrap_pyfunction ! ( arrays_zip) ) ?;
1250+ m. add_wrapped ( wrap_pyfunction ! ( string_to_array) ) ?;
1251+ m. add_wrapped ( wrap_pyfunction ! ( gen_series) ) ?;
11421252 m. add_wrapped ( wrap_pyfunction ! ( flatten) ) ?;
11431253 m. add_wrapped ( wrap_pyfunction ! ( cardinality) ) ?;
11441254
1255+ // Map Functions
1256+ m. add_wrapped ( wrap_pyfunction ! ( make_map) ) ?;
1257+ m. add_wrapped ( wrap_pyfunction ! ( map_keys) ) ?;
1258+ m. add_wrapped ( wrap_pyfunction ! ( map_values) ) ?;
1259+ m. add_wrapped ( wrap_pyfunction ! ( map_extract) ) ?;
1260+ m. add_wrapped ( wrap_pyfunction ! ( map_entries) ) ?;
1261+
11451262 // Window Functions
11461263 m. add_wrapped ( wrap_pyfunction ! ( lead) ) ?;
11471264 m. add_wrapped ( wrap_pyfunction ! ( lag) ) ?;
0 commit comments