Skip to content

Commit 271e86b

Browse files
committed
Merge branch 'main' into feat/expose-agg-fns
2 parents d16cff1 + d07fdb3 commit 271e86b

File tree

5 files changed

+1122
-10
lines changed

5 files changed

+1122
-10
lines changed

AGENTS.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,20 @@ Skills follow the [Agent Skills](https://agentskills.io) open standard. Each ski
2525

2626
- `SKILL.md` — The skill definition with YAML frontmatter (name, description, argument-hint) and detailed instructions.
2727
- Additional supporting files as needed.
28+
29+
## Python Function Docstrings
30+
31+
Every Python function must include a docstring with usage examples.
32+
33+
- **Examples are required**: Each function needs at least one doctest-style example
34+
demonstrating basic usage.
35+
- **Optional parameters**: If a function has optional parameters, include separate
36+
examples that show usage both without and with the optional arguments. Pass
37+
optional arguments using their keyword name (e.g., `step=dfn.lit(3)`) so readers
38+
can immediately see which parameter is being demonstrated.
39+
- **Reuse input data**: Use the same input data across examples wherever possible.
40+
The examples should demonstrate how different optional arguments change the output
41+
for the same input, making the effect of each option easy to understand.
42+
- **Alias functions**: Functions that are simple aliases (e.g., `list_sort` aliasing
43+
`array_sort`) only need a one-line description and a `See Also` reference to the
44+
primary function. They do not need their own examples.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ needing to activate the virtual environment:
275275

276276
```bash
277277
uv run --no-project maturin develop --uv
278-
uv run --no-project pytest .
278+
uv run --no-project pytest
279279
```
280280

281281
To run the FFI tests within the examples folder, after you have built

crates/core/src/functions.rs

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,57 @@ fn array_cat(exprs: Vec<PyExpr>) -> PyExpr {
9393
array_concat(exprs)
9494
}
9595

96+
/// Python-exposed wrapper that builds a scalar-function expression invoking
/// DataFusion's `array_distance` UDF.
///
/// `array1` and `array2` are converted into logical expressions and passed,
/// in that order, as the UDF's two arguments. The resulting expression is
/// converted back into a `PyExpr`; nothing is evaluated here.
#[pyfunction]
97+
fn array_distance(array1: PyExpr, array2: PyExpr) -> PyExpr {
98+
let args = vec![array1.into(), array2.into()];
99+
Expr::ScalarFunction(datafusion::logical_expr::expr::ScalarFunction::new_udf(
100+
datafusion::functions_nested::distance::array_distance_udf(),
101+
args,
102+
))
103+
.into()
104+
}
105+
106+
/// Python-exposed wrapper around
/// `datafusion::functions_nested::expr_fn::arrays_zip`.
///
/// Each element of `exprs` is converted into a logical expression, the
/// collected arguments are handed to `arrays_zip`, and the expression it
/// returns is converted back into a `PyExpr`.
#[pyfunction]
107+
fn arrays_zip(exprs: Vec<PyExpr>) -> PyExpr {
108+
let exprs = exprs.into_iter().map(|x| x.into()).collect();
109+
datafusion::functions_nested::expr_fn::arrays_zip(exprs).into()
110+
}
111+
112+
/// Python-exposed wrapper that builds a scalar-function expression invoking
/// DataFusion's `string_to_array` UDF.
///
/// The UDF always receives `string` and `delimiter`, in that order.
/// `null_string` defaults to `None` on the Python side (see the `pyo3`
/// signature) and is appended as a third argument only when supplied, so the
/// UDF is called with either two or three arguments.
#[pyfunction]
113+
#[pyo3(signature = (string, delimiter, null_string=None))]
114+
fn string_to_array(string: PyExpr, delimiter: PyExpr, null_string: Option<PyExpr>) -> PyExpr {
115+
let mut args = vec![string.into(), delimiter.into()];
116+
if let Some(null_string) = null_string {
117+
args.push(null_string.into());
118+
}
119+
Expr::ScalarFunction(datafusion::logical_expr::expr::ScalarFunction::new_udf(
120+
datafusion::functions_nested::string::string_to_array_udf(),
121+
args,
122+
))
123+
.into()
124+
}
125+
126+
/// Python-exposed wrapper that builds a scalar-function expression invoking
/// DataFusion's `gen_series` UDF.
///
/// The UDF always receives `start` and `stop`, in that order. `step`
/// defaults to `None` on the Python side (see the `pyo3` signature) and is
/// appended as a third argument only when supplied; no default step is
/// chosen here, that is left to the UDF.
#[pyfunction]
127+
#[pyo3(signature = (start, stop, step=None))]
128+
fn gen_series(start: PyExpr, stop: PyExpr, step: Option<PyExpr>) -> PyExpr {
129+
let mut args = vec![start.into(), stop.into()];
130+
if let Some(step) = step {
131+
args.push(step.into());
132+
}
133+
Expr::ScalarFunction(datafusion::logical_expr::expr::ScalarFunction::new_udf(
134+
datafusion::functions_nested::range::gen_series_udf(),
135+
args,
136+
))
137+
.into()
138+
}
139+
140+
/// Python-exposed wrapper around `datafusion::functions_nested::map::map`.
///
/// `keys` and `values` are each converted element-by-element into logical
/// expressions and passed to `map` as two separate collections. No length
/// check is performed here.
/// NOTE(review): presumably `keys[i]` pairs with `values[i]` — confirm
/// against the DataFusion `map` documentation.
#[pyfunction]
141+
fn make_map(keys: Vec<PyExpr>, values: Vec<PyExpr>) -> PyExpr {
142+
let keys = keys.into_iter().map(|x| x.into()).collect();
143+
let values = values.into_iter().map(|x| x.into()).collect();
144+
datafusion::functions_nested::map::map(keys, values).into()
145+
}
146+
96147
#[pyfunction]
97148
#[pyo3(signature = (array, element, index=None))]
98149
fn array_position(array: PyExpr, element: PyExpr, index: Option<i64>) -> PyExpr {
@@ -644,8 +695,29 @@ expr_fn_vec!(named_struct);
644695
expr_fn!(from_unixtime, unixtime);
645696
expr_fn!(arrow_typeof, arg_1);
646697
expr_fn!(arrow_cast, arg_1 datatype);
698+
expr_fn_vec!(arrow_metadata);
699+
expr_fn!(union_tag, arg1);
647700
expr_fn!(random);
648701

702+
/// Python-exposed wrapper that calls the `functions::core::get_field`
/// function with two arguments: `expr` followed by `name`.
///
/// Both inputs are passed as expressions, and the resulting call expression
/// is converted back into a `PyExpr`.
#[pyfunction]
703+
fn get_field(expr: PyExpr, name: PyExpr) -> PyExpr {
704+
functions::core::get_field()
705+
.call(vec![expr.into(), name.into()])
706+
.into()
707+
}
708+
709+
/// Python-exposed wrapper that calls the `functions::core::union_extract`
/// function with two arguments: `union_expr` followed by `field_name`.
///
/// Both inputs are passed as expressions, and the resulting call expression
/// is converted back into a `PyExpr`.
#[pyfunction]
710+
fn union_extract(union_expr: PyExpr, field_name: PyExpr) -> PyExpr {
711+
functions::core::union_extract()
712+
.call(vec![union_expr.into(), field_name.into()])
713+
.into()
714+
}
715+
716+
/// Python-exposed wrapper that calls the `functions::core::version` function
/// with an empty argument list and returns the resulting call expression as
/// a `PyExpr`.
#[pyfunction]
717+
fn version() -> PyExpr {
718+
functions::core::version().call(vec![]).into()
719+
}
720+
649721
// Array Functions
650722
array_fn!(array_append, array element);
651723
array_fn!(array_to_string, array delimiter);
@@ -674,10 +746,20 @@ array_fn!(array_intersect, first_array second_array);
674746
array_fn!(array_union, array1 array2);
675747
array_fn!(array_except, first_array second_array);
676748
array_fn!(array_resize, array size value);
749+
array_fn!(array_any_value, array);
750+
array_fn!(array_max, array);
751+
array_fn!(array_min, array);
752+
array_fn!(array_reverse, array);
677753
array_fn!(cardinality, array);
678754
array_fn!(flatten, array);
679755
array_fn!(range, start stop step);
680756

757+
// Map Functions
758+
array_fn!(map_keys, map);
759+
array_fn!(map_values, map);
760+
array_fn!(map_extract, map key);
761+
array_fn!(map_entries, map);
762+
681763
aggregate_function!(array_agg);
682764
aggregate_function!(max);
683765
aggregate_function!(min);
@@ -968,6 +1050,7 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
9681050
m.add_wrapped(wrap_pyfunction!(array_agg))?;
9691051
m.add_wrapped(wrap_pyfunction!(arrow_typeof))?;
9701052
m.add_wrapped(wrap_pyfunction!(arrow_cast))?;
1053+
m.add_wrapped(wrap_pyfunction!(arrow_metadata))?;
9711054
m.add_wrapped(wrap_pyfunction!(ascii))?;
9721055
m.add_wrapped(wrap_pyfunction!(asin))?;
9731056
m.add_wrapped(wrap_pyfunction!(asinh))?;
@@ -1096,6 +1179,10 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
10961179
m.add_wrapped(wrap_pyfunction!(trim))?;
10971180
m.add_wrapped(wrap_pyfunction!(trunc))?;
10981181
m.add_wrapped(wrap_pyfunction!(upper))?;
1182+
m.add_wrapped(wrap_pyfunction!(get_field))?;
1183+
m.add_wrapped(wrap_pyfunction!(union_extract))?;
1184+
m.add_wrapped(wrap_pyfunction!(union_tag))?;
1185+
m.add_wrapped(wrap_pyfunction!(version))?;
10991186
m.add_wrapped(wrap_pyfunction!(self::uuid))?; // Use self to avoid name collision
11001187
m.add_wrapped(wrap_pyfunction!(var_pop))?;
11011188
m.add_wrapped(wrap_pyfunction!(var_sample))?;
@@ -1154,9 +1241,24 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
11541241
m.add_wrapped(wrap_pyfunction!(array_replace_all))?;
11551242
m.add_wrapped(wrap_pyfunction!(array_sort))?;
11561243
m.add_wrapped(wrap_pyfunction!(array_slice))?;
1244+
m.add_wrapped(wrap_pyfunction!(array_any_value))?;
1245+
m.add_wrapped(wrap_pyfunction!(array_distance))?;
1246+
m.add_wrapped(wrap_pyfunction!(array_max))?;
1247+
m.add_wrapped(wrap_pyfunction!(array_min))?;
1248+
m.add_wrapped(wrap_pyfunction!(array_reverse))?;
1249+
m.add_wrapped(wrap_pyfunction!(arrays_zip))?;
1250+
m.add_wrapped(wrap_pyfunction!(string_to_array))?;
1251+
m.add_wrapped(wrap_pyfunction!(gen_series))?;
11571252
m.add_wrapped(wrap_pyfunction!(flatten))?;
11581253
m.add_wrapped(wrap_pyfunction!(cardinality))?;
11591254

1255+
// Map Functions
1256+
m.add_wrapped(wrap_pyfunction!(make_map))?;
1257+
m.add_wrapped(wrap_pyfunction!(map_keys))?;
1258+
m.add_wrapped(wrap_pyfunction!(map_values))?;
1259+
m.add_wrapped(wrap_pyfunction!(map_extract))?;
1260+
m.add_wrapped(wrap_pyfunction!(map_entries))?;
1261+
11601262
// Window Functions
11611263
m.add_wrapped(wrap_pyfunction!(lead))?;
11621264
m.add_wrapped(wrap_pyfunction!(lag))?;

0 commit comments

Comments
 (0)