-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Add lambda support and array_transform udf #18921
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
dbf2aa5
fa4a8fb
b18d214
d844b2d
e1921eb
1f19c64
570cc53
83dfbdd
34137e1
3ded115
82930ec
86d5999
60cabc0
41152c3
2be9e54
90eb08f
d874db7
a59ffe8
cd22c04
0188d40
6f2c92b
b3bdc48
9728a2e
811aa0a
f724ef5
5380884
d75dfe3
a241a51
547c148
6ae73cb
39db62b
7a7d371
83fb18d
e76ff25
51dfa81
6c32ef8
27a3e24
93e66f7
a9d0e6c
69c44fc
66839d3
f0cf8d7
1b7f4bf
6d7c52a
7255820
96d8ad2
474b22e
954a360
5c2c72a
2259f70
8571853
3c9fe39
3486e53
5c0b41d
ca260a7
5a6f470
98d365a
9ad1d6f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -398,6 +398,30 @@ pub enum Expr { | |
| OuterReferenceColumn(FieldRef, Column), | ||
| /// Unnest expression | ||
| Unnest(Unnest), | ||
| /// Lambda expression | ||
| Lambda(Lambda), | ||
| LambdaColumn(LambdaColumn), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gstvg Would
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, done in d844b2d |
||
| } | ||
|
|
||
| #[derive(Clone, PartialEq, PartialOrd, Eq, Debug, Hash)] | ||
| pub struct LambdaColumn { | ||
| pub name: String, | ||
| pub field: FieldRef, | ||
| pub spans: Spans, | ||
| } | ||
|
|
||
| impl LambdaColumn { | ||
| pub fn new(name: String, field: FieldRef) -> Self { | ||
| Self { | ||
| name, | ||
| field, | ||
| spans: Spans::new(), | ||
| } | ||
| } | ||
|
|
||
| pub fn spans_mut(&mut self) -> &mut Spans { | ||
| &mut self.spans | ||
| } | ||
| } | ||
|
|
||
| impl Default for Expr { | ||
|
|
@@ -1211,6 +1235,23 @@ impl GroupingSet { | |
| } | ||
| } | ||
|
|
||
| /// Lambda expression. | ||
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] | ||
| pub struct Lambda { | ||
| pub params: Vec<String>, | ||
| pub body: Box<Expr>, | ||
| } | ||
|
|
||
| impl Lambda { | ||
| /// Create a new lambda expression | ||
| pub fn new(params: Vec<String>, body: Expr) -> Self { | ||
| Self { | ||
| params, | ||
| body: Box::new(body), | ||
| } | ||
| } | ||
| } | ||
|
|
||
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] | ||
| #[cfg(not(feature = "sql"))] | ||
| pub struct IlikeSelectItem { | ||
|
|
@@ -1525,6 +1566,8 @@ impl Expr { | |
| #[expect(deprecated)] | ||
| Expr::Wildcard { .. } => "Wildcard", | ||
| Expr::Unnest { .. } => "Unnest", | ||
| Expr::Lambda { .. } => "Lambda", | ||
| Expr::LambdaColumn { .. } => "LambdaColumn", | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -2078,7 +2121,9 @@ impl Expr { | |
| | Expr::Wildcard { .. } | ||
| | Expr::WindowFunction(..) | ||
| | Expr::Literal(..) | ||
| | Expr::Placeholder(..) => false, | ||
| | Expr::Placeholder(..) | ||
| | Expr::Lambda(..) | ||
| | Expr::LambdaColumn(..) => false, | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -2674,6 +2719,17 @@ impl HashNode for Expr { | |
| column.hash(state); | ||
| } | ||
| Expr::Unnest(Unnest { expr: _expr }) => {} | ||
| Expr::Lambda(Lambda { params, body: _ }) => { | ||
| params.hash(state); | ||
| } | ||
| Expr::LambdaColumn(LambdaColumn { | ||
| name, | ||
| field, | ||
| spans: _, | ||
| }) => { | ||
| name.hash(state); | ||
| field.hash(state); | ||
| } | ||
| }; | ||
| } | ||
| } | ||
|
|
@@ -2987,6 +3043,12 @@ impl Display for SchemaDisplay<'_> { | |
| } | ||
| } | ||
| } | ||
| Expr::Lambda(Lambda { params, body }) => { | ||
| write!(f, "({}) -> {body}", display_comma_separated(params)) | ||
| } | ||
| Expr::LambdaColumn(c) => { | ||
| write!(f, "{}", c.name) | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -3167,6 +3229,9 @@ impl Display for SqlDisplay<'_> { | |
| } | ||
| } | ||
| } | ||
| Expr::Lambda(Lambda { params, body }) => { | ||
| write!(f, "({}) -> {}", params.join(", "), SchemaDisplay(body)) | ||
| } | ||
| _ => write!(f, "{}", self.0), | ||
| } | ||
| } | ||
|
|
@@ -3474,6 +3539,12 @@ impl Display for Expr { | |
| Expr::Unnest(Unnest { expr }) => { | ||
| write!(f, "{UNNEST_COLUMN_PREFIX}({expr})") | ||
| } | ||
| Expr::Lambda(Lambda { params, body }) => { | ||
| write!(f, "({}) -> {body}", params.join(", ")) | ||
| } | ||
| Expr::LambdaColumn(c) => { | ||
| write!(f, "{}", c.name) | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,17 +18,22 @@ | |
| use super::{Between, Expr, Like}; | ||
| use crate::expr::{ | ||
| AggregateFunction, AggregateFunctionParams, Alias, BinaryExpr, Cast, InList, | ||
| InSubquery, Placeholder, ScalarFunction, TryCast, Unnest, WindowFunction, | ||
| InSubquery, Lambda, Placeholder, ScalarFunction, TryCast, Unnest, WindowFunction, | ||
| WindowFunctionParams, | ||
| }; | ||
| use crate::expr::{FieldMetadata, LambdaColumn}; | ||
| use crate::type_coercion::functions::{ | ||
| data_types_with_scalar_udf, fields_with_aggregate_udf, fields_with_window_udf, | ||
| fields_with_aggregate_udf, fields_with_window_udf, | ||
| }; | ||
| use crate::{ | ||
| type_coercion::functions::data_types_with_scalar_udf, udf::ReturnFieldArgs, utils, | ||
| LogicalPlan, Projection, Subquery, WindowFunctionDefinition, | ||
| }; | ||
| use arrow::datatypes::FieldRef; | ||
| use arrow::{ | ||
| compute::can_cast_types, | ||
| datatypes::{DataType, Field}, | ||
| }; | ||
| use crate::udf::ReturnFieldArgs; | ||
| use crate::{utils, LogicalPlan, Projection, Subquery, WindowFunctionDefinition}; | ||
| use arrow::compute::can_cast_types; | ||
| use arrow::datatypes::{DataType, Field, FieldRef}; | ||
| use datafusion_common::metadata::FieldMetadata; | ||
| use datafusion_common::{ | ||
| not_impl_err, plan_datafusion_err, plan_err, Column, DataFusionError, ExprSchema, | ||
| Result, Spans, TableReference, | ||
|
|
@@ -229,6 +234,10 @@ impl ExprSchemable for Expr { | |
| // Grouping sets do not really have a type and do not appear in projections | ||
| Ok(DataType::Null) | ||
| } | ||
| Expr::Lambda(Lambda { params: _, body }) => body.get_type(schema), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it an issue that this is technically incorrect? The type of the lambda expression itself is actually a function type, where here it's presented as just its return type. Can this lead to incorrect type analysis? I haven't thought this through fully, but would it maybe be better to not have lambda functions be an
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's either this, .map(|arg: Expr| {
let field = arg.to_field(schema)?.1;
match arg {
Expr::Lambda(_lambda) => Ok(ValueOrLambda::Lambda(field)),
_ => Ok(ValueOrLambda::Value(field)),
}
})We can just change it to .map(|arg: Expr| {
match arg {
Expr::Lambda(lambda) => Ok(ValueOrLambda::Lambda(lambda.body.to_field(schema)?.1)),
_ => Ok(ValueOrLambda::Value(arg.to_field(schema)?.1)),
}
})During higher order function type coercion, lambda arguments are ignored and only value arguments are checked and coerced so the return type for a lamba is irrelevant. Some other expressions return
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It does seem that the contract of
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed, done at 96d8ad2, thanks |
||
| Expr::LambdaColumn(LambdaColumn { name: _, field, .. }) => { | ||
| Ok(field.data_type().clone()) | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -347,6 +356,8 @@ impl ExprSchemable for Expr { | |
| // in projections | ||
| Ok(true) | ||
| } | ||
| Expr::Lambda(l) => l.body.nullable(input_schema), | ||
| Expr::LambdaColumn(c) => Ok(c.field.is_nullable()), | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -543,6 +554,7 @@ impl ExprSchemable for Expr { | |
| .into_iter() | ||
| .map(|f| (f.data_type().clone(), f)) | ||
| .unzip(); | ||
|
|
||
| // Verify that function is invoked with correct number and type of arguments as defined in `TypeSignature` | ||
| let new_data_types = data_types_with_scalar_udf(&arg_types, func) | ||
| .map_err(|err| { | ||
|
|
@@ -573,9 +585,16 @@ impl ExprSchemable for Expr { | |
| _ => None, | ||
| }) | ||
| .collect::<Vec<_>>(); | ||
|
|
||
| let lambdas = args | ||
| .iter() | ||
| .map(|e| matches!(e, Expr::Lambda { .. })) | ||
| .collect::<Vec<_>>(); | ||
|
|
||
| let args = ReturnFieldArgs { | ||
| arg_fields: &new_fields, | ||
| scalar_arguments: &arguments, | ||
| lambdas: &lambdas, | ||
| }; | ||
|
|
||
| func.return_field_from_args(args) | ||
|
|
@@ -600,11 +619,13 @@ impl ExprSchemable for Expr { | |
| | Expr::Wildcard { .. } | ||
| | Expr::GroupingSet(_) | ||
| | Expr::Placeholder(_) | ||
| | Expr::Unnest(_) => Ok(Arc::new(Field::new( | ||
| | Expr::Unnest(_) | ||
| | Expr::Lambda(_) => Ok(Arc::new(Field::new( | ||
| &schema_name, | ||
| self.get_type(schema)?, | ||
| self.nullable(schema)?, | ||
| ))), | ||
| Expr::LambdaColumn(c) => Ok(Arc::clone(&c.field)), | ||
| }?; | ||
|
|
||
| Ok(( | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will try to create PR to arrow with my utils to handle nulls and list sliced but don't wait for me