Skip to content

Commit f131ec9

Browse files
committed
Merge branch 'simplify_coerce_values_for_lambdas_usage' into test-semver
2 parents ef8409c + fd8dc6a commit f131ec9

2 files changed

Lines changed: 28 additions & 46 deletions

File tree

datafusion/expr/src/higher_order_function.rs

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,6 @@ pub struct HigherOrderSignature {
8787
pub type_signature: HigherOrderTypeSignature,
8888
/// The volatility of the function. See [Volatility] for more information.
8989
pub volatility: Volatility,
90-
/// Whether [HigherOrderUDF::coerce_values_for_lambdas] should be called
91-
pub coerce_values_for_lambdas: bool,
9290
/// The max number of times to call [HigherOrderUDF::lambda_parameters] before raising an error.
9391
/// Used to guard against implementations that causes an infinite loop by endlessly returning
9492
/// [LambdaParametersProgress::Partial]. Defaults to 256
@@ -103,7 +101,6 @@ impl HigherOrderSignature {
103101
HigherOrderSignature {
104102
type_signature,
105103
volatility,
106-
coerce_values_for_lambdas: false,
107104
lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS,
108105
}
109106
}
@@ -113,7 +110,6 @@ impl HigherOrderSignature {
113110
Self {
114111
type_signature: HigherOrderTypeSignature::UserDefined,
115112
volatility,
116-
coerce_values_for_lambdas: false,
117113
lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS,
118114
}
119115
}
@@ -123,7 +119,6 @@ impl HigherOrderSignature {
123119
Self {
124120
type_signature: HigherOrderTypeSignature::VariadicAny,
125121
volatility,
126-
coerce_values_for_lambdas: false,
127122
lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS,
128123
}
129124
}
@@ -133,18 +128,9 @@ impl HigherOrderSignature {
133128
Self {
134129
type_signature: HigherOrderTypeSignature::Any(arg_count),
135130
volatility,
136-
coerce_values_for_lambdas: false,
137131
lambda_parameters_max_iterations: LAMBDA_PARAMETERS_MAX_ITERATIONS,
138132
}
139133
}
140-
141-
/// Set [Self::coerce_values_for_lambdas] to true to indicate that [HigherOrderUDF::coerce_values_for_lambdas]
142-
/// should be called
143-
pub fn with_coerce_values_for_lambdas(mut self) -> Self {
144-
self.coerce_values_for_lambdas = true;
145-
146-
self
147-
}
148134
}
149135

150136
impl PartialEq for dyn HigherOrderUDF {
@@ -621,12 +607,12 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any {
621607
///
622608
/// assert_eq!(
623609
/// coerce_to,
624-
/// vec![
610+
/// Some(vec![
625611
/// // return the same type for the array being reduced
626612
/// DataType::new_list(DataType::Float32, true),
627613
/// // coerce the initial value to the output of the merge lambda
628614
/// DataType::Float32,
629-
/// ]
615+
/// ])
630616
/// );
631617
///
632618
/// ```
@@ -636,7 +622,7 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any {
636622
///
637623
/// The implementation can assume that some other part of the code has coerced
638624
/// the actual argument types to match [`Self::signature`], except the coercion defined by
639-
/// [Self::coerce_values_for_lambdas], if applicable.
625+
/// [Self::coerce_values_for_lambdas].
640626
///
641627
/// [`HigherOrderFunction`]: crate::expr::HigherOrderFunction
642628
/// [`HigherOrderFunction::lambda_parameters`]: crate::expr::HigherOrderFunction::lambda_parameters
@@ -649,8 +635,7 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any {
649635
/// Coerce value arguments of a function call to types that the function can evaluate also taking into
650636
/// account the *output type of it's lambdas*. This differs from [HigherOrderUDF::coerce_value_types]
651637
/// that only has access to the type of it's value arguments because it's called before the output type
652-
/// of lambdas are known. So that this method is called, the function must have it's
653-
/// [HigherOrderSignature::coerce_values_for_lambdas] set to true
638+
/// of lambdas are known.
654639
///
655640
/// See the [type coercion module](crate::type_coercion)
656641
/// documentation for more details on type coercion
@@ -659,29 +644,27 @@ pub trait HigherOrderUDF: Debug + DynEq + DynHash + Send + Sync + Any {
659644
/// * `fields`: The argument types of the value arguments of this function, or the output type of lambdas
660645
///
661646
/// # Return value
662-
/// A Vec with the same number of [ValueOrLambda::Value] in `fields`. DataFusion will `CAST` the
663-
/// function call arguments to these specific types.
647+
/// If `Some`, contains a Vec with the same number of [ValueOrLambda::Value] in `fields`.
648+
/// DataFusion will `CAST` the function call arguments to these specific types. If `None`, no
649+
/// coercion will be applied beyond the one defined by the function signature.
664650
///
665651
/// For example, a flexible array_reduce implementation (see [Self::lambda_parameters] docs), when working
666652
/// with the expression below, may want to coerce it's initial value argument, the *integer* `0`,
667-
/// to match the output it's merge function, which is a *float*:
653+
/// to match the output of it's merge function, which is a *float*:
668654
///
669655
/// `array_reduce([1.2, 2.1], 0, (acc, v) -> acc + v + 1.5, v -> v > 2.0)`
670656
fn coerce_values_for_lambdas(
671657
&self,
672658
_fields: &[ValueOrLambda<DataType, DataType>],
673-
) -> Result<Vec<DataType>> {
674-
not_impl_err!(
675-
"{} coerce_values_for_lambdas is not implemented",
676-
self.name()
677-
)
659+
) -> Result<Option<Vec<DataType>>> {
660+
Ok(None)
678661
}
679662

680663
/// What type will be returned by this function, given the arguments?
681664
///
682665
/// The implementation can assume that some other part of the code has coerced
683666
/// the actual argument types to match [`Self::signature`], including the coercion
684-
/// defined by [Self::coerce_values_for_lambdas], if applicable.
667+
/// defined by [Self::coerce_values_for_lambdas].
685668
///
686669
/// # Example creating `Field`
687670
///

datafusion/expr/src/type_coercion/functions.rs

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -158,9 +158,9 @@ pub fn fields_with_udf<F: UDFCoercionExt>(
158158
/// argument must be coerced to match `signature`.
159159
/// For lambda arguments, returns a clone of the associated data
160160
///
161-
/// Note this does not invokes [HigherOrderUDF::coerce_values_for_lambdas]
162-
/// if requested by the function signature. If that's required, use
163-
/// [value_fields_with_higher_order_udf_and_lambdas] instead
161+
/// Note this does not invokes [HigherOrderUDF::coerce_values_for_lambdas].
162+
/// If that's required, use [value_fields_with_higher_order_udf_and_lambdas]
163+
/// instead
164164
///
165165
/// For more details on coercion in general, please see the
166166
/// [`type_coercion`](crate::type_coercion) module.
@@ -235,8 +235,8 @@ pub fn value_fields_with_higher_order_udf<L: Clone>(
235235

236236
/// Performs type coercion for higher order function arguments,
237237
/// including those defined by [HigherOrderUDF::coerce_values_for_lambdas],
238-
/// if defined by the signature. Note that compared to
239-
/// [value_fields_with_higher_order_udf], this function requires
238+
/// if it returns `Some(...)` instead of the default `None`. Note that
239+
/// compared to [value_fields_with_higher_order_udf], this function requires
240240
/// the [ValueOrLambda::Lambda] variant to contain the output field of the lambda.
241241
///
242242
/// For value arguments, returns the field to which each
@@ -251,16 +251,16 @@ pub fn value_fields_with_higher_order_udf_and_lambdas(
251251
) -> Result<Vec<ValueOrLambda<FieldRef, FieldRef>>> {
252252
let mut new_fields = value_fields_with_higher_order_udf(current_fields, func)?;
253253

254-
if func.signature().coerce_values_for_lambdas {
255-
let new_types = new_fields
256-
.iter()
257-
.map(|f| match f {
258-
ValueOrLambda::Value(f) => ValueOrLambda::Value(f.data_type().clone()),
259-
ValueOrLambda::Lambda(f) => ValueOrLambda::Lambda(f.data_type().clone()),
260-
})
261-
.collect::<Vec<_>>();
254+
let new_types = new_fields
255+
.iter()
256+
.map(|f| match f {
257+
ValueOrLambda::Value(f) => ValueOrLambda::Value(f.data_type().clone()),
258+
ValueOrLambda::Lambda(f) => ValueOrLambda::Lambda(f.data_type().clone()),
259+
})
260+
.collect::<Vec<_>>();
262261

263-
let mut new_value_types = func.coerce_values_for_lambdas(&new_types)?.into_iter();
262+
if let Some(new_value_types) = func.coerce_values_for_lambdas(&new_types)? {
263+
let mut new_value_types = new_value_types.into_iter();
264264

265265
let value_types_count = new_types
266266
.iter()
@@ -1851,7 +1851,7 @@ mod tests {
18511851
fn coerce_values_for_lambdas(
18521852
&self,
18531853
fields: &[ValueOrLambda<DataType, DataType>],
1854-
) -> Result<Vec<DataType>> {
1854+
) -> Result<Option<Vec<DataType>>> {
18551855
// thoerical impl of array_reduce without finish
18561856
let [
18571857
ValueOrLambda::Value(list),
@@ -1862,7 +1862,7 @@ mod tests {
18621862
unreachable!()
18631863
};
18641864

1865-
Ok(vec![list.clone(), merge.clone()])
1865+
Ok(Some(vec![list.clone(), merge.clone()]))
18661866
}
18671867

18681868
fn lambda_parameters(
@@ -1925,8 +1925,7 @@ mod tests {
19251925
#[test]
19261926
fn test_higher_order_function_coerce_values_for_lambdas() {
19271927
let fun = MockHigherOrderUDF {
1928-
signature: HigherOrderSignature::variadic_any(Volatility::Immutable)
1929-
.with_coerce_values_for_lambdas(),
1928+
signature: HigherOrderSignature::variadic_any(Volatility::Immutable),
19301929
coerced_value_types: vec![],
19311930
};
19321931

0 commit comments

Comments
 (0)