Skip to content

Commit 2ddec30

Browse files
committed
improve OneOf signature diagnostics
Signed-off-by: yaommen <myanstu@163.com>
1 parent bc2b36c commit 2ddec30

File tree

11 files changed: +631 -65 lines changed

11 files changed

+631
-65
lines changed

datafusion/expr/src/type_coercion/functions.rs

Lines changed: 524 additions & 39 deletions
Large diffs are not rendered by default.

datafusion/expr/src/udaf.rs

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ use std::vec;
2626

2727
use arrow::datatypes::{DataType, Field, FieldRef};
2828

29-
use datafusion_common::{Result, ScalarValue, Statistics, exec_err, not_impl_err};
29+
use datafusion_common::{
30+
DataFusionError, Result, ScalarValue, Statistics, exec_err, not_impl_err,
31+
};
3032
use datafusion_expr_common::dyn_eq::{DynEq, DynHash};
3133
use datafusion_expr_common::operator::Operator;
3234
use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
@@ -272,6 +274,16 @@ impl AggregateUDF {
272274
self.inner.coerce_types(arg_types)
273275
}
274276

277+
/// Returns a function-specific signature failure for argument combinations
278+
/// that are better described semantically than by generic signature
279+
/// diagnostics.
280+
pub fn diagnose_failed_signature(
281+
&self,
282+
arg_types: &[DataType],
283+
) -> Option<DataFusionError> {
284+
self.inner.diagnose_failed_signature(arg_types)
285+
}
286+
275287
/// See [`AggregateUDFImpl::with_beneficial_ordering`] for more details.
276288
pub fn with_beneficial_ordering(
277289
self,
@@ -799,6 +811,16 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash + Send + Sync + Any {
799811
not_impl_err!("Function {} does not implement coerce_types", self.name())
800812
}
801813

814+
/// Returns a semantic signature failure for argument combinations that
815+
/// should be surfaced directly instead of going through generic `OneOf`
816+
/// diagnostics.
817+
fn diagnose_failed_signature(
818+
&self,
819+
_arg_types: &[DataType],
820+
) -> Option<DataFusionError> {
821+
None
822+
}
823+
802824
/// If this function is max, return true
803825
/// If the function is min, return false
804826
/// Otherwise return None (the default)
@@ -1339,6 +1361,13 @@ impl AggregateUDFImpl for AliasedAggregateUDFImpl {
13391361
self.inner.coerce_types(arg_types)
13401362
}
13411363

1364+
fn diagnose_failed_signature(
1365+
&self,
1366+
arg_types: &[DataType],
1367+
) -> Option<DataFusionError> {
1368+
self.inner.diagnose_failed_signature(arg_types)
1369+
}
1370+
13421371
fn return_field(&self, arg_fields: &[FieldRef]) -> Result<FieldRef> {
13431372
self.inner.return_field(arg_fields)
13441373
}

datafusion/functions-aggregate/src/average.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ use arrow::datatypes::{
3232
DurationSecondType, Field, FieldRef, Float64Type, TimeUnit, UInt64Type, i256,
3333
};
3434
use datafusion_common::types::{NativeType, logical_float64};
35-
use datafusion_common::{Result, ScalarValue, exec_err, not_impl_err};
35+
use datafusion_common::{
36+
DataFusionError, Result, ScalarValue, exec_err, not_impl_err, plan_datafusion_err,
37+
};
3638
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
3739
use datafusion_expr::utils::format_state_name;
3840
use datafusion_expr::{
@@ -134,6 +136,18 @@ impl AggregateUDFImpl for Avg {
134136
&self.signature
135137
}
136138

139+
fn diagnose_failed_signature(
140+
&self,
141+
arg_types: &[DataType],
142+
) -> Option<DataFusionError> {
143+
match arg_types {
144+
[DataType::Boolean] => {
145+
Some(plan_datafusion_err!("Avg not supported for Boolean"))
146+
}
147+
_ => None,
148+
}
149+
}
150+
137151
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
138152
match &arg_types[0] {
139153
DataType::Decimal32(precision, scale) => {

datafusion/functions-aggregate/src/sum.rs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@ use datafusion_common::types::{
3232
NativeType, logical_float64, logical_int8, logical_int16, logical_int32,
3333
logical_int64, logical_uint8, logical_uint16, logical_uint32, logical_uint64,
3434
};
35-
use datafusion_common::{HashMap, Result, ScalarValue, exec_err, not_impl_err};
35+
use datafusion_common::{
36+
DataFusionError, HashMap, Result, ScalarValue, exec_err, not_impl_err,
37+
plan_datafusion_err,
38+
};
3639
use datafusion_expr::expr::AggregateFunction;
3740
use datafusion_expr::expr_fn::cast;
3841
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
@@ -208,6 +211,18 @@ impl AggregateUDFImpl for Sum {
208211
&self.signature
209212
}
210213

214+
fn diagnose_failed_signature(
215+
&self,
216+
arg_types: &[DataType],
217+
) -> Option<DataFusionError> {
218+
match arg_types {
219+
[DataType::Boolean] => {
220+
Some(plan_datafusion_err!("Sum not supported for Boolean"))
221+
}
222+
_ => None,
223+
}
224+
}
225+
211226
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
212227
match &arg_types[0] {
213228
DataType::Int64 => Ok(DataType::Int64),

datafusion/functions-window/src/nth_value.rs

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ use datafusion_doc::window_doc_sections::DOC_SECTION_ANALYTICAL;
2828
use datafusion_expr::window_state::WindowAggState;
2929
use datafusion_expr::{
3030
Documentation, LimitEffect, Literal, PartitionEvaluator, ReversedUDWF, Signature,
31-
TypeSignature, Volatility, WindowUDFImpl,
31+
Volatility, WindowUDFImpl,
3232
};
3333
use datafusion_functions_window_common::field;
3434
use datafusion_functions_window_common::partition::PartitionEvaluatorArgs;
@@ -96,17 +96,14 @@ pub struct NthValue {
9696
impl NthValue {
9797
/// Create a new `nth_value` function
9898
pub fn new(kind: NthValueKind) -> Self {
99-
Self {
100-
signature: Signature::one_of(
101-
vec![
102-
TypeSignature::Nullary,
103-
TypeSignature::Any(1),
104-
TypeSignature::Any(2),
105-
],
106-
Volatility::Immutable,
107-
),
108-
kind,
109-
}
99+
let signature = match kind {
100+
NthValueKind::First | NthValueKind::Last => {
101+
Signature::any(1, Volatility::Immutable)
102+
}
103+
NthValueKind::Nth => Signature::any(2, Volatility::Immutable),
104+
};
105+
106+
Self { signature, kind }
110107
}
111108

112109
pub fn first() -> Self {

datafusion/sqllogictest/test_files/array.slt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8399,17 +8399,17 @@ select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond,
83998399
[2021-01-01T00:00:00-05:00, 2021-01-01T01:29:54.500-05:00, 2021-01-01T02:59:49-05:00, 2021-01-01T04:29:43.500-05:00, 2021-01-01T05:59:38-05:00]
84008400

84018401
## mixing types for timestamps is not supported
8402-
query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature
8402+
query error DataFusion error: Error during planning: Function 'generate_series' failed to match any signature
84038403
select generate_series(arrow_cast('2021-01-01T00:00:00', 'Timestamp(Nanosecond, Some("-05:00"))'), DATE '2021-01-02', INTERVAL '1' HOUR);
84048404

84058405
## mixing types not allowed even if an argument is null
8406-
query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature
8406+
query error DataFusion error: Error during planning: Function 'generate_series' failed to match any signature
84078407
select generate_series(TIMESTAMP '1992-09-01', DATE '1993-03-01', NULL);
84088408

8409-
query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature
8409+
query error DataFusion error: Error during planning: Function 'generate_series' failed to match any signature
84108410
select generate_series(1, '2024-01-01', '2025-01-02');
84118411

8412-
query error DataFusion error: Error during planning: Internal error: Function 'generate_series' failed to match any signature
8412+
query error DataFusion error: Error during planning: Function 'generate_series' failed to match any signature
84138413
select generate_series('2024-01-01'::timestamp, '2025-01-02', interval '1 day');
84148414

84158415
## should return NULL

datafusion/sqllogictest/test_files/errors.slt

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,39 @@ from aggregate_test_100
125125
order by c9
126126

127127
# WindowFunction wrong signature
128-
statement error DataFusion error: Error during planning: Internal error: Function 'nth_value' failed to match any signature
128+
statement error DataFusion error: Error during planning: The function 'nth_value' expected 2 arguments but received 3
129129
select
130130
c9,
131131
nth_value(c5, 2, 3) over (order by c9) as nv1
132132
from aggregate_test_100
133133
order by c9
134134

135+
statement error DataFusion error: Error during planning: The function 'nth_value' expected 2 arguments but received 1
136+
select
137+
c9,
138+
nth_value(c5) over (order by c9) as nv2
139+
from aggregate_test_100
140+
order by c9
141+
142+
statement error DataFusion error: Error during planning: The function 'first_value' expected 1 arguments but received 2
143+
select
144+
c9,
145+
first_value(c5, 2) over (order by c9) as fv1
146+
from aggregate_test_100
147+
order by c9
148+
149+
statement error DataFusion error: Error during planning: 'nth_value' does not support zero arguments
150+
select nth_value() over (order by x) from (values (1), (2)) as t(x)
151+
152+
statement error DataFusion error: Error during planning: 'first_value' does not support zero arguments
153+
select first_value() over (order by x) from (values (1), (2)) as t(x)
154+
155+
query error DataFusion error: Error during planning: Sum not supported for Boolean
156+
select sum(bool_col) from (values (true), (false), (null)) as t(bool_col);
157+
158+
query error DataFusion error: Error during planning: Avg not supported for Boolean
159+
select avg(bool_col) from (values (true), (false), (null)) as t(bool_col);
160+
135161

136162
# nth_value with wrong name
137163
statement error DataFusion error: Error during planning: Invalid function 'nth_vlue'.\nDid you mean 'nth_value'?

datafusion/sqllogictest/test_files/functions.slt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,10 +208,10 @@ SELECT substr('alphabet', NULL, 2)
208208
----
209209
NULL
210210

211-
statement error Function 'substr' failed to match any signature
211+
statement error DataFusion error: Error during planning: Function 'substr' requires String, but received Int64
212212
SELECT substr(1, 3)
213213

214-
statement error Function 'substr' failed to match any signature
214+
statement error DataFusion error: Error during planning: Function 'substr' requires String, but received Int64
215215
SELECT substr(1, 3, 4)
216216

217217
query T

datafusion/sqllogictest/test_files/named_arguments.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ SELECT substr("STR" => 'hello world', "start_pos" => 7);
8686

8787
# Error: wrong number of arguments
8888
# This query provides only 1 argument but substr requires 2 or 3
89-
query error Function 'substr' failed to match any signature
89+
query error DataFusion error: Error during planning: Function 'substr' expects 2 to 3 arguments but received 1
9090
SELECT substr(str => 'hello world');
9191

9292
#############

datafusion/sqllogictest/test_files/spark/math/hex.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ SELECT hex(column1) FROM VALUES (arrow_cast('hello', 'LargeBinary')), (NULL), (a
5656
NULL
5757
776F726C64
5858

59-
statement error Function 'hex' expects 1 arguments but received 2
59+
statement error DataFusion error: Error during planning: Function 'hex' expects 1 arguments but received 2
6060
SELECT hex(1, 2);
6161

6262
query T

0 commit comments

Comments
 (0)