Skip to content

Commit 7d6cd13

Browse files
tshauck and alamb authored
fix: throw error on sub-day generate_series increments (#11907)
* fix: throw error on sub-day generate_series increments
* refactor: avoid `loop`
* Add a few more tests
* Update datafusion/functions-nested/src/range.rs

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>

* refactor: tweak from feedback
* fix: fix dup rows

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent 64a9280 commit 7d6cd13

2 files changed

Lines changed: 61 additions & 33 deletions

File tree

datafusion/functions-nested/src/range.rs

Lines changed: 46 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,11 @@
1818
//! [`ScalarUDFImpl`] definitions for range and gen_series functions.
1919
2020
use crate::utils::make_scalar_function;
21-
use arrow::array::{Array, ArrayRef, Int64Array, ListArray};
21+
use arrow::array::{Array, ArrayRef, Date32Builder, Int64Array, ListArray, ListBuilder};
2222
use arrow::datatypes::{DataType, Field};
2323
use arrow_array::types::{Date32Type, IntervalMonthDayNanoType};
24-
use arrow_array::{Date32Array, NullArray};
25-
use arrow_buffer::{
26-
BooleanBufferBuilder, IntervalMonthDayNano, NullBuffer, OffsetBuffer,
27-
};
24+
use arrow_array::NullArray;
25+
use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
2826
use arrow_schema::DataType::{Date32, Int64, Interval, List};
2927
use arrow_schema::IntervalUnit::MonthDayNano;
3028
use datafusion_common::cast::{as_date32_array, as_int64_array, as_interval_mdn_array};
@@ -33,6 +31,7 @@ use datafusion_expr::{
3331
ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility,
3432
};
3533
use std::any::Any;
34+
use std::iter::from_fn;
3635
use std::sync::Arc;
3736

3837
make_udf_expr_and_func!(
@@ -166,8 +165,11 @@ impl ScalarUDFImpl for GenSeries {
166165
match args[0].data_type() {
167166
Int64 => make_scalar_function(|args| gen_range_inner(args, true))(args),
168167
Date32 => make_scalar_function(|args| gen_range_date(args, true))(args),
169-
_ => {
170-
exec_err!("unsupported type for range")
168+
dt => {
169+
exec_err!(
170+
"unsupported type for range. Expected Int64 or Date32, got: {}",
171+
dt
172+
)
171173
}
172174
}
173175
}
@@ -311,39 +313,53 @@ fn gen_range_date(args: &[ArrayRef], include_upper: bool) -> Result<ArrayRef> {
311313
Some(as_interval_mdn_array(&args[2])?),
312314
);
313315

314-
let mut values = vec![];
315-
let mut offsets = vec![0];
316+
// values are date32s
317+
let values_builder = Date32Builder::new();
318+
let mut list_builder = ListBuilder::new(values_builder);
319+
316320
for (idx, stop) in stop_array.iter().enumerate() {
317321
let mut stop = stop.unwrap_or(0);
318-
let start = start_array.as_ref().map(|x| x.value(idx)).unwrap_or(0);
319-
let step = step_array.as_ref().map(|arr| arr.value(idx)).unwrap_or(
320-
IntervalMonthDayNano {
321-
months: 0,
322-
days: 0,
323-
nanoseconds: 1,
324-
},
325-
);
322+
323+
let start = if let Some(start_array_values) = start_array {
324+
start_array_values.value(idx)
325+
} else {
326+
list_builder.append_null();
327+
continue;
328+
};
329+
330+
let step = if let Some(step) = step_array {
331+
step.value(idx)
332+
} else {
333+
list_builder.append_null();
334+
continue;
335+
};
336+
326337
let (months, days, _) = IntervalMonthDayNanoType::to_parts(step);
338+
339+
if months == 0 && days == 0 {
340+
return exec_err!("Cannot generate date range less than 1 day.");
341+
}
342+
327343
let neg = months < 0 || days < 0;
328344
if !include_upper {
329345
stop = Date32Type::subtract_month_day_nano(stop, step);
330346
}
331347
let mut new_date = start;
332-
loop {
333-
if neg && new_date < stop || !neg && new_date > stop {
334-
break;
348+
349+
let values = from_fn(|| {
350+
if (neg && new_date < stop) || (!neg && new_date > stop) {
351+
None
352+
} else {
353+
let current_date = new_date;
354+
new_date = Date32Type::add_month_day_nano(new_date, step);
355+
Some(Some(current_date))
335356
}
336-
values.push(new_date);
337-
new_date = Date32Type::add_month_day_nano(new_date, step);
338-
}
339-
offsets.push(values.len() as i32);
357+
});
358+
359+
list_builder.append_value(values);
340360
}
341361

342-
let arr = Arc::new(ListArray::try_new(
343-
Arc::new(Field::new("item", Date32, true)),
344-
OffsetBuffer::new(offsets.into()),
345-
Arc::new(Date32Array::from(values)),
346-
None,
347-
)?);
362+
let arr = Arc::new(list_builder.finish());
363+
348364
Ok(arr)
349365
}

datafusion/sqllogictest/test_files/array.slt

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1971,7 +1971,7 @@ select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), co
19711971
# `from` may be larger than `to` and `stride` is positive
19721972
query ????
19731973
select array_slice(a, -1, 2, 1), array_slice(a, -1, 2),
1974-
array_slice(a, 3, 2, 1), array_slice(a, 3, 2)
1974+
array_slice(a, 3, 2, 1), array_slice(a, 3, 2)
19751975
from (values ([1.0, 2.0, 3.0, 3.0]), ([4.0, 5.0, 3.0]), ([6.0])) t(a);
19761976
----
19771977
[] [] [] []
@@ -5711,7 +5711,7 @@ select
57115711

57125712
# Test range for other edge cases
57135713
query ????????
5714-
select
5714+
select
57155715
range(9223372036854775807, 9223372036854775807, -1) as c1,
57165716
range(9223372036854775807, 9223372036854775806, -1) as c2,
57175717
range(9223372036854775807, 9223372036854775807, 1) as c3,
@@ -5787,6 +5787,9 @@ select range(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR)
57875787
----
57885788
[]
57895789

5790+
query error DataFusion error: Execution error: Cannot generate date range less than 1 day\.
5791+
select range(DATE '1993-03-01', DATE '1993-03-01', INTERVAL '1' HOUR)
5792+
57905793
query ?????????
57915794
select generate_series(5),
57925795
generate_series(2, 5),
@@ -5801,6 +5804,9 @@ select generate_series(5),
58015804
----
58025805
[0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] [1, 2, 3, 4, 5] [5, 4, 3, 2, 1] [10, 7, 4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01, 1993-03-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] [1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01]
58035806

5807+
query error DataFusion error: Execution error: unsupported type for range. Expected Int64 or Date32, got: Timestamp\(Nanosecond, None\)
5808+
select generate_series('2021-01-01'::timestamp, '2021-01-02'::timestamp, INTERVAL '1' HOUR);
5809+
58045810
## should return NULL
58055811
query ?
58065812
select generate_series(DATE '1992-09-01', NULL, INTERVAL '1' YEAR);
@@ -5832,6 +5838,12 @@ select generate_series(DATE '1993-03-01', DATE '1989-04-01', INTERVAL '1' YEAR)
58325838
----
58335839
[]
58345840

5841+
query error DataFusion error: Execution error: Cannot generate date range less than 1 day.
5842+
select generate_series(DATE '2000-01-01', DATE '2000-01-03', INTERVAL '1' HOUR)
5843+
5844+
query error DataFusion error: Execution error: Cannot generate date range less than 1 day.
5845+
select generate_series(DATE '2000-01-01', DATE '2000-01-03', INTERVAL '-1' HOUR)
5846+
58355847
# Test generate_series with zero step
58365848
query error DataFusion error: Execution error: step can't be 0 for function generate_series\(start \[, stop, step\]\)
58375849
select generate_series(1, 1, 0);
@@ -5849,7 +5861,7 @@ select
58495861

58505862
# Test generate_series for other edge cases
58515863
query ????
5852-
select
5864+
select
58535865
generate_series(9223372036854775807, 9223372036854775807, -1) as c1,
58545866
generate_series(9223372036854775807, 9223372036854775807, 1) as c2,
58555867
generate_series(-9223372036854775808, -9223372036854775808, -1) as c3,

0 commit comments

Comments
 (0)