Skip to content

Commit b0349ff

Browse files
mhilton and alamb authored
feat: support nanosecond date_part (#20674)
Support using 'nanosecond' as a part in the date_part function. If nanosecond is requested then the date_part will return the seconds, and smaller units, scaled to nanoseconds. This is consistent with the behaviour of 'millisecond' and 'microsecond'. In order to accommodate the required range of results, a request for 'nanosecond' will return a 64-bit integer, rather than a 32-bit integer as is returned for everything else (except 'epoch'). ## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> - Closes #20671 ## Rationale for this change <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> The feature is a) useful & b) documented ## What changes are included in this PR? <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> ## Are these changes tested? <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> Yes, some added and modified SLT tests. ## Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> <!-- If there are any breaking changes to public APIs, please add the `api change` label. 
--> Only in as much as the implementation now matches the documentation (for this specific case). --------- Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent d412ba5 commit b0349ff

File tree

2 files changed

+124
-17
lines changed

2 files changed

+124
-17
lines changed

datafusion/functions/src/datetime/date_part.rs

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use std::str::FromStr;
2020
use std::sync::Arc;
2121

2222
use arrow::array::timezone::Tz;
23-
use arrow::array::{Array, ArrayRef, Float64Array, Int32Array};
23+
use arrow::array::{Array, ArrayRef, Float64Array, Int32Array, Int64Array};
2424
use arrow::compute::kernels::cast_utils::IntervalUnit;
2525
use arrow::compute::{DatePart, binary, date_part};
2626
use arrow::datatypes::DataType::{
@@ -167,6 +167,9 @@ impl ScalarUDFImpl for DatePartFunc {
167167
.map(|part| {
168168
if is_epoch(part) {
169169
Field::new(self.name(), DataType::Float64, nullable)
170+
} else if is_nanosecond(part) {
171+
// See notes on [seconds_ns] for rationale
172+
Field::new(self.name(), DataType::Int64, nullable)
170173
} else {
171174
Field::new(self.name(), DataType::Int32, nullable)
172175
}
@@ -218,7 +221,7 @@ impl ScalarUDFImpl for DatePartFunc {
218221
IntervalUnit::Second => seconds_as_i32(array.as_ref(), Second)?,
219222
IntervalUnit::Millisecond => seconds_as_i32(array.as_ref(), Millisecond)?,
220223
IntervalUnit::Microsecond => seconds_as_i32(array.as_ref(), Microsecond)?,
221-
IntervalUnit::Nanosecond => seconds_as_i32(array.as_ref(), Nanosecond)?,
224+
IntervalUnit::Nanosecond => seconds_ns(array.as_ref())?,
222225
// century and decade are not supported by `DatePart`, although they are supported in postgres
223226
_ => return exec_err!("Date part '{part}' not supported"),
224227
}
@@ -321,6 +324,12 @@ fn is_epoch(part: &str) -> bool {
321324
matches!(part.to_lowercase().as_str(), "epoch")
322325
}
323326

327+
fn is_nanosecond(part: &str) -> bool {
328+
IntervalUnit::from_str(part_normalization(part))
329+
.map(|p| matches!(p, IntervalUnit::Nanosecond))
330+
.unwrap_or(false)
331+
}
332+
324333
fn date_to_scalar(date: NaiveDate, target_type: &DataType) -> Option<ScalarValue> {
325334
Some(match target_type {
326335
Date32 => ScalarValue::Date32(Some(Date32Type::from_naive_date(date))),
@@ -517,3 +526,39 @@ fn epoch(array: &dyn Array) -> Result<ArrayRef> {
517526
};
518527
Ok(Arc::new(f))
519528
}
529+
530+
/// Invoke [`date_part`] on an `array` (e.g. Timestamp) and convert the
531+
/// result to a total number of nanoseconds as an Int64 array.
532+
///
533+
/// This returns an Int64 rather than Int32 because there are 1 billion
534+
/// `nanosecond`s in each second, so representing up to 60 seconds as
535+
/// nanoseconds can be values up to 60 billion, which does not fit in Int32.
536+
fn seconds_ns(array: &dyn Array) -> Result<ArrayRef> {
537+
let secs = date_part(array, DatePart::Second)?;
538+
// This assumes array is primitive and not a dictionary
539+
let secs = as_int32_array(secs.as_ref())?;
540+
let subsecs = date_part(array, DatePart::Nanosecond)?;
541+
let subsecs = as_int32_array(subsecs.as_ref())?;
542+
543+
// Special case where there are no nulls.
544+
if subsecs.null_count() == 0 {
545+
let r: Int64Array = binary(secs, subsecs, |secs, subsecs| {
546+
(secs as i64) * 1_000_000_000 + (subsecs as i64)
547+
})?;
548+
Ok(Arc::new(r))
549+
} else {
550+
// Nulls in secs are preserved, nulls in subsecs are treated as zero to account for the case
551+
// where the number of nanoseconds overflows.
552+
let r: Int64Array = secs
553+
.iter()
554+
.zip(subsecs)
555+
.map(|(secs, subsecs)| {
556+
secs.map(|secs| {
557+
let subsecs = subsecs.unwrap_or(0);
558+
(secs as i64) * 1_000_000_000 + (subsecs as i64)
559+
})
560+
})
561+
.collect();
562+
Ok(Arc::new(r))
563+
}
564+
}

datafusion/sqllogictest/test_files/datetime/date_part.slt

Lines changed: 77 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,22 @@ SELECT date_part('microsecond', ts_nano_no_tz), date_part('microsecond', ts_nano
212212
123456 123456 123456 123000 123000 123000
213213
123456 123456 123456 123000 123000 123000
214214

215+
# date_part (nanosecond)
216+
query IIIIII
217+
SELECT date_part('nanosecond', ts_nano_no_tz), date_part('nanosecond', ts_nano_utc), date_part('nanosecond', ts_nano_eastern), date_part('nanosecond', ts_milli_no_tz), date_part('nanosecond', ts_milli_utc), date_part('nanosecond', ts_milli_eastern) FROM source_ts;
218+
----
219+
0 0 0 0 0 0
220+
0 0 0 0 0 0
221+
0 0 0 0 0 0
222+
0 0 0 0 0 0
223+
0 0 0 0 0 0
224+
0 0 0 0 0 0
225+
0 0 0 0 0 0
226+
30000000000 30000000000 30000000000 30000000000 30000000000 30000000000
227+
123000000 123000000 123000000 123000000 123000000 123000000
228+
123456000 123456000 123456000 123000000 123000000 123000000
229+
123456789 123456789 123456789 123000000 123000000 123000000
230+
215231
### Cleanup
216232
statement ok
217233
drop table source_ts;
@@ -433,6 +449,12 @@ SELECT arrow_typeof(date_part('minute', to_timestamp('2020-09-08T12:12:00+00:00'
433449
----
434450
Int32
435451

452+
# nanosecond can exceed Int32 and returns Int64
453+
query T
454+
SELECT arrow_typeof(date_part('nanosecond', to_timestamp('2020-09-08T12:12:00+00:00')))
455+
----
456+
Int64
457+
436458
query I
437459
SELECT EXTRACT(second FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
438460
----
@@ -448,8 +470,11 @@ SELECT EXTRACT(microsecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
448470
----
449471
12123456
450472

451-
query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
473+
# note the output is more than Int32 can store
474+
query I
452475
SELECT EXTRACT(nanosecond FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
476+
----
477+
12123456780
453478

454479
query I
455480
SELECT EXTRACT("second" FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
@@ -466,8 +491,10 @@ SELECT EXTRACT("microsecond" FROM timestamp '2020-09-08T12:00:12.12345678+00:00'
466491
----
467492
12123456
468493

469-
query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
494+
query I
470495
SELECT EXTRACT("nanosecond" FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
496+
----
497+
12123456780
471498

472499
query I
473500
SELECT EXTRACT('second' FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
@@ -484,9 +511,10 @@ SELECT EXTRACT('microsecond' FROM timestamp '2020-09-08T12:00:12.12345678+00:00'
484511
----
485512
12123456
486513

487-
query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
514+
query I
488515
SELECT EXTRACT('nanosecond' FROM timestamp '2020-09-08T12:00:12.12345678+00:00')
489-
516+
----
517+
12123456780
490518

491519
# Keep precision when coercing Utf8 to Timestamp
492520
query I
@@ -504,9 +532,10 @@ SELECT date_part('microsecond', timestamp '2020-09-08T12:00:12.12345678+00:00')
504532
----
505533
12123456
506534

507-
query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
535+
query I
508536
SELECT date_part('nanosecond', timestamp '2020-09-08T12:00:12.12345678+00:00')
509-
537+
----
538+
12123456780
510539

511540
query I
512541
SELECT date_part('second', '2020-09-08T12:00:12.12345678+00:00')
@@ -523,8 +552,30 @@ SELECT date_part('microsecond', '2020-09-08T12:00:12.12345678+00:00')
523552
----
524553
12123456
525554

526-
query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
555+
query I
527556
SELECT date_part('nanosecond', '2020-09-08T12:00:12.12345678+00:00')
557+
----
558+
12123456780
559+
560+
query I
561+
SELECT EXTRACT(nanosecond FROM ts)
562+
FROM (VALUES
563+
(timestamp '2020-09-08T12:00:12.12345678+00:00'),
564+
(NULL::timestamp)
565+
) AS t(ts)
566+
----
567+
12123456780
568+
NULL
569+
570+
query I
571+
SELECT date_part('nanosecond', ts)
572+
FROM (VALUES
573+
(timestamp '2020-09-08T12:00:12.12345678+00:00'),
574+
(NULL::timestamp)
575+
) AS t(ts)
576+
----
577+
12123456780
578+
NULL
528579

529580
# test_date_part_time
530581

@@ -579,8 +630,10 @@ SELECT extract(microsecond from arrow_cast('23:32:50'::time, 'Time32(Second)'))
579630
----
580631
50000000
581632

582-
query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
633+
query I
583634
SELECT extract(nanosecond from arrow_cast('23:32:50'::time, 'Time32(Second)'))
635+
----
636+
50000000000
584637

585638
query R
586639
SELECT date_part('epoch', arrow_cast('23:32:50'::time, 'Time32(Second)'))
@@ -643,8 +696,10 @@ SELECT extract(microsecond from arrow_cast('23:32:50.123'::time, 'Time32(Millise
643696
----
644697
50123000
645698

646-
query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
699+
query I
647700
SELECT extract(nanosecond from arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)'))
701+
----
702+
50123000000
648703

649704
query R
650705
SELECT date_part('epoch', arrow_cast('23:32:50.123'::time, 'Time32(Millisecond)'))
@@ -707,8 +762,10 @@ SELECT extract(microsecond from arrow_cast('23:32:50.123456'::time, 'Time64(Micr
707762
----
708763
50123456
709764

710-
query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
765+
query I
711766
SELECT extract(nanosecond from arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)'))
767+
----
768+
50123456000
712769

713770
query R
714771
SELECT date_part('epoch', arrow_cast('23:32:50.123456'::time, 'Time64(Microsecond)'))
@@ -797,8 +854,10 @@ SELECT extract(us from arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond
797854
----
798855
50123456
799856

800-
query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
857+
query I
801858
SELECT date_part('nanosecond', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)'))
859+
----
860+
50123456789
802861

803862
query R
804863
SELECT date_part('epoch', arrow_cast('23:32:50.123456789'::time, 'Time64(Nanosecond)'))
@@ -1172,8 +1231,11 @@ SELECT (date_part('microsecond', now()) = EXTRACT(microsecond FROM now()))
11721231
----
11731232
true
11741233

1175-
query error DataFusion error: This feature is not implemented: Date part Nanosecond not supported
1234+
query B
11761235
SELECT (date_part('nanosecond', now()) = EXTRACT(nanosecond FROM now()))
1236+
----
1237+
true
1238+
11771239

11781240
query I
11791241
SELECT date_part('ISODOW', CAST('2000-01-01' AS DATE))
@@ -1211,7 +1273,7 @@ query D
12111273
select c from t1 where extract(year from c) <> 2024;
12121274
----
12131275
1990-01-01
1214-
2030-01-01
1276+
2030-01-01
12151277

12161278
query D
12171279
select c from t1 where extract(year from c) > 2024;
@@ -1495,7 +1557,7 @@ query TT
14951557
explain select c from t1 where extract (nanosecond from c) = 2024
14961558
----
14971559
logical_plan
1498-
01)Filter: date_part(Utf8("NANOSECOND"), t1.c) = Int32(2024)
1560+
01)Filter: date_part(Utf8("NANOSECOND"), t1.c) = Int64(2024)
14991561
02)--TableScan: t1 projection=[c]
15001562
physical_plan
15011563
01)FilterExec: date_part(NANOSECOND, c@0) = 2024
@@ -1582,7 +1644,7 @@ query D
15821644
select c1_date32 from t2 where extract(year from c1_date32) <> 2024;
15831645
----
15841646
1990-05-20
1585-
2030-12-31
1647+
2030-12-31
15861648

15871649
query P
15881650
select c2_ts_sec from t2 where extract(year from c2_ts_sec) > 2024;

0 commit comments

Comments
 (0)