Skip to content

Commit fcbf12f

Browse files
authored
feat: add GetTimestamp support via codegen dispatcher (#4454)
1 parent 2596861 commit fcbf12f

4 files changed

Lines changed: 145 additions & 1 deletion

File tree

spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ object QueryPlanSerde extends Logging with CometExprShim with CometTypeShim {
234234
classOf[FromUnixTime] -> CometFromUnixTime,
235235
classOf[FromUTCTimestamp] -> CometFromUTCTimestamp,
236236
classOf[ToUTCTimestamp] -> CometToUTCTimestamp,
237+
classOf[GetTimestamp] -> CometGetTimestamp,
237238
classOf[LastDay] -> CometLastDay,
238239
classOf[Hour] -> CometHour,
239240
classOf[MakeDate] -> CometMakeDate,

spark/src/main/scala/org/apache/comet/serde/datetime.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ package org.apache.comet.serde
2121

2222
import java.util.Locale
2323

24-
import org.apache.spark.sql.catalyst.expressions.{AddMonths, Attribute, ConvertTimezone, DateAdd, DateDiff, DateFormatClass, DateFromUnixDate, DateSub, DayOfMonth, DayOfWeek, DayOfYear, Days, FromUTCTimestamp, GetDateField, Hour, Hours, LastDay, Literal, MakeDate, MakeTimestamp, MicrosToTimestamp, MillisToTimestamp, Minute, Month, MonthsBetween, NextDay, Quarter, Second, SecondsToTimestamp, ToUnixTimestamp, ToUTCTimestamp, TruncDate, TruncTimestamp, UnixDate, UnixMicros, UnixMillis, UnixSeconds, UnixTimestamp, WeekDay, WeekOfYear, Year}
24+
import org.apache.spark.sql.catalyst.expressions.{AddMonths, Attribute, ConvertTimezone, DateAdd, DateDiff, DateFormatClass, DateFromUnixDate, DateSub, DayOfMonth, DayOfWeek, DayOfYear, Days, FromUTCTimestamp, GetDateField, GetTimestamp, Hour, Hours, LastDay, Literal, MakeDate, MakeTimestamp, MicrosToTimestamp, MillisToTimestamp, Minute, Month, MonthsBetween, NextDay, Quarter, Second, SecondsToTimestamp, ToUnixTimestamp, ToUTCTimestamp, TruncDate, TruncTimestamp, UnixDate, UnixMicros, UnixMillis, UnixSeconds, UnixTimestamp, WeekDay, WeekOfYear, Year}
2525
import org.apache.spark.sql.internal.SQLConf
2626
import org.apache.spark.sql.types.{DateType, DoubleType, FloatType, IntegerType, LongType, StringType, TimestampNTZType, TimestampType}
2727
import org.apache.spark.unsafe.types.UTF8String
@@ -789,3 +789,5 @@ object CometUnixMillis extends CometCodegenDispatch[UnixMillis]
789789
object CometUnixMicros extends CometCodegenDispatch[UnixMicros]
790790

791791
object CometToUnixTimestamp extends CometCodegenDispatch[ToUnixTimestamp]
792+
793+
/**
 * Serde entry for Spark's `GetTimestamp` expression. It extends `CometCodegenDispatch` with no
 * overrides, in the same way as the neighbouring `CometUnixMicros` and `CometToUnixTimestamp`
 * objects, and is registered for `classOf[GetTimestamp]` in `QueryPlanSerde`.
 */
object CometGetTimestamp extends CometCodegenDispatch[GetTimestamp]
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing,
12+
-- software distributed under the License is distributed on an
13+
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
-- KIND, either express or implied. See the License for the
15+
-- specific language governing permissions and limitations
16+
-- under the License.
17+
18+
-- Routes GetTimestamp through the codegen dispatcher.
19+
-- GetTimestamp is generated by to_timestamp(string, format), to_date(string, format),
20+
-- try_to_timestamp(string, format) and to_timestamp_ntz(string, format).
21+
-- Config: spark.sql.session.timeZone=UTC
22+
-- Config: spark.comet.exec.scalaUDF.codegen.enabled=true
23+
24+
-- Single string column used as the input of the to_timestamp, to_date, try_to_timestamp
-- and to_timestamp_ntz queries in this file.
statement
25+
CREATE TABLE test_get_timestamp(s string) USING parquet
26+
27+
-- Rows, in order: a well-formed timestamp, the Unix epoch, one second before the epoch,
-- a timestamp whose month is 13, text that is not a date, an empty string, and NULL.
statement
28+
INSERT INTO test_get_timestamp VALUES
29+
('2024-06-15 10:30:45'),
30+
('1970-01-01 00:00:00'),
31+
('1969-12-31 23:59:59'),
32+
('2024-13-01 00:00:00'),
33+
('garbage'),
34+
(''),
35+
(NULL)
36+
37+
-- to_timestamp(string, format) -> GetTimestamp with TimestampType output
38+
query
39+
SELECT to_timestamp(s, 'yyyy-MM-dd HH:mm:ss') FROM test_get_timestamp
40+
41+
-- to_date(string, format) -> Cast(GetTimestamp(...), DateType)
42+
query
43+
SELECT to_date(s, 'yyyy-MM-dd HH:mm:ss') FROM test_get_timestamp
44+
45+
-- try_to_timestamp(string, format) -> GetTimestamp with failOnError=false
46+
query
47+
SELECT try_to_timestamp(s, 'yyyy-MM-dd HH:mm:ss') FROM test_get_timestamp
48+
49+
-- literal arguments
50+
-- to_timestamp with a string literal written in the same layout as the format
query
51+
SELECT to_timestamp('2024-06-15 10:30:45', 'yyyy-MM-dd HH:mm:ss')
52+
53+
-- to_date with a date-only literal and a date-only format
query
54+
SELECT to_date('2024-06-15', 'yyyy-MM-dd')
55+
56+
-- try_to_timestamp with a literal that is not written in the requested format
query
57+
SELECT try_to_timestamp('foo', 'yyyy-MM-dd')
58+
59+
-- NULL literal as the string argument
query
60+
SELECT to_timestamp(NULL, 'yyyy-MM-dd HH:mm:ss')
61+
62+
-- date-only format
63+
-- Single string column holding the date-only inputs for the two queries that follow.
statement
64+
CREATE TABLE test_get_timestamp_dates(s string) USING parquet
65+
66+
-- Rows, in order: a date with no time part, the Unix epoch date, and NULL.
statement
67+
INSERT INTO test_get_timestamp_dates VALUES
68+
('2024-06-15'),
69+
('1970-01-01'),
70+
(NULL)
71+
72+
-- to_timestamp over the date-only column with a date-only format
query
73+
SELECT to_timestamp(s, 'yyyy-MM-dd') FROM test_get_timestamp_dates
74+
75+
-- to_date over the same column and format
query
76+
SELECT to_date(s, 'yyyy-MM-dd') FROM test_get_timestamp_dates
77+
78+
-- column-as-format (non-literal format)
79+
-- s is the string to parse and fmt is the pattern passed as the second argument of
-- to_timestamp in the query below.
statement
80+
CREATE TABLE test_get_timestamp_fmt(s string, fmt string) USING parquet
81+
82+
-- Each row pairs a string with its own format. The last two rows put NULL in the string
-- column and in the format column respectively.
statement
83+
INSERT INTO test_get_timestamp_fmt VALUES
84+
('2024-06-15 10:30:45', 'yyyy-MM-dd HH:mm:ss'),
85+
('2024-06-15', 'yyyy-MM-dd'),
86+
('06/15/2024', 'MM/dd/yyyy'),
87+
(NULL, 'yyyy-MM-dd'),
88+
('2024-06-15', NULL)
89+
90+
-- Both arguments are column references, so the format can differ from row to row.
query
91+
SELECT to_timestamp(s, fmt) FROM test_get_timestamp_fmt
92+
93+
-- to_timestamp_ntz(string, format) -> GetTimestamp with TimestampNTZType output
94+
query
95+
SELECT to_timestamp_ntz(s, 'yyyy-MM-dd HH:mm:ss') FROM test_get_timestamp
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing,
12+
-- software distributed under the License is distributed on an
13+
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
-- KIND, either express or implied. See the License for the
15+
-- specific language governing permissions and limitations
16+
-- under the License.
17+
18+
-- ANSI mode: GetTimestamp throws on parse failure. The codegen dispatcher inherits
19+
-- the throw from Spark's own GetTimestamp.doGenCode. The time parser policy is pinned
20+
-- to CORRECTED so the JDK java.time formatter (and the CANNOT_PARSE_TIMESTAMP error class)
21+
-- is exercised regardless of the runtime default.
22+
-- Config: spark.sql.session.timeZone=UTC
23+
-- Config: spark.sql.ansi.enabled=true
24+
-- Config: spark.sql.legacy.timeParserPolicy=CORRECTED
25+
-- Config: spark.comet.exec.scalaUDF.codegen.enabled=true
26+
-- The CANNOT_PARSE_TIMESTAMP error class was standardized in Spark 3.5.
27+
-- MinSparkVersion: 3.5
28+
29+
-- Input text is not written in the yyyy-MM-dd layout at all.
query expect_error(CANNOT_PARSE_TIMESTAMP)
30+
SELECT to_timestamp('not a date', 'yyyy-MM-dd')
31+
32+
-- Input has the yyyy-MM-dd layout, but month 13 and day 99 are not valid calendar values.
query expect_error(CANNOT_PARSE_TIMESTAMP)
33+
SELECT to_timestamp('2024-13-99', 'yyyy-MM-dd')
34+
35+
-- to_date on the same unparseable text is expected to raise the same error class.
query expect_error(CANNOT_PARSE_TIMESTAMP)
36+
SELECT to_date('not a date', 'yyyy-MM-dd')
37+
38+
-- try_to_timestamp does NOT throw under ANSI mode (failOnError=false)
39+
query
40+
SELECT try_to_timestamp('not a date', 'yyyy-MM-dd')
41+
42+
-- Sentinel: confirms Comet ran the expression natively. If the dispatcher silently rejects
43+
-- GetTimestamp, the error queries above pass vacuously via Spark fallback. This valid
44+
-- query uses checkSparkAnswerAndOperator and fails if Comet did not execute it natively.
45+
query
46+
SELECT to_timestamp('2024-06-15 10:30:45', 'yyyy-MM-dd HH:mm:ss')

0 commit comments

Comments
 (0)