Skip to content

Commit 4ccb228

Browse files
committed
Fixes for TimeType
1 parent 965a642 commit 4ccb228

10 files changed

Lines changed: 95 additions & 54 deletions

File tree

common/src/main/scala/org/apache/spark/sql/comet/util/Utils.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,12 @@ object Utils extends CometTypeShim with Logging {
108108
case yi: ArrowType.Interval if yi.getUnit == IntervalUnit.YEAR_MONTH =>
109109
YearMonthIntervalType()
110110
case di: ArrowType.Interval if di.getUnit == IntervalUnit.DAY_TIME => DayTimeIntervalType()
111+
case t: ArrowType.Time if t.getUnit == TimeUnit.NANOSECOND && t.getBitWidth == 64 =>
112+
// scalastyle:off classforname
113+
val clazz = Class.forName("org.apache.spark.sql.types.TimeType$")
114+
// scalastyle:on classforname
115+
val module = clazz.getField("MODULE$").get(null)
116+
clazz.getMethod("apply").invoke(module).asInstanceOf[DataType]
111117
case _ => throw new UnsupportedOperationException(s"Unsupported data type: ${dt.toString}")
112118
}
113119

@@ -142,6 +148,8 @@ object Utils extends CometTypeShim with Logging {
142148
}
143149
case TimestampNTZType =>
144150
new ArrowType.Timestamp(TimeUnit.MICROSECOND, null)
151+
case dt if dt.getClass.getSimpleName.startsWith("TimeType") =>
152+
new ArrowType.Time(TimeUnit.NANOSECOND, 64)
145153
case _ =>
146154
throw new UnsupportedOperationException(
147155
s"Unsupported data type: [${dt.getClass.getName}] ${dt.catalogString}")

docs/source/user-guide/latest/expressions.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,10 @@ of expressions that be disabled.
120120
| DayOfYear | `dayofyear` |
121121
| WeekOfYear | `weekofyear` |
122122
| Quarter | `quarter` |
123+
| MakeDate | `make_date` |
124+
| MakeTime | `make_time` |
125+
| ToTime | `to_time` |
126+
| TryToTime | `try_to_time` |
123127

124128
## Math Expressions
125129

native/core/src/execution/columnar_to_row.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -201,9 +201,10 @@ impl<'a> TypedArray<'a> {
201201
DataType::Timestamp(TimeUnit::Microsecond, _) => Ok(TypedArray::TimestampMicro(
202202
downcast_array!(array, TimestampMicrosecondArray)?,
203203
)),
204-
DataType::Time64(TimeUnit::Nanosecond) => Ok(TypedArray::Time64Nano(
205-
downcast_array!(array, Time64NanosecondArray)?,
206-
)),
204+
DataType::Time64(TimeUnit::Nanosecond) => Ok(TypedArray::Time64Nano(downcast_array!(
205+
array,
206+
Time64NanosecondArray
207+
)?)),
207208
DataType::Decimal128(p, _) => Ok(TypedArray::Decimal128(
208209
downcast_array!(array, Decimal128Array)?,
209210
*p,

native/core/src/execution/planner.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,9 @@ impl PhysicalPlanner {
344344
DataType::Map(f, s) => DataType::Map(f, s).try_into()?,
345345
DataType::List(f) => DataType::List(f).try_into()?,
346346
DataType::Null => ScalarValue::Null,
347+
DataType::Time64(TimeUnit::Nanosecond) => {
348+
ScalarValue::Time64Nanosecond(None)
349+
}
347350
dt => {
348351
return Err(GeneralError(format!("{dt:?} is not supported in Comet")))
349352
}

native/spark-expr/src/comet_scalar_funcs.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ use crate::{
2626
spark_lpad, spark_make_decimal, spark_read_side_padding, spark_round, spark_rpad,
2727
spark_to_time, spark_unhex, spark_unscaled_value, EvalMode, SparkArrayCompact,
2828
SparkArrayPositionFunc, SparkArraysOverlap, SparkContains, SparkDateDiff,
29-
SparkDateFromUnixDate, SparkDateTrunc, SparkMakeDate, SparkMakeTime,
30-
SparkSecondsToTimestamp, SparkSizeFunc,
29+
SparkDateFromUnixDate, SparkDateTrunc, SparkMakeDate, SparkMakeTime, SparkSecondsToTimestamp,
30+
SparkSizeFunc,
3131
};
3232
use arrow::datatypes::DataType;
3333
use datafusion::common::{DataFusionError, Result as DataFusionResult};

native/spark-expr/src/datetime_funcs/make_time.rs

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,10 @@ fn make_time(hours: i32, minutes: i32, secs_and_micros_unscaled: i128) -> Result
8080
)));
8181
}
8282

83-
let total_nanos =
84-
hours as i64 * 3_600 * NANOS_PER_SECOND + minutes as i64 * 60 * NANOS_PER_SECOND + secs as i64 * NANOS_PER_SECOND + nanos;
83+
let total_nanos = hours as i64 * 3_600 * NANOS_PER_SECOND
84+
+ minutes as i64 * 60 * NANOS_PER_SECOND
85+
+ secs as i64 * NANOS_PER_SECOND
86+
+ nanos;
8587

8688
Ok(total_nanos)
8789
}
@@ -121,19 +123,19 @@ impl ScalarUDFImpl for SparkMakeTime {
121123
let hours_arr = cast_to_int32(&hours_arr)?;
122124
let minutes_arr = cast_to_int32(&minutes_arr)?;
123125

124-
let hours_array = hours_arr.as_any().downcast_ref::<Int32Array>().ok_or_else(|| {
125-
DataFusionError::Execution("make_time: failed to cast hours to Int32".to_string())
126-
})?;
126+
let hours_array = hours_arr
127+
.as_any()
128+
.downcast_ref::<Int32Array>()
129+
.ok_or_else(|| {
130+
DataFusionError::Execution("make_time: failed to cast hours to Int32".to_string())
131+
})?;
127132

128-
let minutes_array =
129-
minutes_arr
130-
.as_any()
131-
.downcast_ref::<Int32Array>()
132-
.ok_or_else(|| {
133-
DataFusionError::Execution(
134-
"make_time: failed to cast minutes to Int32".to_string(),
135-
)
136-
})?;
133+
let minutes_array = minutes_arr
134+
.as_any()
135+
.downcast_ref::<Int32Array>()
136+
.ok_or_else(|| {
137+
DataFusionError::Execution("make_time: failed to cast minutes to Int32".to_string())
138+
})?;
137139

138140
let secs_array = secs_arr
139141
.as_any()
@@ -190,10 +192,7 @@ mod tests {
190192
// 1.5 seconds (unscaled: 1_500_000)
191193
assert_eq!(make_time(0, 0, 1_500_000).unwrap(), 1_500_000_000);
192194
// 23:59:59.999999 (unscaled: 59_999_999)
193-
assert_eq!(
194-
make_time(23, 59, 59_999_999).unwrap(),
195-
86_399_999_999_000
196-
);
195+
assert_eq!(make_time(23, 59, 59_999_999).unwrap(), 86_399_999_999_000);
197196
// 12:30:45.123456 (unscaled: 45_123_456)
198197
assert_eq!(
199198
make_time(12, 30, 45_123_456).unwrap(),

native/spark-expr/src/datetime_funcs/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ mod hours;
2323
mod make_date;
2424
mod make_time;
2525
mod seconds_to_timestamp;
26-
mod to_time;
2726
mod timestamp_trunc;
27+
mod to_time;
2828
mod unix_timestamp;
2929

3030
pub use date_diff::SparkDateDiff;
@@ -37,6 +37,6 @@ pub use hours::SparkHoursTransform;
3737
pub use make_date::SparkMakeDate;
3838
pub use make_time::SparkMakeTime;
3939
pub use seconds_to_timestamp::SparkSecondsToTimestamp;
40-
pub use to_time::{spark_to_time, to_time_return_type};
4140
pub use timestamp_trunc::TimestampTruncExpr;
41+
pub use to_time::{spark_to_time, to_time_return_type};
4242
pub use unix_timestamp::SparkUnixTimestamp;

native/spark-expr/src/datetime_funcs/to_time.rs

Lines changed: 45 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,12 @@ pub fn spark_to_time(args: &[ColumnarValue], fail_on_error: bool) -> Result<Colu
4545
.unwrap_or(1);
4646

4747
let str_arr = args[0].clone().into_array(num_rows)?;
48-
let str_array = str_arr.as_any().downcast_ref::<StringArray>().ok_or_else(|| {
49-
DataFusionError::Execution("to_time: expected String argument".to_string())
50-
})?;
48+
let str_array = str_arr
49+
.as_any()
50+
.downcast_ref::<StringArray>()
51+
.ok_or_else(|| {
52+
DataFusionError::Execution("to_time: expected String argument".to_string())
53+
})?;
5154

5255
let len = str_array.len();
5356
let mut builder = Time64NanosecondArray::builder(len);
@@ -120,13 +123,21 @@ fn string_to_time(s: &str) -> Option<i64> {
120123
}
121124
hour
122125
} else {
123-
if hour < 1 || hour > 12 {
126+
if !(1..=12).contains(&hour) {
124127
return None;
125128
}
126129
if is_am {
127-
if hour == 12 { 0 } else { hour }
130+
if hour == 12 {
131+
0
132+
} else {
133+
hour
134+
}
128135
} else if is_pm {
129-
if hour == 12 { 12 } else { hour + 12 }
136+
if hour == 12 {
137+
12
138+
} else {
139+
hour + 12
140+
}
130141
} else {
131142
return None;
132143
}
@@ -148,7 +159,7 @@ fn string_to_time(s: &str) -> Option<i64> {
148159
/// Parse time components from a string like "HH:mm:ss.ffffff" or "T HH:mm:ss".
149160
/// Returns (hour, minute, second, microseconds) or None if invalid.
150161
fn parse_time_components(s: &str) -> Option<(i32, i32, i32, i32)> {
151-
let bytes = s.trim_start().as_bytes();
162+
let bytes = s.as_bytes();
152163
if bytes.is_empty() {
153164
return None;
154165
}
@@ -223,7 +234,7 @@ fn parse_digits(bytes: &[u8], start: usize) -> Option<(i32, usize)> {
223234

224235
while pos < bytes.len() {
225236
let b = bytes[pos];
226-
if b >= b'0' && b <= b'9' {
237+
if b.is_ascii_digit() {
227238
value = value * 10 + (b - b'0') as i32;
228239
count += 1;
229240
pos += 1;
@@ -233,11 +244,7 @@ fn parse_digits(bytes: &[u8], start: usize) -> Option<(i32, usize)> {
233244
}
234245

235246
if count == 0 || count > 2 {
236-
// Hour/minute/second: 1-2 digits
237-
// Exception: we allow 1-2 digits for time components
238-
if count == 0 {
239-
return None;
240-
}
247+
return None;
241248
}
242249

243250
Some((value, pos))
@@ -252,7 +259,7 @@ fn parse_fractional(bytes: &[u8], start: usize) -> Option<(i32, usize)> {
252259

253260
while pos < bytes.len() && count < 6 {
254261
let b = bytes[pos];
255-
if b >= b'0' && b <= b'9' {
262+
if b.is_ascii_digit() {
256263
value = value * 10 + (b - b'0') as i32;
257264
count += 1;
258265
pos += 1;
@@ -266,7 +273,7 @@ fn parse_fractional(bytes: &[u8], start: usize) -> Option<(i32, usize)> {
266273
}
267274

268275
// Skip any remaining digits beyond 6 (truncation)
269-
while pos < bytes.len() && bytes[pos] >= b'0' && bytes[pos] <= b'9' {
276+
while pos < bytes.len() && bytes[pos].is_ascii_digit() {
270277
pos += 1;
271278
}
272279

@@ -292,8 +299,14 @@ mod tests {
292299
fn test_basic_time_parsing() {
293300
// HH:mm
294301
assert_eq!(string_to_time("00:00"), Some(0));
295-
assert_eq!(string_to_time("12:30"), Some(12 * NANOS_PER_HOUR + 30 * NANOS_PER_MINUTE));
296-
assert_eq!(string_to_time("23:59"), Some(23 * NANOS_PER_HOUR + 59 * NANOS_PER_MINUTE));
302+
assert_eq!(
303+
string_to_time("12:30"),
304+
Some(12 * NANOS_PER_HOUR + 30 * NANOS_PER_MINUTE)
305+
);
306+
assert_eq!(
307+
string_to_time("23:59"),
308+
Some(23 * NANOS_PER_HOUR + 59 * NANOS_PER_MINUTE)
309+
);
297310

298311
// HH:mm:ss
299312
assert_eq!(
@@ -320,10 +333,7 @@ mod tests {
320333
Some(1_000 * NANOS_PER_MICRO)
321334
);
322335
// 6 digits
323-
assert_eq!(
324-
string_to_time("00:00:00.000001"),
325-
Some(1 * NANOS_PER_MICRO)
326-
);
336+
assert_eq!(string_to_time("00:00:00.000001"), Some(1 * NANOS_PER_MICRO));
327337
// Full precision
328338
assert_eq!(
329339
string_to_time("23:59:59.999999"),
@@ -439,4 +449,18 @@ mod tests {
439449
assert_eq!(string_to_time("12:30:45 "), string_to_time("12:30:45"));
440450
assert_eq!(string_to_time("1:00:00 AM "), string_to_time("1:00:00 AM"));
441451
}
452+
453+
#[test]
454+
fn test_three_digit_components() {
455+
// 3-digit hour/minute/second must be rejected (Spark requires 1-2 digits)
456+
assert_eq!(string_to_time("001:02:03"), None);
457+
assert_eq!(string_to_time("12:001:03"), None);
458+
assert_eq!(string_to_time("12:02:003"), None);
459+
}
460+
461+
#[test]
462+
fn test_leading_space_with_t_prefix() {
463+
// Leading space before T should be rejected (Spark only right-trims)
464+
assert_eq!(string_to_time(" T12:30"), None);
465+
}
442466
}

spark/src/test/resources/sql-tests/expressions/datetime/make_time.sql

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
-- under the License.
1717

1818
-- MinSparkVersion: 4.1
19+
-- Config: spark.sql.timeType.enabled=true
1920

2021
statement
2122
CREATE TABLE test_make_time(hours int, minutes int, secs decimal(16,6)) USING parquet
@@ -32,16 +33,16 @@ INSERT INTO test_make_time VALUES
3233
(12, 30, NULL),
3334
(NULL, NULL, NULL)
3435

35-
-- column arguments
36-
query
36+
-- column arguments (spark_answer_only: shuffle does not support TimeType yet)
37+
query spark_answer_only
3738
SELECT hours, minutes, secs, make_time(hours, minutes, secs) FROM test_make_time ORDER BY hours, minutes, secs
3839

39-
-- literal hour, column minutes and secs
40-
query
40+
-- literal hour, column minutes and secs (spark_answer_only: shuffle does not support TimeType yet)
41+
query spark_answer_only
4142
SELECT make_time(10, minutes, secs) FROM test_make_time ORDER BY minutes, secs
4243

43-
-- column hours, literal minutes and secs
44-
query
44+
-- column hours, literal minutes and secs (spark_answer_only: shuffle does not support TimeType yet)
45+
query spark_answer_only
4546
SELECT make_time(hours, 15, 30.5) FROM test_make_time ORDER BY hours
4647

4748
-- all literals

spark/src/test/resources/sql-tests/expressions/datetime/to_time.sql

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
-- under the License.
1717

1818
-- MinSparkVersion: 4.1
19+
-- Config: spark.sql.timeType.enabled=true
1920

2021
statement
2122
CREATE TABLE test_to_time(s STRING) USING parquet
@@ -46,8 +47,8 @@ INSERT INTO test_to_time VALUES
4647
('1:00:00PM'),
4748
(NULL)
4849

49-
-- column argument: basic time formats
50-
query
50+
-- column argument: basic time formats (spark_answer_only: shuffle does not support TimeType yet)
51+
query spark_answer_only
5152
SELECT s, to_time(s) FROM test_to_time ORDER BY s
5253

5354
-- literal HH:mm

0 commit comments

Comments
 (0)