Skip to content

Commit 4979e81

Browse files
committed
fix: handle dictionary-encoded timestamps and add TimestampNTZ regression test
- Add support for dictionary-encoded timestamps in extract_date_part
- Add comprehensive test for hour/minute/second with TimestampNTZ in non-UTC timezones
- Address reviewer feedback on PR #3265 for issue #3180
1 parent c5cf290 commit 4979e81

2 files changed

Lines changed: 38 additions & 1 deletion

File tree

native/spark-expr/src/datetime_funcs/extract_date_part.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,20 @@ macro_rules! extract_date_part {
7676

7777
match args {
7878
[ColumnarValue::Array(array)] => {
79+
// First, normalize dictionary-encoded arrays (common in Parquet/Iceberg)
80+
let array = match array.data_type() {
81+
DataType::Dictionary(_, value_type) => {
82+
// Cast the dictionary to its value type (expected to be a timestamp type)
83+
arrow::compute::cast(&array, value_type.as_ref())
84+
.map_err(|e| DataFusionError::Execution(e.to_string()))?
85+
}
86+
_ => array.clone(),
87+
};
88+
89+
// Then handle timezone conversion based on timestamp type
7990
let array = match array.data_type() {
8091
// TimestampNTZ → DO NOT apply timezone conversion
81-
DataType::Timestamp(_, None) => array.clone(),
92+
DataType::Timestamp(_, None) => array,
8293

8394
// Timestamp with timezone → convert from UTC to session timezone
8495
DataType::Timestamp(_, Some(_)) => array_with_timezone(

spark/src/test/scala/org/apache/comet/CometTemporalExpressionSuite.scala

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,4 +395,30 @@ class CometTemporalExpressionSuite extends CometTestBase with AdaptiveSparkPlanH
395395
// Test null handling
396396
checkSparkAnswerAndOperator("SELECT unix_date(NULL)")
397397
}
398+
399+
test("hour/minute/second with TimestampNTZ in non-UTC timezone") {
400+
// Regression test for issue #3180
401+
// TimestampNTZ stores local time without timezone information
402+
// hour/minute/second should extract directly from local time without timezone conversion
403+
val schema = StructType(Seq(StructField("ts_ntz", DataTypes.TimestampNTZType, true)))
404+
405+
// Create test data with known TimestampNTZ values
406+
val data = Seq(
407+
Row(java.time.LocalDateTime.of(2024, 1, 15, 10, 30, 45)), // 10:30:45
408+
Row(java.time.LocalDateTime.of(2024, 6, 20, 14, 15, 20)), // 14:15:20
409+
Row(java.time.LocalDateTime.of(2024, 12, 31, 23, 59, 59)), // 23:59:59
410+
Row(null))
411+
412+
val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
413+
df.createOrReplaceTempView("timestamp_ntz_tbl")
414+
415+
// Test in multiple timezones - results should be the same since TimestampNTZ has no timezone
416+
for (timezone <- Seq("UTC", "America/Los_Angeles", "Asia/Tokyo")) {
417+
withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> timezone) {
418+
// hour(), minute() and second() should all be read from the local time directly
419+
checkSparkAnswerAndOperator(
420+
"SELECT ts_ntz, hour(ts_ntz), minute(ts_ntz), second(ts_ntz) FROM timestamp_ntz_tbl ORDER BY ts_ntz")
421+
}
422+
}
423+
}
398424
}

0 commit comments

Comments
 (0)