support_timestamp_to_int_type

coderfender · coderfender · commit 97a40dfcc42e · 2026-02-16T12:02:43.000-08:00
diff --git a/native/spark-expr/src/conversion_funcs/cast.rs b/native/spark-expr/src/conversion_funcs/cast.rs
@@ -25,6 +25,7 @@ use arrow::array::{
     PrimitiveBuilder, StringArray, StructArray, TimestampMicrosecondBuilder,
 };
 use arrow::compute::can_cast_types;
+use arrow::datatypes::DataType::Int64;
 use arrow::datatypes::{
     i256, ArrowDictionaryKeyType, ArrowNativeType, DataType, Decimal256Type, GenericBinaryType,
     Schema,
@@ -915,6 +916,9 @@ fn cast_array(
         (Boolean, Decimal128(precision, scale)) => {
             cast_boolean_to_decimal(&array, *precision, *scale)
         }
+        (Int8 | Int16 | Int32 | Int64, Timestamp(_, _)) => {
+            cast_int_to_timestamp(&array, cast_options)
+        }
         _ if cast_options.is_adapting_schema
             || is_datafusion_spark_compatible(from_type, to_type) =>
         {
@@ -933,6 +937,29 @@ fn cast_array(
     Ok(spark_cast_postprocess(cast_result?, from_type, to_type))
 }
 
+/// Casts an integer array of seconds since the epoch to a microsecond timestamp array.
+///
+/// Nulls are preserved, the seconds-to-microseconds product saturates at the `i64` bounds
+/// instead of wrapping, and the result carries the timezone from `cast_options`.
+fn cast_int_to_timestamp(
+    array_ref: &ArrayRef,
+    cast_options: &SparkCastOptions,
+) -> SparkResult<ArrayRef> {
+    // Widen every accepted integer width to Int64 so one code path handles them all.
+    let widened = cast_with_options(array_ref, &Int64, &CAST_OPTIONS)?;
+    let seconds = widened.as_primitive::<Int64Type>();
+
+    let mut builder = TimestampMicrosecondBuilder::with_capacity(seconds.len());
+    // Iteration yields `Option<i64>`, so `append_option` carries nulls through unchanged.
+    for secs in seconds.iter() {
+        builder.append_option(secs.map(|s| s.saturating_mul(MICROS_PER_SECOND)));
+    }
+
+    // input tz is always defined or set to UTC on spark side
+    let tz: Arc<str> = Arc::from(cast_options.timezone.as_str());
+    Ok(Arc::new(builder.finish().with_timezone(tz)) as ArrayRef)
+}
+
 fn cast_date_to_timestamp(
     array_ref: &ArrayRef,
     cast_options: &SparkCastOptions,
diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
@@ -249,7 +249,7 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
   private def canCastFromTimestamp(toType: DataType): SupportLevel = {
     toType match {
       case DataTypes.BooleanType | DataTypes.ByteType | DataTypes.ShortType |
-          DataTypes.IntegerType =>
+           DataTypes.IntegerType =>
         // https://github.com/apache/datafusion-comet/issues/352
         // this seems like an edge case that isn't important for us to support
         unsupported(DataTypes.TimestampType, toType)
@@ -279,6 +279,8 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
         Compatible()
       case DataTypes.BinaryType if (evalMode == CometEvalMode.LEGACY) =>
         Compatible()
+      case DataTypes.TimestampType =>
+        Compatible()
       case _ =>
         unsupported(DataTypes.ByteType, toType)
     }
@@ -293,6 +295,8 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
         Compatible()
       case DataTypes.BinaryType if (evalMode == CometEvalMode.LEGACY) =>
         Compatible()
+      case DataTypes.TimestampType =>
+        Compatible()
       case _ =>
         unsupported(DataTypes.ShortType, toType)
     }
@@ -308,6 +312,8 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
       case _: DecimalType =>
         Compatible()
       case DataTypes.BinaryType if (evalMode == CometEvalMode.LEGACY) => Compatible()
+      case DataTypes.TimestampType =>
+        Compatible()
       case _ =>
         unsupported(DataTypes.IntegerType, toType)
     }
@@ -323,6 +329,8 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim {
       case _: DecimalType =>
         Compatible()
       case DataTypes.BinaryType if (evalMode == CometEvalMode.LEGACY) => Compatible()
+      case DataTypes.TimestampType =>
+        Compatible()
       case _ =>
         unsupported(DataTypes.LongType, toType)
     }
diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -223,12 +223,22 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
       testTry = false)
   }
 
-  ignore("cast ByteType to TimestampType") {
-    // input: -1, expected: 1969-12-31 15:59:59.0, actual: 1969-12-31 15:59:59.999999
-    castTest(
-      generateBytes(),
-      DataTypes.TimestampType,
-      hasIncompatibleType = usingParquetExecWithIncompatTypes)
+  test("cast ByteType to TimestampType") {
+    // Repeat the cast once per session timezone in the list below.
+    for (sessionTz <- Seq(
+        "UTC",
+        "America/New_York",
+        "America/Los_Angeles",
+        "Europe/London",
+        "Asia/Tokyo",
+        "Australia/Sydney")) {
+      withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> sessionTz) {
+        castTest(
+          generateBytes(),
+          DataTypes.TimestampType,
+          hasIncompatibleType = usingParquetExecWithIncompatTypes)
+      }
+    }
+  }
 
   // CAST from ShortType
@@ -300,12 +310,22 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
       testTry = false)
   }
 
-  ignore("cast ShortType to TimestampType") {
-    // input: -1003, expected: 1969-12-31 15:43:17.0, actual: 1969-12-31 15:59:59.998997
-    castTest(
-      generateShorts(),
-      DataTypes.TimestampType,
-      hasIncompatibleType = usingParquetExecWithIncompatTypes)
+  test("cast ShortType to TimestampType") {
+    // Repeat the cast once per session timezone in the list below.
+    for (sessionTz <- Seq(
+        "UTC",
+        "America/New_York",
+        "America/Los_Angeles",
+        "Europe/London",
+        "Asia/Tokyo",
+        "Australia/Sydney")) {
+      withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> sessionTz) {
+        castTest(
+          generateShorts(),
+          DataTypes.TimestampType,
+          hasIncompatibleType = usingParquetExecWithIncompatTypes)
+      }
+    }
+  }
 
   // CAST from integer
@@ -363,9 +383,19 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     castTest(generateInts(), DataTypes.BinaryType, testAnsi = false, testTry = false)
   }
 
-  ignore("cast IntegerType to TimestampType") {
-    // input: -1000479329, expected: 1938-04-19 01:04:31.0, actual: 1969-12-31 15:43:19.520671
-    castTest(generateInts(), DataTypes.TimestampType)
+  test("cast IntegerType to TimestampType") {
+    // Repeat the cast once per session timezone in the list below.
+    for (sessionTz <- Seq(
+        "UTC",
+        "America/New_York",
+        "America/Los_Angeles",
+        "Europe/London",
+        "Asia/Tokyo",
+        "Australia/Sydney")) {
+      withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> sessionTz) {
+        castTest(generateInts(), DataTypes.TimestampType)
+      }
+    }
+  }
 
   // CAST from LongType
@@ -410,9 +440,26 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
     castTest(generateLongs(), DataTypes.BinaryType, testAnsi = false, testTry = false)
   }
 
-  ignore("cast LongType to TimestampType") {
-    // java.lang.ArithmeticException: long overflow
-    castTest(generateLongs(), DataTypes.TimestampType)
+  test("cast LongType to TimestampType") {
+    // Compared with assertDataFrameEquals: extreme Long values (Long.MIN_VALUE,
+    // Long.MAX_VALUE) cast fine, but converting the resulting timestamps to
+    // java.sql.Timestamp during collect() overflows.
+    for (sessionTz <- Seq(
+        "UTC",
+        "America/New_York",
+        "America/Los_Angeles",
+        "Europe/London",
+        "Asia/Tokyo",
+        "Australia/Sydney")) {
+      withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> sessionTz) {
+        withTempPath { tmpDir =>
+          val longs = roundtripParquet(generateLongs(), tmpDir).coalesce(1)
+          val withTs = longs.withColumn("ts", col("a").cast(DataTypes.TimestampType))
+          assertDataFrameEquals(withTs)
+        }
+      }
+    }
+  }
 
   // CAST from FloatType
@@ -1042,13 +1089,13 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper {
 
   ignore("cast TimestampType to ShortType") {
     // https://github.com/apache/datafusion-comet/issues/352
-    // input: 2023-12-31 10:00:00.0, expected: -21472, actual: null]
+    // input: 2023-12-31 10:00:00.0, expected: -21472, actual: null
     castTest(generateTimestamps(), DataTypes.ShortType)
   }
 
   ignore("cast TimestampType to IntegerType") {
     // https://github.com/apache/datafusion-comet/issues/352
-    // input: 2023-12-31 10:00:00.0, expected: 1704045600, actual: null]
+    // input: 2023-12-31 10:00:00.0, expected: 1704045600, actual: null
     castTest(generateTimestamps(), DataTypes.IntegerType)
   }
 
diff --git a/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala b/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala
@@ -332,6 +332,40 @@ abstract class CometTestBase
     }
   }
 
+  /**
+   * Asserts that `df` produces the same rows, duplicates included, with Comet disabled and
+   * enabled. Rows are compared inside Spark, avoiding collect() and toJavaTimestamp conversion.
+   */
+  protected def assertDataFrameEquals(
+      df: => DataFrame,
+      checkNativeOperators: Boolean = true): Unit = {
+    // Plans are built lazily, so the baseline is materialized while Comet is disabled. A local
+    // checkpoint is used instead of cache(), which matches by logical plan and would therefore
+    // also be picked up by the Comet side.
+    var sparkDf: DataFrame = null
+    withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+      sparkDf = datasetOfRows(spark, df.logicalPlan).localCheckpoint()
+    }
+    val cometDf = datasetOfRows(spark, df.logicalPlan)
+
+    assert(
+      sparkDf.schema == cometDf.schema,
+      s"Schemas do not match.\nSpark: ${sparkDf.schema}\nComet: ${cometDf.schema}")
+
+    // exceptAll keeps duplicates, so differing row multiplicities are reported too.
+    val diffCount1 = sparkDf.exceptAll(cometDf).count()
+    val diffCount2 = cometDf.exceptAll(sparkDf).count()
+    if (diffCount1 != 0 || diffCount2 != 0) {
+      fail(
+        "DataFrames are not equal.\n" +
+          s"Rows in Spark but not in Comet: $diffCount1\n" +
+          s"Rows in Comet but not in Spark: $diffCount2")
+    }
+    if (checkNativeOperators) {
+      checkCometOperators(stripAQEPlan(df.queryExecution.executedPlan))
+    }
+  }
+
   /**
    * A helper function for comparing Comet DataFrame with Spark result using absolute tolerance.
    */