Skip to content

Commit 691d7b5

Browse files
committed
Add temporal casts
1 parent a3d45af commit 691d7b5

7 files changed

Lines changed: 104 additions & 6 deletions

spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastBooleanBenchmark.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ object CometCastBooleanBenchmark extends CometBenchmarkBase {
7171
s"SELECT $castFunc($colName AS BOOLEAN) FROM parquetV1Table")
7272

7373
override def runCometBenchmark(mainArgs: Array[String]): Unit = {
74-
val values = getBenchmarkRows(1024 * 1024 * 5) // 5M rows default
74+
val values = 1024 * 1024 * 5 // 5M rows
7575

7676
// Generate boolean data for boolean-to-other casts
7777
runBenchmarkWithTable("Boolean to other types casts", values) { v =>

spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastNumericToNumericBenchmark.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ object CometCastNumericToNumericBenchmark extends CometBenchmarkBase {
9292
}
9393

9494
override def runCometBenchmark(mainArgs: Array[String]): Unit = {
95-
val values = getBenchmarkRows(1024 * 1024 * 5) // 5M rows default
95+
val values = 1024 * 1024 * 5 // 5M rows
9696

9797
// Generate input data once with all numeric types
9898
runBenchmarkWithTable("Numeric to Numeric casts", values) { v =>

spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastNumericToStringBenchmark.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ object CometCastNumericToStringBenchmark extends CometBenchmarkBase {
5454
s"SELECT $castFunc($colName AS STRING) FROM parquetV1Table")
5555

5656
override def runCometBenchmark(mainArgs: Array[String]): Unit = {
57-
val values = getBenchmarkRows(1024 * 1024 * 5) // 5M rows default
57+
val values = 1024 * 1024 * 5 // 5M rows
5858

5959
// Generate input data once with all numeric types
6060
runBenchmarkWithTable("Numeric to String casts", values) { v =>

spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastNumericToTemporalBenchmark.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ object CometCastNumericToTemporalBenchmark extends CometBenchmarkBase {
5252
s"SELECT $castFunc(c_long AS TIMESTAMP) FROM parquetV1Table")
5353

5454
override def runCometBenchmark(mainArgs: Array[String]): Unit = {
55-
val values = getBenchmarkRows(1024 * 1024 * 5) // 5M rows default
55+
val values = 1024 * 1024 * 5 // 5M rows
5656

5757
// Generate data once for INT to DATE conversions
5858
runBenchmarkWithTable("Int to Date casts", values) { v =>

spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastTemporalToNumericBenchmark.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ object CometCastTemporalToNumericBenchmark extends CometBenchmarkBase {
5656
s"SELECT $castFunc(c_timestamp AS $targetType) FROM parquetV1Table")
5757

5858
override def runCometBenchmark(mainArgs: Array[String]): Unit = {
59-
val values = getBenchmarkRows(1024 * 1024 * 5) // 5M rows default
59+
val values = 1024 * 1024 * 5 // 5M rows
6060

6161
// Generate DATE data once for all date-to-numeric benchmarks
6262
runBenchmarkWithTable("Date to Numeric casts", values) { v =>

spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastTemporalToStringBenchmark.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ object CometCastTemporalToStringBenchmark extends CometBenchmarkBase {
4949
s"SELECT $castFunc(c_timestamp AS STRING) FROM parquetV1Table")
5050

5151
override def runCometBenchmark(mainArgs: Array[String]): Unit = {
52-
val values = getBenchmarkRows(1024 * 1024 * 5) // 5M rows default
52+
val values = 1024 * 1024 * 5 // 5M rows
5353

5454
// Generate temporal data once for date benchmarks
5555
runBenchmarkWithTable("Date to String casts", values) { v =>
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.spark.sql.benchmark
21+
22+
/**
 * Describes a single temporal-to-temporal cast benchmark case.
 *
 * @param name
 *   label passed to the benchmark runner for this case
 * @param query
 *   SQL text of the query that is benchmarked
 * @param extraCometConfigs
 *   additional configuration entries handed to the benchmark runner together with the query;
 *   empty by default
 */
final case class CastTemporalToTemporalConfig(
    name: String,
    query: String,
    extraCometConfigs: Map[String, String] = Map.empty)
26+
27+
/**
 * Benchmark measuring Comet cast performance when both the source and the target are temporal
 * types (DATE to TIMESTAMP and TIMESTAMP to DATE). To run this benchmark:
 * {{{
 *   SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometCastTemporalToTemporalBenchmark
 * }}}
 * Results will be written to
 * "spark/benchmarks/CometCastTemporalToTemporalBenchmark-**results.txt".
 */
object CometCastTemporalToTemporalBenchmark extends CometBenchmarkBase {

  // Every query below is generated once per cast function listed here.
  private val castFunctions = Seq("CAST", "TRY_CAST")

  // Date to Timestamp
  private val dateToTimestampConfigs = castFunctions.map { fn =>
    CastTemporalToTemporalConfig(
      s"$fn Date to Timestamp",
      s"SELECT $fn(c_date AS TIMESTAMP) FROM parquetV1Table")
  }

  // Timestamp to Date
  private val timestampToDateConfigs = castFunctions.map { fn =>
    CastTemporalToTemporalConfig(
      s"$fn Timestamp to Date",
      s"SELECT $fn(c_timestamp AS DATE) FROM parquetV1Table")
  }

  /**
   * Runs the two benchmark groups in order: Date -> Timestamp, then Timestamp -> Date. Each
   * group prepares its own single-column input table before running its queries.
   *
   * @param mainArgs
   *   command-line arguments; not read by this method
   */
  override def runCometBenchmark(mainArgs: Array[String]): Unit = {
    val rowCount = 1024 * 1024 * 5 // 5M rows

    // DATE input for the Date -> Timestamp benchmarks. The SQL text is kept flush-left so the
    // string literal is unchanged.
    runBenchmarkWithTable("Date to Timestamp casts", rowCount) { v =>
      withTempPath { dir =>
        withTempTable("parquetV1Table") {
          // Rows whose value is a multiple of 100 become NULL; the rest are 2020-01-01 shifted
          // by (value % 3650) days.
          val dateInput = spark.sql(s"""
SELECT CASE
WHEN value % 100 = 0 THEN NULL
ELSE DATE_ADD('2020-01-01', CAST(value % 3650 AS INT))
END AS c_date
FROM $tbl
""")
          prepareTable(dir, dateInput)

          for (cfg <- dateToTimestampConfigs) {
            runExpressionBenchmark(cfg.name, v, cfg.query, cfg.extraCometConfigs)
          }
        }
      }
    }

    // TIMESTAMP input for the Timestamp -> Date benchmarks.
    runBenchmarkWithTable("Timestamp to Date casts", rowCount) { v =>
      withTempPath { dir =>
        withTempTable("parquetV1Table") {
          // 1577836800000000 microseconds since the epoch is 2020-01-01T00:00:00Z, and
          // 31536000000000 microseconds is 365 days; multiples of 100 become NULL.
          val timestampInput = spark.sql(s"""
SELECT CASE
WHEN value % 100 = 0 THEN NULL
ELSE TIMESTAMP_MICROS(1577836800000000 + value % 31536000000000)
END AS c_timestamp
FROM $tbl
""")
          prepareTable(dir, timestampInput)

          for (cfg <- timestampToDateConfigs) {
            runExpressionBenchmark(cfg.name, v, cfg.query, cfg.extraCometConfigs)
          }
        }
      }
    }
  }
}

0 commit comments

Comments
 (0)