Skip to content

Commit 0041bc5

Browse files
authored
feat: add support for last_day expression (#3143)
1 parent e4a0142 commit 0041bc5

5 files changed

Lines changed: 33 additions & 1 deletion

File tree

docs/source/user-guide/latest/configs.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ These settings can be used to determine which parts of the plan are accelerated
268268
| `spark.comet.expression.IsNull.enabled` | Enable Comet acceleration for `IsNull` | true |
269269
| `spark.comet.expression.JsonToStructs.enabled` | Enable Comet acceleration for `JsonToStructs` | true |
270270
| `spark.comet.expression.KnownFloatingPointNormalized.enabled` | Enable Comet acceleration for `KnownFloatingPointNormalized` | true |
271+
| `spark.comet.expression.LastDay.enabled` | Enable Comet acceleration for `LastDay` | true |
271272
| `spark.comet.expression.Length.enabled` | Enable Comet acceleration for `Length` | true |
272273
| `spark.comet.expression.LessThan.enabled` | Enable Comet acceleration for `LessThan` | true |
273274
| `spark.comet.expression.LessThanOrEqual.enabled` | Enable Comet acceleration for `LessThanOrEqual` | true |

native/core/src/execution/jni_api.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ use datafusion_spark::function::bitwise::bit_get::SparkBitGet;
4444
use datafusion_spark::function::bitwise::bitwise_not::SparkBitwiseNot;
4545
use datafusion_spark::function::datetime::date_add::SparkDateAdd;
4646
use datafusion_spark::function::datetime::date_sub::SparkDateSub;
47+
use datafusion_spark::function::datetime::last_day::SparkLastDay;
4748
use datafusion_spark::function::hash::sha1::SparkSha1;
4849
use datafusion_spark::function::hash::sha2::SparkSha2;
4950
use datafusion_spark::function::math::expm1::SparkExpm1;
@@ -345,6 +346,7 @@ fn register_datafusion_spark_function(session_ctx: &SessionContext) {
345346
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkBitGet::default()));
346347
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkDateAdd::default()));
347348
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkDateSub::default()));
349+
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkLastDay::default()));
348350
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkSha1::default()));
349351
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkConcat::default()));
350352
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkBitwiseNot::default()));

spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
190190
classOf[DateSub] -> CometDateSub,
191191
classOf[UnixDate] -> CometUnixDate,
192192
classOf[FromUnixTime] -> CometFromUnixTime,
193+
classOf[LastDay] -> CometLastDay,
193194
classOf[Hour] -> CometHour,
194195
classOf[Minute] -> CometMinute,
195196
classOf[Second] -> CometSecond,

spark/src/main/scala/org/apache/comet/serde/datetime.scala

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ package org.apache.comet.serde
2121

2222
import java.util.Locale
2323

24-
import org.apache.spark.sql.catalyst.expressions.{Attribute, DateAdd, DateDiff, DateFormatClass, DateSub, DayOfMonth, DayOfWeek, DayOfYear, GetDateField, Hour, Literal, Minute, Month, Quarter, Second, TruncDate, TruncTimestamp, UnixDate, UnixTimestamp, WeekDay, WeekOfYear, Year}
24+
import org.apache.spark.sql.catalyst.expressions.{Attribute, DateAdd, DateDiff, DateFormatClass, DateSub, DayOfMonth, DayOfWeek, DayOfYear, GetDateField, Hour, LastDay, Literal, Minute, Month, Quarter, Second, TruncDate, TruncTimestamp, UnixDate, UnixTimestamp, WeekDay, WeekOfYear, Year}
2525
import org.apache.spark.sql.types.{DateType, IntegerType, StringType, TimestampType}
2626
import org.apache.spark.unsafe.types.UTF8String
2727

@@ -310,6 +310,8 @@ object CometDateAdd extends CometScalarFunction[DateAdd]("date_add")
310310

311311
object CometDateSub extends CometScalarFunction[DateSub]("date_sub")
312312

313+
object CometLastDay extends CometScalarFunction[LastDay]("last_day")
314+
313315
object CometDateDiff extends CometScalarFunction[DateDiff]("date_diff")
314316

315317
/**

spark/src/test/scala/org/apache/comet/CometTemporalExpressionSuite.scala

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,32 @@ class CometTemporalExpressionSuite extends CometTestBase with AdaptiveSparkPlanH
182182
FuzzDataGenerator.generateDataFrame(r, spark, schema, 1000, DataGenOptions())
183183
}
184184

185+
test("last_day") {
186+
val r = new Random(42)
187+
val schema = StructType(Seq(StructField("c0", DataTypes.DateType, true)))
188+
val df = FuzzDataGenerator.generateDataFrame(r, spark, schema, 1000, DataGenOptions())
189+
df.createOrReplaceTempView("tbl")
190+
191+
// Basic test with random dates
192+
checkSparkAnswerAndOperator("SELECT c0, last_day(c0) FROM tbl ORDER BY c0")
193+
194+
// Disable constant folding to ensure literal expressions are executed by Comet
195+
withSQLConf(
196+
SQLConf.OPTIMIZER_EXCLUDED_RULES.key ->
197+
"org.apache.spark.sql.catalyst.optimizer.ConstantFolding") {
198+
// Test with literal dates - various months
199+
checkSparkAnswerAndOperator(
200+
"SELECT last_day(DATE('2024-01-15')), last_day(DATE('2024-02-15')), last_day(DATE('2024-12-01'))")
201+
202+
// Test leap year handling (February)
203+
checkSparkAnswerAndOperator(
204+
"SELECT last_day(DATE('2024-02-01')), last_day(DATE('2023-02-01'))")
205+
206+
// Test null handling
207+
checkSparkAnswerAndOperator("SELECT last_day(NULL)")
208+
}
209+
}
210+
185211
test("datediff") {
186212
val r = new Random(42)
187213
val schema = StructType(

0 commit comments

Comments
 (0)