Skip to content

Commit 38609b1

Browse files
committed
Add format support for Time
1 parent 9d1f4d3 commit 38609b1

13 files changed

Lines changed: 1404 additions & 3 deletions

File tree

python/pyspark/sql/connect/functions/builtin.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3657,6 +3657,13 @@ def time_diff(unit: "ColumnOrName", start: "ColumnOrName", end: "ColumnOrName")
36573657
time_diff.__doc__ = pysparkfuncs.time_diff.__doc__
36583658

36593659

3660+
def time_format(time: "ColumnOrName", format: str) -> Column:
3661+
return _invoke_function("time_format", _to_col(time), lit(format))
3662+
3663+
3664+
time_format.__doc__ = pysparkfuncs.time_format.__doc__
3665+
3666+
36603667
def time_trunc(unit: "ColumnOrName", time: "ColumnOrName") -> Column:
36613668
return _invoke_function_over_columns("time_trunc", unit, time)
36623669

python/pyspark/sql/functions/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@
249249
"timestamp_millis",
250250
"timestamp_seconds",
251251
"time_diff",
252+
"time_format",
252253
"time_from_micros",
253254
"time_from_millis",
254255
"time_from_seconds",

python/pyspark/sql/functions/builtin.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12829,6 +12829,78 @@ def time_diff(unit: "ColumnOrName", start: "ColumnOrName", end: "ColumnOrName")
1282912829
return _invoke_function_over_columns("time_diff", unit, start, end)
1283012830

1283112831

12832+
@_try_remote_functions
12833+
def time_format(time: "ColumnOrName", format: str) -> Column:
12834+
"""
12835+
Converts a time to a value of string in the format specified by the time
12836+
format given by the second argument.
12837+
12838+
A pattern could be for instance `HH:mm:ss` and could return a string like '14:30:45'.
12839+
Time-related pattern letters of `datetime pattern`_ can be used.

.. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
12840+
12841+
.. versionadded:: 4.2.0
12842+
12843+
Parameters
12844+
----------
12845+
time : :class:`~pyspark.sql.Column` or column name
12846+
input column of TIME values to format.
12847+
format : literal string
12848+
format to use to represent time values.
12849+
12850+
Returns
12851+
-------
12852+
:class:`~pyspark.sql.Column`
12853+
string value representing formatted time.
12854+
12855+
See Also
12856+
--------
12857+
:meth:`pyspark.sql.functions.to_time`
12858+
:meth:`pyspark.sql.functions.try_to_time`
12859+
:meth:`pyspark.sql.functions.date_format`
12860+
12861+
Examples
12862+
--------
12863+
Example 1: Basic 24-hour format
12864+
12865+
>>> from pyspark.sql import functions as sf
12866+
>>> df = spark.sql("SELECT TIME'14:30:45' as time_col")
12867+
>>> df.select("*", sf.time_format('time_col', 'HH:mm:ss')).show()
12868+
+--------+-------------------------------+
12869+
|time_col|time_format(time_col, HH:mm:ss)|
12870+
+--------+-------------------------------+
12871+
|14:30:45| 14:30:45|
12872+
+--------+-------------------------------+
12873+
<BLANKLINE>
12874+
12875+
Example 2: 12-hour format with AM/PM
12876+
12877+
>>> from pyspark.sql import functions as sf
12878+
>>> df = spark.sql("SELECT TIME'14:30:45' as time_col")
12879+
>>> df.select("*", sf.time_format('time_col', 'hh:mm:ss a')).show()
12880+
+--------+---------------------------------+
12881+
|time_col|time_format(time_col, hh:mm:ss a)|
12882+
+--------+---------------------------------+
12883+
|14:30:45| 02:30:45 PM|
12884+
+--------+---------------------------------+
12885+
<BLANKLINE>
12886+
12887+
Example 3: With microseconds
12888+
12889+
>>> from pyspark.sql import functions as sf
12890+
>>> df = spark.sql("SELECT TIME'14:30:45.123456' as time_col")
12891+
>>> df.select("*", sf.time_format('time_col', 'HH:mm:ss.SSSSSS')).show()
12892+
+---------------+--------------------------------------+
12893+
| time_col|time_format(time_col, HH:mm:ss.SSSSSS)|
12894+
+---------------+--------------------------------------+
12895+
|14:30:45.123456| 14:30:45.123456|
12896+
+---------------+--------------------------------------+
12897+
<BLANKLINE>
12898+
"""
12899+
from pyspark.sql.classic.column import _to_java_column
12900+
12901+
return _invoke_function("time_format", _to_java_column(time), _enum_to_value(format))
12902+
12903+
1283212904
@_try_remote_functions
1283312905
def time_trunc(unit: "ColumnOrName", time: "ColumnOrName") -> Column:
1283412906
"""

sql/api/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6889,6 +6889,22 @@ object functions {
68896889
Column.fn("time_diff", unit, start, end)
68906890
}
68916891

6892+
/**
6893+
* Converts a time to a string in the specified format.
6894+
*
6895+
* @param time
6896+
* A column of time values to be formatted.
6897+
* @param format
6898+
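* A time format string. See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Datetime Patterns</a> for valid patterns.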
6899+
* @return
6900+
* String representation of the time in the specified format.
6901+
* @group datetime_funcs
6902+
* @since 4.2.0
6903+
*/
6904+
def time_format(time: Column, format: String): Column = {
6905+
Column.fn("time_format", time, lit(format))
6906+
}
6907+
68926908
/**
68936909
* Returns `time` truncated to the `unit`.
68946910
*

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,7 @@ object FunctionRegistry {
680680
expression[ParseToTimestamp]("to_timestamp"),
681681
expression[ParseToDate]("to_date"),
682682
expression[TimeDiff]("time_diff"),
683+
expression[TimeFormat]("time_format"),
683684
expression[ToTime]("to_time"),
684685
expression[ToBinary]("to_binary"),
685686
expression[ToUnixTimestamp]("to_unix_timestamp"),

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.util.TimeFormatter
3535
import org.apache.spark.sql.catalyst.util.TypeUtils.ordinalNumber
3636
import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
3737
import org.apache.spark.sql.internal.types.StringTypeWithCollation
38-
import org.apache.spark.sql.types.{AbstractDataType, AnyTimeType, ByteType, DataType, DayTimeIntervalType, Decimal, DecimalType, DoubleType, FloatType, IntegerType, IntegralType, LongType, NumericType, ObjectType, TimeType}
38+
import org.apache.spark.sql.types.{AbstractDataType, AnyTimeType, ByteType, DataType, DayTimeIntervalType, Decimal, DecimalType, DoubleType, FloatType, IntegerType, IntegralType, LongType, NumericType, ObjectType, StringType, TimeType}
3939
import org.apache.spark.sql.types.DayTimeIntervalType.{HOUR, SECOND}
4040
import org.apache.spark.unsafe.types.UTF8String
4141

@@ -1036,3 +1036,82 @@ case class TimeToMicros(child: Expression)
10361036
override protected def withNewChildInternal(newChild: Expression): TimeToMicros =
10371037
copy(child = newChild)
10381038
}
1039+
1040+
// scalastyle:off line.size.limit
1041+
@ExpressionDescription(
1042+
usage = "_FUNC_(time, format) - Converts a time to a value of string in the format specified by the time format given by the second argument.",
1043+
arguments = """
1044+
Arguments:
1045+
* time - A time value to be converted to string.
1046+
* format - Time format pattern to follow. See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Datetime Patterns</a> for valid
1047+
time format patterns. Note: Only time-related patterns (H, h, m, s, S, a) are meaningful for TIME values.
1048+
""",
1049+
examples = """
1050+
Examples:
1051+
> SELECT _FUNC_(TIME'14:30:45', 'HH:mm:ss');
1052+
14:30:45
1053+
> SELECT _FUNC_(TIME'14:30:45', 'hh:mm:ss a');
1054+
02:30:45 PM
1055+
> SELECT _FUNC_(TIME'14:30:45.123456', 'HH:mm:ss.SSSSSS');
1056+
14:30:45.123456
1057+
> SELECT _FUNC_(TIME'09:05:00', 'h:mm a');
1058+
9:05 AM
1059+
""",
1060+
group = "datetime_funcs",
1061+
since = "4.2.0")
1062+
// scalastyle:on line.size.limit
1063+
case class TimeFormat(left: Expression, right: Expression)
1064+
extends BinaryExpression
1065+
with ImplicitCastInputTypes {
1066+
1067+
override def nullIntolerant: Boolean = true
1068+
1069+
override def inputTypes: Seq[AbstractDataType] =
1070+
Seq(AnyTimeType, StringTypeWithCollation(supportsTrimCollation = true))
1071+
1072+
override def dataType: DataType = StringType
1073+
1074+
// Cache the formatter if the format string is a foldable expression
1075+
@transient private lazy val formatterOption: Option[TimeFormatter] =
1076+
if (right.foldable) {
1077+
Option(right.eval()).map { format =>
1078+
TimeFormatter(format.toString, TimeFormatter.defaultLocale, isParsing = false)
1079+
}
1080+
} else {
1081+
None
1082+
}
1083+
1084+
override protected def nullSafeEval(time: Any, format: Any): Any = {
1085+
val nanos = time.asInstanceOf[Long]
1086+
val formatter = formatterOption.getOrElse {
1087+
TimeFormatter(format.toString, TimeFormatter.defaultLocale, isParsing = false)
1088+
}
1089+
UTF8String.fromString(formatter.format(nanos))
1090+
}
1091+
1092+
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
1093+
formatterOption.map { tf =>
1094+
val timeFormatter = ctx.addReferenceObj("timeFormatter", tf)
1095+
defineCodeGen(ctx, ev, (time, _) => {
1096+
s"""UTF8String.fromString($timeFormatter.format($time))"""
1097+
})
1098+
}.getOrElse {
1099+
val tfClass = TimeFormatter.getClass.getName.stripSuffix("$")
1100+
val locale = ctx.addReferenceObj(
1101+
"locale", TimeFormatter.defaultLocale, classOf[Locale].getName)
1102+
defineCodeGen(ctx, ev, (time, format) => {
1103+
s"""|UTF8String.fromString($tfClass$$.MODULE$$.apply(
1104+
| $format.toString(),
1105+
| $locale,
1106+
| false)
1107+
|.format($time))""".stripMargin
1108+
})
1109+
}
1110+
}
1111+
1112+
override def prettyName: String = "time_format"
1113+
1114+
override protected def withNewChildrenInternal(
1115+
newLeft: Expression, newRight: Expression): TimeFormat =
1116+
copy(left = newLeft, right = newRight)
1117+
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -666,4 +666,66 @@ class TimeExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
666666
val timeVal = TimeFromSeconds(Literal(secondsValue))
667667
checkEvaluation(TimeToMicros(timeVal), 14500000L)
668668
}
669+
670+
test("TimeFormat - foldable and non-foldable formats with codegen") {
671+
val time = localTime(14, 30, 45, 123456)
672+
val timeLit = Literal(time, TimeType())
673+
674+
// Foldable format - formatter is built lazily once and reused across evaluations
675+
val foldableExpr = TimeFormat(timeLit, Literal("HH:mm:ss"))
676+
checkEvaluation(foldableExpr, "14:30:45")
677+
checkEvaluation(
678+
TimeFormat(Literal(localTime(9, 5, 0), TimeType()), Literal("hh:mm:ss a")),
679+
"09:05:00 AM")
680+
checkEvaluation(
681+
TimeFormat(timeLit, Literal("HH:mm:ss.SSSSSS")),
682+
"14:30:45.123456")
683+
684+
// Non-foldable format - formatter created per evaluation
685+
val nonFoldableExpr = TimeFormat(timeLit, NonFoldableLiteral("HH:mm:ss", StringType))
686+
checkEvaluation(nonFoldableExpr, "14:30:45")
687+
checkEvaluation(
688+
TimeFormat(timeLit, NonFoldableLiteral("hh:mm:ss a", StringType)),
689+
"02:30:45 PM")
690+
checkEvaluation(
691+
TimeFormat(timeLit, NonFoldableLiteral("HH:mm:ss.SSSSSS", StringType)),
692+
"14:30:45.123456")
693+
694+
// Multiple formats on same time
695+
Seq("HH:mm:ss", "hh:mm a", "HH:mm", "HH").zip(
696+
Seq("14:30:45", "02:30 PM", "14:30", "14")
697+
).foreach { case (format, expected) =>
698+
checkEvaluation(
699+
TimeFormat(timeLit, NonFoldableLiteral(format, StringType)),
700+
expected)
701+
}
702+
703+
// Edge cases
704+
checkEvaluation(
705+
TimeFormat(Literal(localTime(0, 0, 0), TimeType()), Literal("HH:mm:ss")),
706+
"00:00:00")
707+
checkEvaluation(
708+
TimeFormat(Literal(localTime(23, 59, 59, 999999), TimeType()),
709+
Literal("HH:mm:ss.SSSSSS")),
710+
"23:59:59.999999")
711+
712+
// Null handling
713+
checkEvaluation(
714+
TimeFormat(Literal.create(null, TimeType()), Literal("HH:mm:ss")),
715+
null)
716+
checkEvaluation(
717+
TimeFormat(timeLit, Literal.create(null, StringType)),
718+
null)
719+
checkEvaluation(
720+
TimeFormat(Literal.create(null, TimeType()), NonFoldableLiteral("HH:mm:ss", StringType)),
721+
null)
722+
checkEvaluation(
723+
TimeFormat(timeLit, NonFoldableLiteral(null, StringType)),
724+
null)
725+
726+
// Verify foldable and non-foldable produce same result
727+
val foldableResult = foldableExpr.eval()
728+
val nonFoldableResult = nonFoldableExpr.eval()
729+
assert(foldableResult == nonFoldableResult)
730+
}
669731
}

sql/core/src/test/resources/sql-functions/sql-expression-schema.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@
363363
| org.apache.spark.sql.catalyst.expressions.ThetaSketchEstimate | theta_sketch_estimate | SELECT theta_sketch_estimate(theta_sketch_agg(col)) FROM VALUES (1), (1), (2), (2), (3) tab(col) | struct<theta_sketch_estimate(theta_sketch_agg(col, 12)):bigint> |
364364
| org.apache.spark.sql.catalyst.expressions.ThetaUnion | theta_union | SELECT theta_sketch_estimate(theta_union(theta_sketch_agg(col1), theta_sketch_agg(col2))) FROM VALUES (1, 4), (1, 4), (2, 5), (2, 5), (3, 6) tab(col1, col2) | struct<theta_sketch_estimate(theta_union(theta_sketch_agg(col1, 12), theta_sketch_agg(col2, 12), 12)):bigint> |
365365
| org.apache.spark.sql.catalyst.expressions.TimeDiff | time_diff | SELECT time_diff('HOUR', TIME'20:30:29', TIME'21:30:28') | struct<time_diff(HOUR, TIME '20:30:29', TIME '21:30:28'):bigint> |
366+
| org.apache.spark.sql.catalyst.expressions.TimeFormat | time_format | SELECT time_format(TIME'14:30:45', 'HH:mm:ss') | struct<time_format(TIME '14:30:45', HH:mm:ss):string> |
366367
| org.apache.spark.sql.catalyst.expressions.TimeFromMicros | time_from_micros | SELECT time_from_micros(0) | struct<time_from_micros(0):time(6)> |
367368
| org.apache.spark.sql.catalyst.expressions.TimeFromMillis | time_from_millis | SELECT time_from_millis(0) | struct<time_from_millis(0):time(6)> |
368369
| org.apache.spark.sql.catalyst.expressions.TimeFromSeconds | time_from_seconds | SELECT time_from_seconds(0) | struct<time_from_seconds(0):time(6)> |

0 commit comments

Comments
 (0)