Skip to content

Commit 626da1e

Browse files
JeelRajodiya, alamb, and Jefffrey
authored
feat: Add Spark-compatible monthname function to datafusion-spark (#21639)
**Rationale** The `datafusion-spark` crate is missing the `monthname` function. Spark's [`monthname(date)`](https://spark.apache.org/docs/latest/api/sql/index.html#monthname) returns the **three-letter abbreviated month name** (Jan, Feb, ..., Dec) from a date or timestamp — commonly used in Spark SQL workloads. **What changes are included in this PR?** Adds `SparkMonthName` to `datafusion-spark`'s datetime functions. It uses `arrow::compute::date_part(DatePart::Month)` to extract the month number and maps it to the abbreviated name. The signature accepts a **Date** argument, with implicit coercion from Timestamp types (with or without a time zone). **Are these changes tested?** Yes — 6 unit tests covering scalar dates, array dates with nulls, null scalars, timestamp microseconds, all 12 months, and return field nullability. **Are there any user-facing changes?** New `monthname` scalar function available when using `datafusion-spark`. --------- Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org> Co-authored-by: Jeffrey Vo <jeffrey.vo.australia@gmail.com>
1 parent de41306 commit 626da1e

3 files changed

Lines changed: 298 additions & 0 deletions

File tree

datafusion/spark/src/function/datetime/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ pub mod from_utc_timestamp;
2626
pub mod last_day;
2727
pub mod make_dt_interval;
2828
pub mod make_interval;
29+
pub mod monthname;
2930
pub mod next_day;
3031
pub mod time_trunc;
3132
pub mod to_utc_timestamp;
@@ -52,6 +53,7 @@ make_udf_function!(extract::SparkSecond, second);
5253
make_udf_function!(last_day::SparkLastDay, last_day);
5354
make_udf_function!(make_dt_interval::SparkMakeDtInterval, make_dt_interval);
5455
make_udf_function!(make_interval::SparkMakeInterval, make_interval);
56+
make_udf_function!(monthname::SparkMonthName, monthname);
5557
make_udf_function!(next_day::SparkNextDay, next_day);
5658
make_udf_function!(time_trunc::SparkTimeTrunc, time_trunc);
5759
make_udf_function!(to_utc_timestamp::SparkToUtcTimestamp, to_utc_timestamp);
@@ -117,6 +119,11 @@ pub mod expr_fn {
117119
"Make interval from years, months, weeks, days, hours, mins and secs.",
118120
years months weeks days hours mins secs
119121
));
122+
export_functions!((
123+
monthname,
124+
"Returns the three-letter abbreviated month name from a date or timestamp.",
125+
arg1
126+
));
120127
// TODO: add once ANSI support is added:
121128
// "When both of the input parameters are not NULL and day_of_week is an invalid input, the function throws SparkIllegalArgumentException if spark.sql.ansi.enabled is set to true, otherwise NULL."
122129
export_functions!((
@@ -195,6 +202,7 @@ pub fn functions() -> Vec<Arc<ScalarUDF>> {
195202
make_dt_interval(),
196203
make_interval(),
197204
minute(),
205+
monthname(),
198206
next_day(),
199207
second(),
200208
time_trunc(),
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use std::sync::Arc;
19+
20+
use arrow::array::{AsArray, StringArray};
21+
use arrow::compute::{DatePart, date_part};
22+
use arrow::datatypes::{DataType, Field, FieldRef};
23+
use datafusion_common::types::{NativeType, logical_date};
24+
use datafusion_common::utils::take_function_args;
25+
use datafusion_common::{Result, ScalarValue, internal_err};
26+
use datafusion_expr::{
27+
Coercion, ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl,
28+
Signature, TypeSignatureClass, Volatility,
29+
};
30+
31+
const MONTH_NAMES: [&str; 12] = [
32+
"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
33+
];
34+
35+
fn month_number_to_name(month: i32) -> Option<&'static str> {
36+
MONTH_NAMES.get((month - 1) as usize).copied()
37+
}
38+
39+
/// Spark-compatible `monthname` expression.
40+
/// Returns the three-letter abbreviated month name from a date or timestamp.
41+
///
42+
/// <https://spark.apache.org/docs/latest/api/sql/index.html#monthname>
43+
#[derive(Debug, PartialEq, Eq, Hash)]
44+
pub struct SparkMonthName {
45+
signature: Signature,
46+
}
47+
48+
impl Default for SparkMonthName {
49+
fn default() -> Self {
50+
Self::new()
51+
}
52+
}
53+
54+
impl SparkMonthName {
55+
pub fn new() -> Self {
56+
Self {
57+
signature: Signature::coercible(
58+
vec![Coercion::new_implicit(
59+
TypeSignatureClass::Native(logical_date()),
60+
vec![TypeSignatureClass::Timestamp],
61+
NativeType::Date,
62+
)],
63+
Volatility::Immutable,
64+
),
65+
}
66+
}
67+
}
68+
69+
impl ScalarUDFImpl for SparkMonthName {
70+
fn name(&self) -> &str {
71+
"monthname"
72+
}
73+
74+
fn signature(&self) -> &Signature {
75+
&self.signature
76+
}
77+
78+
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
79+
internal_err!("return_field_from_args should be used instead")
80+
}
81+
82+
fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
83+
let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
84+
Ok(Arc::new(Field::new(self.name(), DataType::Utf8, nullable)))
85+
}
86+
87+
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
88+
let [arg] = take_function_args(self.name(), args.args)?;
89+
match arg {
90+
ColumnarValue::Scalar(scalar) => {
91+
if scalar.is_null() {
92+
return Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)));
93+
}
94+
let arr = scalar.to_array_of_size(1)?;
95+
let month_arr = date_part(&arr, DatePart::Month)?;
96+
let month_val = month_arr
97+
.as_primitive::<arrow::datatypes::Int32Type>()
98+
.value(0);
99+
let name = month_number_to_name(month_val).map(|s| s.to_string());
100+
Ok(ColumnarValue::Scalar(ScalarValue::Utf8(name)))
101+
}
102+
ColumnarValue::Array(arr) => {
103+
let month_arr = date_part(&arr, DatePart::Month)?;
104+
let int_arr = month_arr.as_primitive::<arrow::datatypes::Int32Type>();
105+
106+
let result: StringArray = int_arr
107+
.iter()
108+
.map(|maybe_month| maybe_month.and_then(month_number_to_name))
109+
.collect();
110+
111+
Ok(ColumnarValue::Array(Arc::new(result)))
112+
}
113+
}
114+
}
115+
}
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Scalar date input
19+
query T
20+
SELECT monthname('2024-03-15'::DATE);
21+
----
22+
Mar
23+
24+
# All 12 months
25+
query T
26+
SELECT monthname('2024-01-15'::DATE);
27+
----
28+
Jan
29+
30+
query T
31+
SELECT monthname('2024-02-15'::DATE);
32+
----
33+
Feb
34+
35+
query T
36+
SELECT monthname('2024-03-15'::DATE);
37+
----
38+
Mar
39+
40+
query T
41+
SELECT monthname('2024-04-15'::DATE);
42+
----
43+
Apr
44+
45+
query T
46+
SELECT monthname('2024-05-15'::DATE);
47+
----
48+
May
49+
50+
query T
51+
SELECT monthname('2024-06-15'::DATE);
52+
----
53+
Jun
54+
55+
query T
56+
SELECT monthname('2024-07-15'::DATE);
57+
----
58+
Jul
59+
60+
query T
61+
SELECT monthname('2024-08-15'::DATE);
62+
----
63+
Aug
64+
65+
query T
66+
SELECT monthname('2024-09-15'::DATE);
67+
----
68+
Sep
69+
70+
query T
71+
SELECT monthname('2024-10-15'::DATE);
72+
----
73+
Oct
74+
75+
query T
76+
SELECT monthname('2024-11-15'::DATE);
77+
----
78+
Nov
79+
80+
query T
81+
SELECT monthname('2024-12-15'::DATE);
82+
----
83+
Dec
84+
85+
# NULL handling
86+
query T
87+
SELECT monthname(NULL::DATE);
88+
----
89+
NULL
90+
91+
# Array input
92+
query T
93+
SELECT monthname(d) FROM (VALUES ('2024-01-01'::DATE), ('2024-06-15'::DATE), ('2024-12-31'::DATE), (NULL::DATE)) AS t(d);
94+
----
95+
Jan
96+
Jun
97+
Dec
98+
NULL
99+
100+
# Timestamp input: Spark coerces TIMESTAMP/TIMESTAMP_NTZ to DATE before evaluation
101+
query T
102+
SELECT monthname('2024-03-15 12:34:56'::TIMESTAMP);
103+
----
104+
Mar
105+
106+
query T
107+
SELECT monthname('2024-07-04 00:00:00'::TIMESTAMP);
108+
----
109+
Jul
110+
111+
query T
112+
SELECT monthname(NULL::TIMESTAMP);
113+
----
114+
NULL
115+
116+
# Timestamp array input
117+
query T
118+
SELECT monthname(ts) FROM (VALUES
119+
('2024-01-15 01:02:03'::TIMESTAMP),
120+
('2024-08-20 10:20:30'::TIMESTAMP),
121+
('2024-11-30 23:59:59'::TIMESTAMP),
122+
(NULL::TIMESTAMP)
123+
) AS t(ts);
124+
----
125+
Jan
126+
Aug
127+
Nov
128+
NULL
129+
130+
# TIMESTAMP_NTZ (Timestamp without timezone) — explicit Microsecond precision
131+
query T
132+
SELECT monthname(arrow_cast('2024-04-10 09:15:00', 'Timestamp(Microsecond, None)'));
133+
----
134+
Apr
135+
136+
# TIMESTAMP_NTZ — explicit Millisecond precision
137+
query T
138+
SELECT monthname(arrow_cast('2024-09-05 18:45:30', 'Timestamp(Millisecond, None)'));
139+
----
140+
Sep
141+
142+
# TIMESTAMP_NTZ — explicit Second precision
143+
query T
144+
SELECT monthname(arrow_cast('2024-02-29 00:00:00', 'Timestamp(Second, None)'));
145+
----
146+
Feb
147+
148+
# TIMESTAMP_NTZ — NULL handling
149+
query T
150+
SELECT monthname(arrow_cast(NULL, 'Timestamp(Microsecond, None)'));
151+
----
152+
NULL
153+
154+
# TIMESTAMP with timezone (Spark TIMESTAMP / LTZ) — coerces to Date32
155+
query T
156+
SELECT monthname(arrow_cast('2024-05-20 03:00:00', 'Timestamp(Nanosecond, Some("UTC"))'));
157+
----
158+
May
159+
160+
query T
161+
SELECT monthname(arrow_cast('2024-10-31 23:59:59', 'Timestamp(Microsecond, Some("America/New_York"))'));
162+
----
163+
Oct
164+
165+
# Error: wrong argument type (string without cast)
166+
statement error Function 'monthname' requires Date, but received String
167+
SELECT monthname('not-a-date');
168+
169+
# Error: wrong argument type (integer)
170+
statement error Function 'monthname' requires Date, but received Int64
171+
SELECT monthname(123);
172+
173+
# Error: no arguments
174+
statement error 'monthname' does not support zero arguments
175+
SELECT monthname();

0 commit comments

Comments
 (0)