Skip to content

Commit 0ae3711

Browse files
committed
add SQL file and string test
1 parent fafb7e2 commit 0ae3711

2 files changed

Lines changed: 91 additions & 0 deletions

File tree

native/spark-expr/src/conversion_funcs/string.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,6 +1412,58 @@ mod tests {
14121412
assert_eq!(ts_array.value(2), 1577836800000000);
14131413
}
14141414

1415+
#[test]
#[cfg_attr(miri, ignore)]
fn test_cast_string_with_timezone_offset_to_timestamp_ntz() {
    // Rows 0-3 carry an explicit zone designator (a numeric offset or `Z`) and
    // are expected to come back null for TimestampNTZ; row 4 has no designator
    // and serves as the positive control.
    let inputs = vec![
        Some("2020-01-01T12:34:56+05:00"),
        Some("2020-01-01T12:34:56-08:00"),
        Some("2020-01-01T12:34:56Z"),
        Some("2020-01-01T12:34:56.123456+00:00"),
        Some("2020-01-01T12:34:56.123456"),
    ];
    let input: ArrayRef = Arc::new(StringArray::from(inputs));

    let strings = input
        .as_any()
        .downcast_ref::<GenericStringArray<i32>>()
        .expect("Expected a string array");

    let mode = EvalMode::Legacy;
    let casted = cast_utf8_to_timestamp!(
        &strings,
        mode,
        TimestampMicrosecondType,
        timestamp_parser,
        &Utc,
        None::<&str>
    );

    // No timezone on the output type: this is the NTZ flavour of the cast.
    assert_eq!(
        casted.data_type(),
        &DataType::Timestamp(TimeUnit::Microsecond, None)
    );
    assert_eq!(casted.len(), 5);

    let timestamps = casted
        .as_any()
        .downcast_ref::<PrimitiveArray<TimestampMicrosecondType>>()
        .expect("Expected a timestamp array");

    // (row index, failure message) for every input that carries a zone designator.
    let expected_nulls = [
        (0, "'+05:00' offset should produce null"),
        (1, "'-08:00' offset should produce null"),
        (2, "'Z' suffix should produce null"),
        (3, "'+00:00' offset with micros should produce null"),
    ];
    for (row, reason) in expected_nulls {
        assert!(timestamps.is_null(row), "{}", reason);
    }

    // The designator-free row must parse; the expected value is
    // 2020-01-01T12:34:56.123456 expressed as microseconds since the Unix epoch.
    let plain_row = 4;
    assert!(!timestamps.is_null(plain_row));
    assert_eq!(timestamps.value(plain_row), 1577882096123456);
}
1466+
14151467
#[test]
14161468
fn test_cast_string_to_timestamp_ntz_via_cast_array() -> DataFusionResult<()> {
14171469
let array: ArrayRef = Arc::new(StringArray::from(vec![
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing,
12+
-- software distributed under the License is distributed on an
13+
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
-- KIND, either express or implied. See the License for the
15+
-- specific language governing permissions and limitations
16+
-- under the License.
17+
18+
-- ConfigMatrix: parquet.enable.dictionary=false,true
19+
20+
-- Test casting string to timestamp_ntz
21+
-- https://github.com/apache/datafusion-comet/issues/3179
22+
23+
statement
24+
CREATE TABLE test_cast_ts_ntz(s string) USING parquet
25+
26+
statement
27+
INSERT INTO test_cast_ts_ntz VALUES ('2020-01-01T12:34:56.123456'), ('2020-01-01'), ('2020-01-01T12:34:56'), ('2020'), ('2020-01'), (NULL), ('not_a_timestamp'), ('2020-01-01T12:34:56+05:00')
28+
29+
-- Cast string to timestamp_ntz: valid formats should parse, invalid should be null
30+
query
31+
SELECT s, cast(s AS timestamp_ntz) FROM test_cast_ts_ntz
32+
33+
-- Cast a single microsecond-precision value on its own; the timestamp_ntz result is expected to be independent of the session timezone
34+
query
35+
SELECT s, cast(s AS timestamp_ntz) FROM test_cast_ts_ntz WHERE s = '2020-01-01T12:34:56.123456'
36+
37+
-- Cast the same strings to timestamp_ntz and to timestamp (with timezone) side by side, skipping NULL, unparseable, and offset-bearing rows
38+
query
39+
SELECT s, cast(s AS timestamp_ntz) as ts_ntz, cast(s AS timestamp) as ts FROM test_cast_ts_ntz WHERE s IS NOT NULL AND s != 'not_a_timestamp' AND s != '2020-01-01T12:34:56+05:00'

0 commit comments

Comments
 (0)