@@ -25,7 +25,7 @@ use arrow::datatypes::{
2525 i256, is_validate_decimal_precision, DataType , Date32Type , Decimal256Type , Float32Type ,
2626 Float64Type , Int16Type , Int32Type , Int64Type , Int8Type , TimestampMicrosecondType ,
2727} ;
28- use chrono:: { DateTime , NaiveDate , TimeZone , Timelike } ;
28+ use chrono:: { DateTime , NaiveDate , TimeZone , Timelike , Utc } ;
2929use num:: traits:: CheckedNeg ;
3030use num:: { CheckedSub , Integer } ;
3131use regex:: Regex ;
@@ -34,9 +34,13 @@ use std::str::FromStr;
3434use std:: sync:: Arc ;
3535
3636macro_rules! cast_utf8_to_timestamp {
37- ( $array: expr, $eval_mode: expr, $array_type: ty, $cast_method: ident, $tz: expr) => { {
37+ ( $array: expr, $eval_mode: expr, $array_type: ty, $cast_method: ident, $tz: expr, $with_tz : expr ) => { {
3838 let len = $array. len( ) ;
39- let mut cast_array = PrimitiveArray :: <$array_type>:: builder( len) . with_timezone( "UTC" ) ;
39+ let mut cast_array = if let Some ( tz_str) = $with_tz {
40+ PrimitiveArray :: <$array_type>:: builder( len) . with_timezone( tz_str)
41+ } else {
42+ PrimitiveArray :: <$array_type>:: builder( len)
43+ } ;
4044 for i in 0 ..len {
4145 if $array. is_null( i) {
4246 cast_array. append_null( )
@@ -665,16 +669,28 @@ pub(crate) fn cast_string_to_timestamp(
665669 . downcast_ref :: < GenericStringArray < i32 > > ( )
666670 . expect ( "Expected a string array" ) ;
667671
668- let tz = & timezone:: Tz :: from_str ( timezone_str) . unwrap ( ) ;
669-
670672 let cast_array: ArrayRef = match to_type {
671- DataType :: Timestamp ( _, _) => {
673+ DataType :: Timestamp ( _, Some ( _) ) => {
674+ let tz = & timezone:: Tz :: from_str ( timezone_str) . unwrap ( ) ;
672675 cast_utf8_to_timestamp ! (
673676 string_array,
674677 eval_mode,
675678 TimestampMicrosecondType ,
676679 timestamp_parser,
677- tz
680+ tz,
681+ Some ( "UTC" )
682+ )
683+ }
684+ DataType :: Timestamp ( _, None ) => {
685+ // TimestampNTZ: reuse timestamp_parser with Utc (identity timezone),
686+ // but don't set timezone metadata on the output array
687+ cast_utf8_to_timestamp ! (
688+ string_array,
689+ eval_mode,
690+ TimestampMicrosecondType ,
691+ timestamp_parser,
692+ & Utc ,
693+ None :: <& str >
678694 )
679695 }
680696 _ => unreachable ! ( "Invalid data type {:?} in cast from string" , to_type) ,
@@ -1340,7 +1356,8 @@ mod tests {
13401356 eval_mode,
13411357 TimestampMicrosecondType ,
13421358 timestamp_parser,
1343- tz
1359+ tz,
1360+ Some ( "UTC" )
13441361 ) ;
13451362
13461363 assert_eq ! (
@@ -1350,6 +1367,94 @@ mod tests {
13501367 assert_eq ! ( result. len( ) , 4 ) ;
13511368 }
13521369
1370+ #[ test]
1371+ #[ cfg_attr( miri, ignore) ]
1372+ fn test_cast_string_to_timestamp_ntz ( ) {
1373+ let array: ArrayRef = Arc :: new ( StringArray :: from ( vec ! [
1374+ Some ( "2020-01-01T12:34:56.123456" ) ,
1375+ Some ( "not_a_timestamp" ) ,
1376+ Some ( "2020-01-01" ) ,
1377+ ] ) ) ;
1378+
1379+ let string_array = array
1380+ . as_any ( )
1381+ . downcast_ref :: < GenericStringArray < i32 > > ( )
1382+ . expect ( "Expected a string array" ) ;
1383+
1384+ let eval_mode = EvalMode :: Legacy ;
1385+ let result = cast_utf8_to_timestamp ! (
1386+ & string_array,
1387+ eval_mode,
1388+ TimestampMicrosecondType ,
1389+ timestamp_parser,
1390+ & Utc ,
1391+ None :: <& str >
1392+ ) ;
1393+
1394+ // Key assertion: TimestampNTZ should have NO timezone
1395+ assert_eq ! (
1396+ result. data_type( ) ,
1397+ & DataType :: Timestamp ( TimeUnit :: Microsecond , None )
1398+ ) ;
1399+ assert_eq ! ( result. len( ) , 3 ) ;
1400+
1401+ // Verify the actual values are not timezone-converted
1402+ let ts_array = result
1403+ . as_any ( )
1404+ . downcast_ref :: < PrimitiveArray < TimestampMicrosecondType > > ( )
1405+ . expect ( "Expected a timestamp array" ) ;
1406+
1407+ // 2020-01-01T12:34:56.123456 as naive epoch micros
1408+ assert_eq ! ( ts_array. value( 0 ) , 1577882096123456 ) ;
1409+ // "not_a_timestamp" is invalid and should be null
1410+ assert ! ( ts_array. is_null( 1 ) ) ;
1411+ // 2020-01-01 as naive epoch micros (midnight)
1412+ assert_eq ! ( ts_array. value( 2 ) , 1577836800000000 ) ;
1413+ }
1414+
1415+ #[ test]
1416+ fn test_cast_string_to_timestamp_ntz_via_cast_array ( ) -> DataFusionResult < ( ) > {
1417+ let array: ArrayRef = Arc :: new ( StringArray :: from ( vec ! [
1418+ Some ( "2020-01-01T12:34:56.123456" ) ,
1419+ Some ( "T2" ) ,
1420+ ] ) ) ;
1421+
1422+ let timezone = "America/New_York" . to_string ( ) ;
1423+ let cast_options = SparkCastOptions :: new ( EvalMode :: Legacy , & timezone, false ) ;
1424+
1425+ // Cast to Timestamp with timezone
1426+ let result_tz = cast_array (
1427+ Arc :: clone ( & array) ,
1428+ & DataType :: Timestamp ( TimeUnit :: Microsecond , Some ( "UTC" . into ( ) ) ) ,
1429+ & cast_options,
1430+ ) ?;
1431+ assert_eq ! (
1432+ * result_tz. data_type( ) ,
1433+ DataType :: Timestamp ( TimeUnit :: Microsecond , Some ( "UTC" . into( ) ) )
1434+ ) ;
1435+
1436+ // Cast to TimestampNTZ (no timezone)
1437+ let result_ntz = cast_array (
1438+ Arc :: clone ( & array) ,
1439+ & DataType :: Timestamp ( TimeUnit :: Microsecond , None ) ,
1440+ & cast_options,
1441+ ) ?;
1442+ assert_eq ! (
1443+ * result_ntz. data_type( ) ,
1444+ DataType :: Timestamp ( TimeUnit :: Microsecond , None )
1445+ ) ;
1446+
1447+ // The NTZ result should NOT have timezone metadata
1448+ let ntz_array = result_ntz
1449+ . as_any ( )
1450+ . downcast_ref :: < PrimitiveArray < TimestampMicrosecondType > > ( )
1451+ . expect ( "Expected a timestamp array" ) ;
1452+ // 2020-01-01T12:34:56.123456 stored as-is (no timezone conversion)
1453+ assert_eq ! ( ntz_array. value( 0 ) , 1577882096123456 ) ;
1454+
1455+ Ok ( ( ) )
1456+ }
1457+
13531458 #[ test]
13541459 fn test_cast_dict_string_to_timestamp ( ) -> DataFusionResult < ( ) > {
13551460 // prepare input data
0 commit comments