@@ -45,9 +45,12 @@ pub fn spark_to_time(args: &[ColumnarValue], fail_on_error: bool) -> Result<Colu
4545 . unwrap_or ( 1 ) ;
4646
4747 let str_arr = args[ 0 ] . clone ( ) . into_array ( num_rows) ?;
48- let str_array = str_arr. as_any ( ) . downcast_ref :: < StringArray > ( ) . ok_or_else ( || {
49- DataFusionError :: Execution ( "to_time: expected String argument" . to_string ( ) )
50- } ) ?;
48+ let str_array = str_arr
49+ . as_any ( )
50+ . downcast_ref :: < StringArray > ( )
51+ . ok_or_else ( || {
52+ DataFusionError :: Execution ( "to_time: expected String argument" . to_string ( ) )
53+ } ) ?;
5154
5255 let len = str_array. len ( ) ;
5356 let mut builder = Time64NanosecondArray :: builder ( len) ;
@@ -120,13 +123,21 @@ fn string_to_time(s: &str) -> Option<i64> {
120123 }
121124 hour
122125 } else {
123- if hour < 1 || hour > 12 {
126+ if ! ( 1 ..= 12 ) . contains ( & hour ) {
124127 return None ;
125128 }
126129 if is_am {
127- if hour == 12 { 0 } else { hour }
130+ if hour == 12 {
131+ 0
132+ } else {
133+ hour
134+ }
128135 } else if is_pm {
129- if hour == 12 { 12 } else { hour + 12 }
136+ if hour == 12 {
137+ 12
138+ } else {
139+ hour + 12
140+ }
130141 } else {
131142 return None ;
132143 }
@@ -148,7 +159,7 @@ fn string_to_time(s: &str) -> Option<i64> {
148159/// Parse time components from a string like "HH:mm:ss.ffffff" or "T HH:mm:ss".
149160/// Returns (hour, minute, second, microseconds) or None if invalid.
150161fn parse_time_components ( s : & str ) -> Option < ( i32 , i32 , i32 , i32 ) > {
151- let bytes = s. trim_start ( ) . as_bytes ( ) ;
162+ let bytes = s. as_bytes ( ) ;
152163 if bytes. is_empty ( ) {
153164 return None ;
154165 }
@@ -223,7 +234,7 @@ fn parse_digits(bytes: &[u8], start: usize) -> Option<(i32, usize)> {
223234
224235 while pos < bytes. len ( ) {
225236 let b = bytes[ pos] ;
226- if b >= b'0' && b <= b'9' {
237+ if b. is_ascii_digit ( ) {
227238 value = value * 10 + ( b - b'0' ) as i32 ;
228239 count += 1 ;
229240 pos += 1 ;
@@ -233,11 +244,7 @@ fn parse_digits(bytes: &[u8], start: usize) -> Option<(i32, usize)> {
233244 }
234245
235246 if count == 0 || count > 2 {
236- // Hour/minute/second: 1-2 digits
237- // Exception: we allow 1-2 digits for time components
238- if count == 0 {
239- return None ;
240- }
247+ return None ;
241248 }
242249
243250 Some ( ( value, pos) )
@@ -252,7 +259,7 @@ fn parse_fractional(bytes: &[u8], start: usize) -> Option<(i32, usize)> {
252259
253260 while pos < bytes. len ( ) && count < 6 {
254261 let b = bytes[ pos] ;
255- if b >= b'0' && b <= b'9' {
262+ if b. is_ascii_digit ( ) {
256263 value = value * 10 + ( b - b'0' ) as i32 ;
257264 count += 1 ;
258265 pos += 1 ;
@@ -266,7 +273,7 @@ fn parse_fractional(bytes: &[u8], start: usize) -> Option<(i32, usize)> {
266273 }
267274
268275 // Skip any remaining digits beyond 6 (truncation)
269- while pos < bytes. len ( ) && bytes[ pos] >= b'0' && bytes [ pos ] <= b'9' {
276+ while pos < bytes. len ( ) && bytes[ pos] . is_ascii_digit ( ) {
270277 pos += 1 ;
271278 }
272279
@@ -292,8 +299,14 @@ mod tests {
292299 fn test_basic_time_parsing ( ) {
293300 // HH:mm
294301 assert_eq ! ( string_to_time( "00:00" ) , Some ( 0 ) ) ;
295- assert_eq ! ( string_to_time( "12:30" ) , Some ( 12 * NANOS_PER_HOUR + 30 * NANOS_PER_MINUTE ) ) ;
296- assert_eq ! ( string_to_time( "23:59" ) , Some ( 23 * NANOS_PER_HOUR + 59 * NANOS_PER_MINUTE ) ) ;
302+ assert_eq ! (
303+ string_to_time( "12:30" ) ,
304+ Some ( 12 * NANOS_PER_HOUR + 30 * NANOS_PER_MINUTE )
305+ ) ;
306+ assert_eq ! (
307+ string_to_time( "23:59" ) ,
308+ Some ( 23 * NANOS_PER_HOUR + 59 * NANOS_PER_MINUTE )
309+ ) ;
297310
298311 // HH:mm:ss
299312 assert_eq ! (
@@ -320,10 +333,7 @@ mod tests {
320333 Some ( 1_000 * NANOS_PER_MICRO )
321334 ) ;
322335 // 6 digits
323- assert_eq ! (
324- string_to_time( "00:00:00.000001" ) ,
325- Some ( 1 * NANOS_PER_MICRO )
326- ) ;
336+ assert_eq ! ( string_to_time( "00:00:00.000001" ) , Some ( 1 * NANOS_PER_MICRO ) ) ;
327337 // Full precision
328338 assert_eq ! (
329339 string_to_time( "23:59:59.999999" ) ,
@@ -439,4 +449,18 @@ mod tests {
439449 assert_eq ! ( string_to_time( "12:30:45 " ) , string_to_time( "12:30:45" ) ) ;
440450 assert_eq ! ( string_to_time( "1:00:00 AM " ) , string_to_time( "1:00:00 AM" ) ) ;
441451 }
452+
453+ #[ test]
454+ fn test_three_digit_components ( ) {
455+ // 3-digit hour/minute/second must be rejected (Spark requires 1-2 digits)
456+ assert_eq ! ( string_to_time( "001:02:03" ) , None ) ;
457+ assert_eq ! ( string_to_time( "12:001:03" ) , None ) ;
458+ assert_eq ! ( string_to_time( "12:02:003" ) , None ) ;
459+ }
460+
461+ #[ test]
462+ fn test_leading_space_with_t_prefix ( ) {
463+ // Leading space before T should be rejected (Spark only right-trims)
464+ assert_eq ! ( string_to_time( " T12:30" ) , None ) ;
465+ }
442466}
0 commit comments