@@ -268,7 +268,14 @@ where
268268 S : StringArrayType < ' a > ,
269269{
270270 let ( regex_scalar, is_regex_scalar) = if is_regex_scalar || regex_array. len ( ) == 1 {
271- ( Some ( regex_array. value ( 0 ) ) , true )
271+ (
272+ if regex_array. is_null ( 0 ) {
273+ None
274+ } else {
275+ Some ( regex_array. value ( 0 ) )
276+ } ,
277+ true ,
278+ )
272279 } else {
273280 ( None , false )
274281 } ;
@@ -300,7 +307,7 @@ where
300307 match ( is_regex_scalar, is_start_scalar, is_flags_scalar) {
301308 ( true , true , true ) => {
302309 let regex = match regex_scalar {
303- None | Some ( "" ) => {
310+ None => {
304311 return Ok ( Arc :: new ( Int64Array :: from ( vec ! [ 0 ; values. len( ) ] ) ) ) ;
305312 }
306313 Some ( regex) => regex,
@@ -317,7 +324,7 @@ where
317324 }
318325 ( true , true , false ) => {
319326 let regex = match regex_scalar {
320- None | Some ( "" ) => {
327+ None => {
321328 return Ok ( Arc :: new ( Int64Array :: from ( vec ! [ 0 ; values. len( ) ] ) ) ) ;
322329 }
323330 Some ( regex) => regex,
@@ -346,7 +353,7 @@ where
346353 }
347354 ( true , false , true ) => {
348355 let regex = match regex_scalar {
349- None | Some ( "" ) => {
356+ None => {
350357 return Ok ( Arc :: new ( Int64Array :: from ( vec ! [ 0 ; values. len( ) ] ) ) ) ;
351358 }
352359 Some ( regex) => regex,
@@ -366,7 +373,7 @@ where
366373 }
367374 ( true , false , false ) => {
368375 let regex = match regex_scalar {
369- None | Some ( "" ) => {
376+ None => {
370377 return Ok ( Arc :: new ( Int64Array :: from ( vec ! [ 0 ; values. len( ) ] ) ) ) ;
371378 }
372379 Some ( regex) => regex,
@@ -411,7 +418,7 @@ where
411418 . zip ( regex_array. iter ( ) )
412419 . map ( |( value, regex) | {
413420 let regex = match regex {
414- None | Some ( "" ) => return Ok ( 0 ) ,
421+ None => return Ok ( 0 ) ,
415422 Some ( regex) => regex,
416423 } ;
417424
@@ -447,7 +454,7 @@ where
447454 izip ! ( values. iter( ) , regex_array. iter( ) , flags_array. iter( ) )
448455 . map ( |( value, regex, flags) | {
449456 let regex = match regex {
450- None | Some ( "" ) => return Ok ( 0 ) ,
457+ None => return Ok ( 0 ) ,
451458 Some ( regex) => regex,
452459 } ;
453460
@@ -481,7 +488,7 @@ where
481488 izip ! ( values. iter( ) , regex_array. iter( ) , start_array. iter( ) )
482489 . map ( |( value, regex, start) | {
483490 let regex = match regex {
484- None | Some ( "" ) => return Ok ( 0 ) ,
491+ None => return Ok ( 0 ) ,
485492 Some ( regex) => regex,
486493 } ;
487494
@@ -531,7 +538,7 @@ where
531538 )
532539 . map ( |( value, regex, start, flags) | {
533540 let regex = match regex {
534- None | Some ( "" ) => return Ok ( 0 ) ,
541+ None => return Ok ( 0 ) ,
535542 Some ( regex) => regex,
536543 } ;
537544
@@ -590,6 +597,7 @@ mod tests {
590597 fn test_regexp_count ( ) {
591598 test_case_sensitive_regexp_count_scalar ( ) ;
592599 test_case_sensitive_regexp_count_scalar_start ( ) ;
600+ test_case_sensitive_regexp_count_scalar_empty_pattern ( ) ;
593601 test_case_insensitive_regexp_count_scalar_flags ( ) ;
594602 test_case_sensitive_regexp_count_start_scalar_complex ( ) ;
595603
@@ -719,6 +727,61 @@ mod tests {
719727 } ) ;
720728 }
721729
// Test helper: calls `regexp_count_with_scalar_values` with an empty-string
// pattern (`regex = ""`) for each (value, start) pair below and asserts the
// returned Int64 scalar equals the corresponding entry of `expected`.
// The same check is repeated for the Utf8, LargeUtf8 and Utf8View scalar variants.
730+ fn test_case_sensitive_regexp_count_scalar_empty_pattern ( ) {
731+ let values = [ "abc" , "abc" , "" ] ;
732+ let regex = "" ;
// NOTE(review): `start` looks 1-based (4 on a 3-char value is still accepted) — confirm against the kernel.
733+ let start = [ 1 , 4 , 1 ] ;
// Expected counts per pair: ("abc", 1) -> 4, ("abc", 4) -> 1, ("", 1) -> 0.
// Presumably the empty pattern matches at every remaining position boundary; the
// 0 for an empty value presumably comes from an early return in the kernel — verify.
734+ let expected: Vec < i64 > = vec ! [ 4 , 1 , 0 ] ;
735+
736+ izip ! ( values. iter( ) , start. iter( ) )
737+ . enumerate ( )
738+ . for_each ( |( pos, ( & v, & s) ) | {
// Shadows the outer Vec with an Option<i64> so it can be compared directly
// with the payload bound out of `ScalarValue :: Int64 ( v)` below.
739+ let expected = expected. get ( pos) . cloned ( ) ;
740+
// --- Utf8 value and pattern ---
741+ let v_sv = ScalarValue :: Utf8 ( Some ( v. to_string ( ) ) ) ;
742+ let regex_sv = ScalarValue :: Utf8 ( Some ( regex. to_string ( ) ) ) ;
743+ let start_sv = ScalarValue :: Int64 ( Some ( s) ) ;
// `start_sv` is cloned here because it is reused by the two later calls.
744+ let re =
745+ regexp_count_with_scalar_values ( & [ v_sv, regex_sv, start_sv. clone ( ) ] ) ;
746+ match re {
// Inner `v` shadows the input string; here it is the returned count payload.
747+ Ok ( ColumnarValue :: Scalar ( ScalarValue :: Int64 ( v) ) ) => {
748+ assert_eq ! (
749+ v, expected,
750+ "regexp_count scalar empty-pattern test failed"
751+ ) ;
752+ }
// Any error or non-Int64-scalar result fails the test.
753+ _ => panic ! ( "Unexpected result" ) ,
754+ }
755+
// --- LargeUtf8 value and pattern (same start scalar) ---
756+ let v_sv = ScalarValue :: LargeUtf8 ( Some ( v. to_string ( ) ) ) ;
757+ let regex_sv = ScalarValue :: LargeUtf8 ( Some ( regex. to_string ( ) ) ) ;
758+ let re =
759+ regexp_count_with_scalar_values ( & [ v_sv, regex_sv, start_sv. clone ( ) ] ) ;
760+ match re {
761+ Ok ( ColumnarValue :: Scalar ( ScalarValue :: Int64 ( v) ) ) => {
762+ assert_eq ! (
763+ v, expected,
764+ "regexp_count scalar empty-pattern test failed"
765+ ) ;
766+ }
767+ _ => panic ! ( "Unexpected result" ) ,
768+ }
769+
// --- Utf8View value and pattern; `start_sv` is moved into this final call ---
770+ let v_sv = ScalarValue :: Utf8View ( Some ( v. to_string ( ) ) ) ;
771+ let regex_sv = ScalarValue :: Utf8View ( Some ( regex. to_string ( ) ) ) ;
772+ let re = regexp_count_with_scalar_values ( & [ v_sv, regex_sv, start_sv] ) ;
773+ match re {
774+ Ok ( ColumnarValue :: Scalar ( ScalarValue :: Int64 ( v) ) ) => {
775+ assert_eq ! (
776+ v, expected,
777+ "regexp_count scalar empty-pattern test failed"
778+ ) ;
779+ }
780+ _ => panic ! ( "Unexpected result" ) ,
781+ }
782+ } ) ;
783+ }
784+
722785 fn test_case_insensitive_regexp_count_scalar_flags ( ) {
723786 let values = [ "" , "aabca" , "abcabc" , "abcAbcab" , "abcabcabc" ] ;
724787 let regex = "abc" ;
0 commit comments