@@ -33,7 +33,7 @@ use regex::Regex;
3333use std:: any:: Any ;
3434use std:: sync:: { Arc , OnceLock } ;
3535
36- #[ derive( Debug ) ]
36+ #[ derive( Debug , PartialEq , Eq , Hash ) ]
3737pub struct RegexpSubstrFunc {
3838 signature : Signature ,
3939}
@@ -318,24 +318,23 @@ fn compile_regex(regex: &str, flags: Option<&str>) -> Result<Regex, ArrowError>
318318 if flags. is_empty ( ) {
319319 regex. to_string ( )
320320 } else {
321- format ! ( "(?{}){}" , flags , regex )
321+ format ! ( "(?{flags }){regex}" )
322322 }
323323 }
324324 } ;
325325
326326 Regex :: new ( & pattern) . map_err ( |_| {
327- ArrowError :: ComputeError ( format ! (
328- "Regular expression did not compile: {}" ,
329- pattern
330- ) )
327+ ArrowError :: ComputeError (
328+ format ! ( "Regular expression did not compile: {pattern}" , ) ,
329+ )
331330 } )
332331}
333332
334333#[ cfg( test) ]
335334mod tests {
336335 use crate :: regex:: regexpsubstr:: { regexp_substr, RegexpSubstrFunc } ;
337336 use arrow:: array:: { Array , ArrayRef , Int64Array , LargeStringArray , StringArray } ;
338- use arrow:: datatypes:: DataType ;
337+ use arrow:: datatypes:: { DataType , Field } ;
339338 use datafusion_common:: ScalarValue ;
340339 use datafusion_expr:: { ScalarFunctionArgs , ScalarUDFImpl } ;
341340 use datafusion_expr_common:: columnar_value:: ColumnarValue ;
@@ -371,14 +370,30 @@ mod tests {
371370 ScalarValue :: LargeUtf8 as fn ( Option < String > ) -> ScalarValue ,
372371 ) ,
373372 ] {
373+ let args_vec = vec ! [
374+ ColumnarValue :: Scalar ( scalar( Some ( value. to_string( ) ) ) ) ,
375+ ColumnarValue :: Scalar ( scalar( Some ( regex. to_string( ) ) ) ) ,
376+ ] ;
377+ let arg_fields = args_vec
378+ . iter ( )
379+ . enumerate ( )
380+ . map ( |( idx, arg) | {
381+ Field :: new ( format ! ( "f_{idx}" ) , arg. data_type ( ) , true ) . into ( )
382+ } )
383+ . collect ( ) ;
374384 let result =
375385 RegexpSubstrFunc :: new ( ) . invoke_with_args ( ScalarFunctionArgs {
376- args : vec ! [
377- ColumnarValue :: Scalar ( scalar( Some ( value. to_string( ) ) ) ) ,
378- ColumnarValue :: Scalar ( scalar( Some ( regex. to_string( ) ) ) ) ,
379- ] ,
386+ args : args_vec,
387+ arg_fields,
380388 number_rows : 1 ,
381- return_type : data_type,
389+ return_field : Arc :: new ( Field :: new (
390+ "f" ,
391+ data_type. clone ( ) ,
392+ true ,
393+ ) ) ,
394+ config_options : Arc :: new (
395+ datafusion_common:: config:: ConfigOptions :: default ( ) ,
396+ ) ,
382397 } ) ;
383398 match result {
384399 Ok ( ColumnarValue :: Scalar (
@@ -422,14 +437,26 @@ mod tests {
422437 ) ,
423438 _ => unreachable ! ( ) ,
424439 } ;
440+ let args_vec = vec ! [
441+ ColumnarValue :: Array ( Arc :: new( array_values) ) ,
442+ ColumnarValue :: Scalar ( regex) ,
443+ ] ;
444+ let arg_fields = args_vec
445+ . iter ( )
446+ . enumerate ( )
447+ . map ( |( idx, arg) | {
448+ Field :: new ( format ! ( "f_{idx}" ) , arg. data_type ( ) , true ) . into ( )
449+ } )
450+ . collect ( ) ;
425451 let result =
426452 RegexpSubstrFunc :: new ( ) . invoke_with_args ( ScalarFunctionArgs {
427- args : vec ! [
428- ColumnarValue :: Array ( Arc :: new( array_values) ) ,
429- ColumnarValue :: Scalar ( regex) ,
430- ] ,
453+ args : args_vec,
454+ arg_fields,
431455 number_rows : 1 ,
432- return_type : data_type,
456+ return_field : Arc :: new ( Field :: new ( "f" , data_type. clone ( ) , true ) ) ,
457+ config_options : Arc :: new (
458+ datafusion_common:: config:: ConfigOptions :: default ( ) ,
459+ ) ,
433460 } ) ;
434461 match result {
435462 Ok ( ColumnarValue :: Array ( array) ) => {
@@ -511,22 +538,34 @@ mod tests {
511538 ScalarValue :: LargeUtf8 as fn ( Option < String > ) -> ScalarValue ,
512539 ) ,
513540 ] {
541+ let args_vec = vec ! [
542+ ColumnarValue :: Scalar ( scalar( Some ( value. to_string( ) ) ) ) ,
543+ ColumnarValue :: Scalar ( scalar( Some ( regex. to_string( ) ) ) ) ,
544+ ColumnarValue :: Scalar ( ScalarValue :: Int64 ( Some ( 1 ) ) ) ,
545+ ColumnarValue :: Scalar ( ScalarValue :: Int64 ( Some ( 1 ) ) ) ,
546+ ColumnarValue :: Scalar ( scalar( Some ( flags[ spos] . to_string( ) ) ) ) ,
547+ ColumnarValue :: Scalar ( ScalarValue :: Int64 ( Some ( group_num[ spos] ) ) ) ,
548+ ] ;
549+ let arg_fields = args_vec
550+ . iter ( )
551+ . enumerate ( )
552+ . map ( |( idx, arg) | {
553+ Field :: new ( format ! ( "f_{idx}" ) , arg. data_type ( ) , true ) . into ( )
554+ } )
555+ . collect ( ) ;
514556 let result =
515557 RegexpSubstrFunc :: new ( ) . invoke_with_args ( ScalarFunctionArgs {
516- args : vec ! [
517- ColumnarValue :: Scalar ( scalar( Some ( value. to_string( ) ) ) ) ,
518- ColumnarValue :: Scalar ( scalar( Some ( regex. to_string( ) ) ) ) ,
519- ColumnarValue :: Scalar ( ScalarValue :: Int64 ( Some ( 1 ) ) ) ,
520- ColumnarValue :: Scalar ( ScalarValue :: Int64 ( Some ( 1 ) ) ) ,
521- ColumnarValue :: Scalar ( scalar( Some (
522- flags[ spos] . to_string( ) ,
523- ) ) ) ,
524- ColumnarValue :: Scalar ( ScalarValue :: Int64 ( Some (
525- group_num[ spos] ,
526- ) ) ) ,
527- ] ,
558+ args : args_vec,
559+ arg_fields,
528560 number_rows : 1 ,
529- return_type : data_type,
561+ return_field : Arc :: new ( Field :: new (
562+ "f" ,
563+ data_type. clone ( ) ,
564+ true ,
565+ ) ) ,
566+ config_options : Arc :: new (
567+ datafusion_common:: config:: ConfigOptions :: default ( ) ,
568+ ) ,
530569 } ) ;
531570 match result {
532571 Ok ( ColumnarValue :: Scalar (
0 commit comments