@@ -9,7 +9,7 @@ use std::{cell::Cell, collections::BTreeMap};
99
1010use num_bigint:: BigInt ;
1111use num_traits:: ToPrimitive ;
12- use onig:: { Regex , RegexOptions , Syntax } ;
12+ use onig:: { MatchParam , Regex , RegexOptions , SearchOptions , Syntax } ;
1313
1414use crate :: {
1515 ExprError , ExprResult ,
@@ -366,6 +366,25 @@ fn build_regex(pattern_bytes: Vec<u8>) -> ExprResult<(Regex, String)> {
366366 Ok ( ( re, re_string) )
367367}
368368
369+ /// Run a regex search, treating runtime match errors as no match.
370+ fn regex_search < T : onig:: EncodedChars > (
371+ regex : & Regex ,
372+ chars : T ,
373+ to : usize ,
374+ region : & mut onig:: Region ,
375+ ) -> Option < usize > {
376+ regex
377+ . search_with_param (
378+ chars,
379+ 0 ,
380+ to,
381+ SearchOptions :: SEARCH_OPTION_NONE ,
382+ Some ( region) ,
383+ MatchParam :: default ( ) ,
384+ )
385+ . unwrap_or ( None )
386+ }
387+
369388/// Find matches in the input using the compiled regex
370389fn find_match ( regex : Regex , re_string : String , left_bytes : Vec < u8 > ) -> String {
371390 use onig:: EncodedBytes ;
@@ -380,13 +399,7 @@ fn find_match(regex: Regex, re_string: String, left_bytes: Vec<u8>) -> String {
380399 // In UTF-8 locale, check if input is valid UTF-8
381400 if let Ok ( left_str) = std:: str:: from_utf8 ( & left_bytes) {
382401 // Valid UTF-8, match as UTF-8
383- let pos = regex. search_with_encoding (
384- left_str,
385- 0 ,
386- left_str. len ( ) ,
387- onig:: SearchOptions :: SEARCH_OPTION_NONE ,
388- Some ( & mut region) ,
389- ) ;
402+ let pos = regex_search ( & regex, left_str, left_str. len ( ) , & mut region) ;
390403
391404 if pos. is_some ( ) {
392405 if regex. captures_len ( ) > 0 {
@@ -421,13 +434,7 @@ fn find_match(regex: Regex, re_string: String, left_bytes: Vec<u8>) -> String {
421434 . ok ( ) ;
422435
423436 if let Some ( re_ascii) = re_ascii {
424- let pos = re_ascii. search_with_encoding (
425- left_encoded,
426- 0 ,
427- left_bytes. len ( ) ,
428- onig:: SearchOptions :: SEARCH_OPTION_NONE ,
429- Some ( & mut region) ,
430- ) ;
437+ let pos = regex_search ( & re_ascii, left_encoded, left_bytes. len ( ) , & mut region) ;
431438
432439 if pos. is_some ( ) {
433440 if re_ascii. captures_len ( ) > 0 {
@@ -469,13 +476,7 @@ fn find_match(regex: Regex, re_string: String, left_bytes: Vec<u8>) -> String {
469476 UEncoding :: Ascii => {
470477 // In ASCII/C locale, work with bytes directly
471478 let left_encoded = EncodedBytes :: ascii ( & left_bytes) ;
472- let pos = regex. search_with_encoding (
473- left_encoded,
474- 0 ,
475- left_bytes. len ( ) ,
476- onig:: SearchOptions :: SEARCH_OPTION_NONE ,
477- Some ( & mut region) ,
478- ) ;
479+ let pos = regex_search ( & regex, left_encoded, left_bytes. len ( ) , & mut region) ;
479480
480481 if pos. is_some ( ) {
481482 if regex. captures_len ( ) > 0 {
@@ -515,13 +516,7 @@ fn evaluate_match_expression(left_bytes: Vec<u8>, right_bytes: Vec<u8>) -> ExprR
515516 // Try to find the actual capture bytes for ASCII locale
516517 let mut region = onig:: Region :: new ( ) ;
517518 let left_encoded = onig:: EncodedBytes :: ascii ( & left_bytes) ;
518- let pos = regex. search_with_encoding (
519- left_encoded,
520- 0 ,
521- left_bytes. len ( ) ,
522- onig:: SearchOptions :: SEARCH_OPTION_NONE ,
523- Some ( & mut region) ,
524- ) ;
519+ let pos = regex_search ( & regex, left_encoded, left_bytes. len ( ) , & mut region) ;
525520
526521 if pos. is_some ( ) {
527522 if let Some ( ( start, end) ) = region. pos ( 1 ) {
0 commit comments