@@ -140,39 +140,54 @@ fn ends_with_ignore_ascii_case(haystack: &str, needle: &str) -> bool {
140140
141141/// Transforms a like `pattern` to a regex compatible pattern. To achieve that, it does:
142142///
143- /// 1. Replace like wildcards for regex expressions as the pattern will be evaluated using regex match: `%` => `.*` and `_` => `.`
144- /// 2. Escape regex meta characters to match them and not be evaluated as regex special chars. For example: `.` => `\\.`
145- /// 3. Replace escaped like wildcards removing the escape characters to be able to match it as a regex. For example: `\\%` => `%`
143+ /// 1. Replace `LIKE` multi-character wildcards `%` => `.*` (unless they're at the start or end of the pattern,
144+ /// where the regex is just truncated - e.g. `%foo%` => `foo` rather than `^.*foo.*$`)
145+ /// 2. Replace `LIKE` single-character wildcards `_` => `.`
146+ /// 3. Escape regex meta characters to match them and not be evaluated as regex special chars. e.g. `.` => `\\.`
147+ /// 4. Unescape escaped `LIKE` wildcards so that the regex matches them as literal characters. e.g. `\\%` => `%`
146148fn regex_like ( pattern : & str , case_insensitive : bool ) -> Result < Regex , ArrowError > {
147149 let mut result = String :: with_capacity ( pattern. len ( ) * 2 ) ;
148- result. push ( '^' ) ;
149150 let mut chars_iter = pattern. chars ( ) . peekable ( ) ;
151+ match chars_iter. peek ( ) {
152+ // if the pattern starts with `%`, we avoid starting the regex with a slow but meaningless `^.*`
153+ Some ( '%' ) => {
154+ chars_iter. next ( ) ;
155+ }
156+ _ => result. push ( '^' ) ,
157+ } ;
158+
150159 while let Some ( c) = chars_iter. next ( ) {
151- if c == '\\' {
152- let next = chars_iter. peek ( ) ;
153- match next {
154- Some ( next) if is_like_pattern ( * next) => {
155- result. push ( * next) ;
156- // Skipping the next char as it is already appended
157- chars_iter. next ( ) ;
160+ match c {
161+ '\\' => {
162+ match chars_iter. peek ( ) {
163+ Some ( next) if is_like_pattern ( * next) => {
164+ result. push ( * next) ;
165+ // Skipping the next char as it is already appended
166+ chars_iter. next ( ) ;
167+ }
168+ _ => {
169+ result. push ( '\\' ) ;
170+ result. push ( '\\' ) ;
171+ }
158172 }
159- _ => {
160- result. push ( '\\' ) ;
173+ }
174+ '%' => result. push_str ( ".*" ) ,
175+ '_' => result. push ( '.' ) ,
176+ c => {
177+ if regex_syntax:: is_meta_character ( c) {
161178 result. push ( '\\' ) ;
162179 }
180+ result. push ( c) ;
163181 }
164- } else if regex_syntax:: is_meta_character ( c) {
165- result. push ( '\\' ) ;
166- result. push ( c) ;
167- } else if c == '%' {
168- result. push_str ( ".*" ) ;
169- } else if c == '_' {
170- result. push ( '.' ) ;
171- } else {
172- result. push ( c) ;
173182 }
174183 }
175- result. push ( '$' ) ;
184+ // instead of ending the regex with a needlessly slow `.*$`, we drop the trailing `.*` and leave the end of the regex unanchored
185+ if result. ends_with ( ".*" ) {
186+ result. pop ( ) ;
187+ result. pop ( ) ;
188+ } else {
189+ result. push ( '$' ) ;
190+ }
176191 RegexBuilder :: new ( & result)
177192 . case_insensitive ( case_insensitive)
178193 . dot_matches_new_line ( true )
@@ -197,9 +212,25 @@ mod tests {
197212 use super :: * ;
198213
199214 #[ test]
200- fn test_replace_like_wildcards ( ) {
201- let a_eq = "_%" ;
202- let expected = "^..*$" ;
215+ fn test_replace_start_end_percent ( ) {
216+ let a_eq = "%foobar%" ;
217+ let expected = "foobar" ;
218+ let r = regex_like ( a_eq, false ) . unwrap ( ) ;
219+ assert_eq ! ( r. to_string( ) , expected) ;
220+ }
221+
#[test]
fn test_replace_middle_percent() {
    // A `%` in the middle of the pattern becomes `.*`; since the pattern neither
    // starts nor ends with `%`, both the `^` and `$` anchors are emitted.
    let regex = regex_like("foo%bar", false).unwrap();
    assert_eq!(regex.to_string(), "^foo.*bar$");
}
229+
#[test]
fn test_replace_underscore() {
    // The single-character wildcard `_` is translated to the regex `.`,
    // and the result stays anchored at both ends.
    let regex = regex_like("foo_bar", false).unwrap();
    assert_eq!(regex.to_string(), "^foo.bar$");
}
0 commit comments