@@ -180,11 +180,24 @@ impl FsstMatcher {
180180 symbols : & [ Symbol ] ,
181181 symbol_lengths : & [ u8 ] ,
182182 pattern : & [ u8 ] ,
183+ ) -> VortexResult < Option < Self > > {
184+ Self :: try_new_with ( symbols, symbol_lengths, pattern, false )
185+ }
186+
187+ /// Variant of [`Self::try_new`] that opts in to ASCII case-insensitive
188+ /// matching (SQL `ILIKE`). Letter bytes in the needle then accept
189+ /// either case at every position.
190+ pub fn try_new_with (
191+ symbols : & [ Symbol ] ,
192+ symbol_lengths : & [ u8 ] ,
193+ pattern : & [ u8 ] ,
194+ case_insensitive : bool ,
183195 ) -> VortexResult < Option < Self > > {
184196 let Some ( like_kind) = LikeKind :: parse ( pattern) else {
185197 return Ok ( None ) ;
186198 } ;
187199
200+ let ci = case_insensitive;
188201 let inner = match like_kind {
189202 LikeKind :: Prefix ( b"" ) | LikeKind :: Contains ( b"" ) | LikeKind :: Suffix ( b"" ) => {
190203 MatcherInner :: MatchAll
@@ -193,23 +206,29 @@ impl FsstMatcher {
193206 if prefix. len ( ) > FlatPrefixDfa :: MAX_PREFIX_LEN {
194207 return Ok ( None ) ;
195208 }
196- MatcherInner :: Prefix ( FlatPrefixDfa :: new ( symbols, symbol_lengths, prefix) ?)
209+ MatcherInner :: Prefix ( FlatPrefixDfa :: new ( symbols, symbol_lengths, prefix, ci ) ?)
197210 }
198211 LikeKind :: Suffix ( suffix) => {
199212 if suffix. len ( ) > SuffixMatcher :: MAX_SUFFIX_LEN {
200213 return Ok ( None ) ;
201214 }
202- MatcherInner :: Suffix ( SuffixMatcher :: new ( symbols, symbol_lengths, suffix) ?)
215+ MatcherInner :: Suffix ( SuffixMatcher :: new ( symbols, symbol_lengths, suffix, ci ) ?)
203216 }
204217 LikeKind :: Contains ( needle) => {
205218 if needle. len ( ) <= FoldedContainsDfa :: MAX_NEEDLE_LEN {
206219 MatcherInner :: FoldedContains ( FoldedContainsDfa :: new (
207220 symbols,
208221 symbol_lengths,
209222 needle,
223+ ci,
210224 ) ?)
211225 } else if needle. len ( ) <= FlatContainsDfa :: MAX_NEEDLE_LEN {
212- MatcherInner :: Contains ( FlatContainsDfa :: new ( symbols, symbol_lengths, needle) ?)
226+ MatcherInner :: Contains ( FlatContainsDfa :: new (
227+ symbols,
228+ symbol_lengths,
229+ needle,
230+ ci,
231+ ) ?)
213232 } else {
214233 return Ok ( None ) ;
215234 }
@@ -223,6 +242,7 @@ impl FsstMatcher {
223242 symbols,
224243 symbol_lengths,
225244 & segments,
245+ ci,
226246 ) ?) )
227247 }
228248 } ;
@@ -610,20 +630,57 @@ fn build_fused_table(
610630/// expressed.
611631pub ( super ) const WILDCARD : u8 = b'_' ;
612632
/// Folds an ASCII uppercase letter (`A`–`Z`) to its lowercase counterpart.
///
/// Every other byte is returned unchanged: digits, punctuation, letters that
/// are already lowercase, and all non-ASCII bytes (`0x80..=0xFF`).
#[inline]
fn ascii_to_lower(b: u8) -> u8 {
    // The standard library implements exactly this fold; using it avoids the
    // magic `+ 32` offset and any question about arithmetic overflow.
    b.to_ascii_lowercase()
}
638+
613639/// Pattern-position byte equality with wildcard semantics. Returns
614640/// `true` if `a` or `b` is the [`WILDCARD`] byte, or both bytes are
615- /// equal.
641+ /// equal. When `ci` is true, ASCII letter case is ignored.
616642#[ inline]
617- fn pattern_eq ( a : u8 , b : u8 ) -> bool {
618- a == WILDCARD || b == WILDCARD || a == b
643+ fn pattern_eq ( a : u8 , b : u8 , ci : bool ) -> bool {
644+ if a == WILDCARD || b == WILDCARD {
645+ return true ;
646+ }
647+ if ci {
648+ ascii_to_lower ( a) == ascii_to_lower ( b)
649+ } else {
650+ a == b
651+ }
619652}
620653
621654/// Concrete-input byte match against a needle position. The pattern
622655/// position `p` is one of the needle bytes (possibly the wildcard);
623- /// the input byte `b` is always concrete (never a wildcard).
656+ /// the input byte `b` is always concrete (never a wildcard). When `ci`
657+ /// is true, ASCII letter case is ignored.
658+ #[ inline]
659+ #[ expect(
660+ dead_code,
661+ reason = "Reserved for the future correct contains-wildcard DFA."
662+ ) ]
663+ fn pattern_matches_byte ( p : u8 , b : u8 , ci : bool ) -> bool {
664+ if p == WILDCARD {
665+ return true ;
666+ }
667+ if ci {
668+ ascii_to_lower ( p) == ascii_to_lower ( b)
669+ } else {
670+ p == b
671+ }
672+ }
673+
/// Records an advancing transition in one row of a flat transition table.
///
/// `row_start` is the offset of the row's first entry inside `table`; the
/// entry for input byte `x` lives at `table[row_start + x]`. The entry for
/// `needle_byte` is set to `new_state`. When `ci` is `true` and
/// `needle_byte` is an ASCII letter, the entry for the opposite-case letter
/// is set as well, so either case of that letter advances to `new_state`.
/// Non-letter bytes only ever touch their own entry.
///
/// # Panics
///
/// Panics if any written index falls outside `table`.
#[inline]
fn set_advance(table: &mut [u8], row_start: usize, needle_byte: u8, new_state: u8, ci: bool) {
    if ci {
        // For a letter these are its two case variants; for any other byte
        // both conversions return the byte itself, so exactly one cell is
        // written (twice, with the same value).
        table[row_start + usize::from(needle_byte.to_ascii_lowercase())] = new_state;
        table[row_start + usize::from(needle_byte.to_ascii_uppercase())] = new_state;
    } else {
        table[row_start + usize::from(needle_byte)] = new_state;
    }
}
628685
629686/// Build the `(state × byte) → state` KMP transition table.
@@ -641,11 +698,11 @@ fn pattern_matches_byte(p: u8, b: u8) -> bool {
641698///
642699/// This is one 256-byte memcpy + a single override per state, instead
643700/// of running the KMP fallback loop at every cell.
644- fn kmp_byte_transitions ( needle : & [ u8 ] ) -> Vec < u8 > {
701+ fn kmp_byte_transitions ( needle : & [ u8 ] , ci : bool ) -> Vec < u8 > {
645702 let n_states = u8:: try_from ( needle. len ( ) + 1 )
646703 . vortex_expect ( "kmp_byte_transitions: must have needle.len() ≤ 255" ) ;
647704 let accept = n_states - 1 ;
648- let failure = kmp_failure_table ( needle) ;
705+ let failure = kmp_failure_table ( needle, ci ) ;
649706
650707 let mut table = vec ! [ 0u8 ; usize :: from( n_states) * 256 ] ;
651708
@@ -654,7 +711,7 @@ fn kmp_byte_transitions(needle: &[u8]) -> Vec<u8> {
654711 if first == WILDCARD {
655712 table[ 0 ..256 ] . fill ( 1 ) ;
656713 } else {
657- table [ usize :: from ( first ) ] = 1 ;
714+ set_advance ( & mut table , 0 , first , 1 , ci ) ;
658715 }
659716 }
660717
@@ -672,7 +729,7 @@ fn kmp_byte_transitions(needle: &[u8]) -> Vec<u8> {
672729 // Wildcard at position s: every byte advances.
673730 table[ state_row..state_row + 256 ] . fill ( state + 1 ) ;
674731 } else {
675- table[ state_row + usize :: from ( needle[ s] ) ] = state + 1 ;
732+ set_advance ( & mut table, state_row, needle[ s] , state + 1 , ci ) ;
676733 }
677734 }
678735
@@ -685,14 +742,14 @@ fn kmp_byte_transitions(needle: &[u8]) -> Vec<u8> {
685742 table
686743}
687744
688- fn kmp_failure_table ( needle : & [ u8 ] ) -> Vec < u8 > {
745+ fn kmp_failure_table ( needle : & [ u8 ] , ci : bool ) -> Vec < u8 > {
689746 let mut failure = vec ! [ 0u8 ; needle. len( ) ] ;
690747 let mut k = 0u8 ;
691748 for i in 1 ..needle. len ( ) {
692- while k > 0 && !pattern_eq ( needle[ usize:: from ( k) ] , needle[ i] ) {
749+ while k > 0 && !pattern_eq ( needle[ usize:: from ( k) ] , needle[ i] , ci ) {
693750 k = failure[ usize:: from ( k) - 1 ] ;
694751 }
695- if pattern_eq ( needle[ usize:: from ( k) ] , needle[ i] ) {
752+ if pattern_eq ( needle[ usize:: from ( k) ] , needle[ i] , ci ) {
696753 k += 1 ;
697754 }
698755 failure[ i] = k;
0 commit comments