Skip to content

Commit 06cf4a3

Browse files
authored
Fall back from the FSST-specialised LIKE expression if there are escape characters in the LIKE string (#8038)
"\\" is the escape character, but we might need to generalise it to arbitrary symbols in the future. Fixes #8027. Signed-off-by: Robert Kruszewski <github@robertk.io>
1 parent 3a6db6c commit 06cf4a3

2 files changed

Lines changed: 17 additions & 0 deletions

File tree

encodings/fsst/src/dfa/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,13 @@ enum LikeKind<'a> {
208208

209209
impl<'a> LikeKind<'a> {
210210
fn parse(pattern: &'a [u8]) -> Option<Self> {
211+
// The fast-path matchers below do not understand SQL LIKE escape sequences (e.g. `\%`
212+
// matching a literal `%`). If the pattern contains a backslash we fall back to the
213+
// general implementation, which correctly interprets escapes.
214+
if pattern.contains(&b'\\') {
215+
return None;
216+
}
217+
211218
// `prefix%` (including just `%` where prefix is empty)
212219
if let Some(prefix) = pattern.strip_suffix(b"%")
213220
&& !prefix.contains(&b'%')

encodings/fsst/src/dfa/tests.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,16 @@ fn test_like_kind_parse() {
6464
// Suffix and underscore patterns are not supported.
6565
assert!(LikeKind::parse(b"%suffix").is_none());
6666
assert!(LikeKind::parse(b"a_c").is_none());
67+
68+
// Patterns containing the SQL LIKE escape character must not be parsed by the fast path,
69+
// because that path treats `\` as an ordinary byte and would misinterpret escapes. For
70+
// example, `%\%` (the pattern produced by Spark's `endsWith("%")`) means "ends with `%`",
71+
// not "contains `\`". The fast path should bail so the general implementation handles it.
72+
assert!(LikeKind::parse(br"%\%").is_none());
73+
assert!(LikeKind::parse(br"\%%").is_none());
74+
assert!(LikeKind::parse(br"%\_%").is_none());
75+
assert!(LikeKind::parse(br"\_%").is_none());
76+
assert!(LikeKind::parse(br"%\\%").is_none());
6777
}
6878

6979
/// No symbols — all bytes escaped. Simplest case to see the two tables.

0 commit comments

Comments
 (0)