Skip to content

Commit 5881376

Browse files
committed
fixup
Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent 173cbda commit 5881376

4 files changed

Lines changed: 106 additions & 6 deletions

File tree

encodings/fsst/src/dfa/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ impl FsstMatcher {
170170
};
171171

172172
let inner = match like_kind {
173-
LikeKind::Prefix("") => MatcherInner::MatchAll,
173+
LikeKind::Prefix("") | LikeKind::Contains("") => MatcherInner::MatchAll,
174174
LikeKind::Prefix(prefix) => {
175175
let prefix = prefix.as_bytes();
176176
if prefix.len() > FlatPrefixDfa::MAX_PREFIX_LEN {

encodings/fsst/src/dfa/tests.rs

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,36 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
use std::sync::LazyLock;
5+
46
use fsst::ESCAPE_CODE;
57
use fsst::Symbol;
8+
use rstest::rstest;
9+
use vortex_array::Canonical;
10+
use vortex_array::IntoArray;
11+
use vortex_array::VortexSessionExecute;
12+
use vortex_array::arrays::BoolArray;
13+
use vortex_array::arrays::ConstantArray;
14+
use vortex_array::arrays::VarBinArray;
15+
use vortex_array::assert_arrays_eq;
16+
use vortex_array::dtype::DType;
17+
use vortex_array::dtype::Nullability;
18+
use vortex_array::scalar_fn::fns::like::Like;
19+
use vortex_array::scalar_fn::fns::like::LikeOptions;
20+
use vortex_array::session::ArraySession;
621
use vortex_error::VortexResult;
22+
use vortex_session::VortexSession;
723

824
use super::FsstMatcher;
925
use super::LikeKind;
1026
use super::flat_contains::FlatContainsDfa;
1127
use super::prefix::FlatPrefixDfa;
28+
use crate::FSSTArray;
29+
use crate::fsst_compress;
30+
use crate::fsst_train_compressor;
31+
32+
/// Process-wide session shared by the end-to-end tests in this module.
///
/// Built lazily on first access from an empty `VortexSession` with `ArraySession`
/// attached; `run_like` uses it to create an execution context.
static SESSION: LazyLock<VortexSession> =
33+
LazyLock::new(|| VortexSession::empty().with::<ArraySession>());
1234

1335
/// Helper: make a Symbol from a byte string (up to 8 bytes, zero-padded).
1436
fn sym(bytes: &[u8]) -> Symbol {
@@ -182,3 +204,76 @@ fn test_contains_pushdown_rejects_len_255() {
182204
let pattern = format!("%{needle}%");
183205
assert!(FsstMatcher::try_new(&[], &[], &pattern).unwrap().is_none());
184206
}
207+
208+
// ---------------------------------------------------------------------------
209+
// End-to-end edge cases: FSST compress → LIKE → compare booleans
210+
// ---------------------------------------------------------------------------
211+
212+
/// Test helper: FSST-compresses `strings` into an `FSSTArray`.
///
/// The inputs are first collected into a `VarBinArray` typed as non-nullable UTF-8; a
/// compressor is then trained on that same array and used to compress it.
// NOTE(review): the dtype is declared `NonNullable` while the parameter admits `None`
// entries. The only caller visible here wraps every input in `Some` — confirm that
// `None` is never expected to be passed.
fn make_fsst(strings: &[Option<&str>]) -> FSSTArray {
213+
let varbin = VarBinArray::from_iter(
214+
strings.iter().copied(),
215+
DType::Utf8(Nullability::NonNullable),
216+
);
217+
// Train the compressor on the very data it is about to compress.
let compressor = fsst_train_compressor(&varbin);
218+
fsst_compress(varbin, &compressor)
219+
}
220+
221+
/// Test helper: evaluates `array LIKE pattern` and returns the result as a `BoolArray`.
///
/// `pattern` is broadcast to the array's length with a `ConstantArray`, a `Like` scalar
/// function array is built over `[array, pattern]` with default `LikeOptions`, and the
/// result is executed to `Canonical` form using a context created from `SESSION`.
///
/// # Errors
///
/// Propagates any error returned while constructing the `Like` array or executing it.
fn run_like(array: FSSTArray, pattern: &str) -> VortexResult<BoolArray> {
222+
use vortex_array::ArrayRef;
223+
use vortex_array::arrays::scalar_fn::ScalarFnArrayExt;
224+
225+
// Read the length up front; `array` is handed to `into_array` in the next statement.
let len = array.len();
226+
let arr: ArrayRef = array.into_array();
227+
// One copy of the pattern per input row.
let pattern_arr = ConstantArray::new(pattern, len).into_array();
228+
let result = Like
229+
.try_new_array(len, LikeOptions::default(), [arr, pattern_arr])?
230+
.into_array()
231+
.execute::<Canonical>(&mut SESSION.create_execution_ctx())?;
232+
// NOTE(review): presumably `into_bool` expects the canonical result to be boolean — confirm.
Ok(result.into_bool())
233+
}
234+
235+
/// Table-driven end-to-end check of `LIKE` over FSST-compressed strings.
///
/// Each case FSST-compresses `strings`, evaluates `LIKE pattern` over the compressed
/// array, and asserts that the boolean result equals `expected` element-wise.
/// Every pattern in the table is a prefix (`x%`), a contains (`%x%`), or the bare `%`.
#[rstest]
236+
// Empty strings
237+
#[case(&[""], "aaaa%", &[false])]
238+
#[case(&[""], "%aaaa%", &[false])]
239+
#[case(&[""], "%", &[true])]
240+
#[case(&["", "", ""], "%", &[true, true, true])]
241+
// Single-char patterns
242+
#[case(&["a", "b", ""], "a%", &[true, false, false])]
243+
#[case(&["a", "b", ""], "%a%", &[true, false, false])]
244+
// Needle longer than every input string
245+
#[case(&["ab", "abc", ""], "%abcd%", &[false, false, false])]
246+
#[case(&["ab", "abc", ""], "abcd%", &[false, false, false])]
247+
// Exact match (prefix pattern = entire string + %)
248+
#[case(&["abc", "abcd", "ab"], "abc%", &[true, true, false])]
249+
#[case(&["abc", "abcd", "ab"], "%abc%", &[true, true, false])]
250+
// Repeated characters — KMP overlap
251+
#[case(&["aa", "aaa", "aaaa", "aba"], "%aaa%", &[false, true, true, false])]
252+
#[case(&["aab", "aaab", "a"], "aaa%", &[false, true, false])]
253+
// Needle at different positions
254+
#[case(&["xxabcyy", "abcyy", "xxabc", "abc", "xabx"], "%abc%", &[true, true, true, true, false])]
255+
// All identical strings
256+
#[case(&["aaa", "aaa", "aaa"], "%aaa%", &[true, true, true])]
257+
#[case(&["aaa", "aaa", "aaa"], "bbb%", &[false, false, false])]
258+
// Single element arrays
259+
#[case(&["hello"], "hello%", &[true])]
260+
#[case(&["hello"], "hellx%", &[false])]
261+
#[case(&["hello"], "%ello%", &[true])]
262+
#[case(&["hello"], "%ellx%", &[false])]
263+
// Overlapping KMP pattern "abab"
264+
#[case(&["ababab", "abab", "aba", "xababx"], "%abab%", &[true, true, false, true])]
265+
// Prefix that shares chars with rest of string
266+
#[case(&["abab", "abba", "abcd"], "ab%", &[true, true, true])]
267+
// NOTE(review): this case repeats the previous one with one extra non-matching input ("ba").
#[case(&["abab", "abba", "abcd", "ba"], "ab%", &[true, true, true, false])]
268+
fn test_like_edge_cases(
269+
#[case] strings: &[&str],
270+
#[case] pattern: &str,
271+
#[case] expected: &[bool],
272+
) -> VortexResult<()> {
273+
// `make_fsst` takes optional strings, so wrap every input in `Some`.
let opts: Vec<Option<&str>> = strings.iter().map(|s| Some(*s)).collect();
274+
let fsst = make_fsst(&opts);
275+
let result = run_like(fsst, pattern)?;
276+
let expected_arr = BoolArray::from_iter(expected.iter().copied());
277+
assert_arrays_eq!(&result, &expected_arr);
278+
Ok(())
279+
}

fuzz/src/fsst_like.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,15 @@ pub fn run_fsst_like_fuzz(fuzz: FuzzFsstLike) -> VortexFuzzResult<bool> {
133133
let expected_val = expected_bits.value(idx);
134134
let actual_val = actual_bits.value(idx);
135135
if expected_val != actual_val {
136-
return Err(VortexFuzzError::ScalarMismatch(
137-
expected_val.into(),
138-
actual_val.into(),
139-
idx,
136+
return Err(VortexFuzzError::VortexError(
137+
vortex_error::vortex_err!(
138+
"FSST LIKE mismatch at index {idx}:\n \
139+
pattern: {pattern:?}\n \
140+
string: {:?}\n \
141+
expected: {expected_val}\n \
142+
actual: {actual_val}",
143+
&strings[idx],
144+
),
140145
Backtrace::capture(),
141146
));
142147
}

vortex-layout/src/layouts/dict/reader.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ impl LayoutReader for DictReader {
225225
mask: MaskFuture,
226226
) -> VortexResult<BoxFuture<'static, VortexResult<ArrayRef>>> {
227227
// TODO: fix up expr partitioning with fallible & null sensitive annotations
228-
let values_eval = self.values_eval(root());
228+
let values_eval = self.values_array();
229229
let codes_eval = self
230230
.codes
231231
.projection_evaluation(row_range, &root(), mask)

0 commit comments

Comments
 (0)