Skip to content

Commit ef16508

Browse files
committed
fixup
Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk>
1 parent 5881376 commit ef16508

2 files changed

Lines changed: 2 additions & 120 deletions

File tree

encodings/fsst/src/compute/like.rs

Lines changed: 0 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -351,124 +351,4 @@ mod tests {
351351
assert_arrays_eq!(direct.unwrap(), BoolArray::from_iter([true, false, true]));
352352
Ok(())
353353
}
354-
355-
// -----------------------------------------------------------------------
356-
// Fuzz tests: compare FSST kernel against naive string matching
357-
// -----------------------------------------------------------------------
358-
359-
fn random_string(rng: &mut StdRng, max_len: usize) -> String {
360-
let len = rng.random_range(0..=max_len);
361-
// Use a small alphabet to increase substring hit rate.
362-
(0..len)
363-
.map(|_| (b'a' + rng.random_range(0..6u8)) as char)
364-
.collect()
365-
}
366-
367-
fn fuzz_contains(seed: u64, needle_len: usize, n_strings: usize) -> VortexResult<()> {
368-
let mut rng = StdRng::seed_from_u64(seed);
369-
370-
let needle: String = (0..needle_len)
371-
.map(|_| (b'a' + rng.random_range(0..6u8)) as char)
372-
.collect();
373-
374-
let owned: Vec<String> = (0..n_strings)
375-
.map(|_| random_string(&mut rng, 80))
376-
.collect();
377-
let strings: Vec<Option<&str>> = owned.iter().map(|s| Some(s.as_str())).collect();
378-
379-
let expected: Vec<bool> = owned.iter().map(|s| s.contains(&needle)).collect();
380-
381-
let fsst = make_fsst(&strings, Nullability::NonNullable);
382-
let pattern = format!("%{needle}%");
383-
let result = run_like(fsst, &pattern, LikeOptions::default())?;
384-
385-
let got: Vec<bool> = (0..n_strings)
386-
.map(|i| result.to_bit_buffer().value(i))
387-
.collect();
388-
389-
for (i, (e, g)) in expected.iter().zip(got.iter()).enumerate() {
390-
assert_eq!(
391-
e, g,
392-
"mismatch at index {i}: string={:?}, needle={needle:?}, expected={e}, got={g}",
393-
&owned[i],
394-
);
395-
}
396-
Ok(())
397-
}
398-
399-
fn fuzz_prefix(seed: u64, prefix_len: usize, n_strings: usize) -> VortexResult<()> {
400-
let mut rng = StdRng::seed_from_u64(seed);
401-
402-
let prefix: String = (0..prefix_len)
403-
.map(|_| (b'a' + rng.random_range(0..6u8)) as char)
404-
.collect();
405-
406-
let owned: Vec<String> = (0..n_strings)
407-
.map(|_| random_string(&mut rng, 80))
408-
.collect();
409-
let strings: Vec<Option<&str>> = owned.iter().map(|s| Some(s.as_str())).collect();
410-
411-
let expected: Vec<bool> = owned.iter().map(|s| s.starts_with(&prefix)).collect();
412-
413-
let fsst = make_fsst(&strings, Nullability::NonNullable);
414-
let pattern = format!("{prefix}%");
415-
let result = run_like(fsst, &pattern, LikeOptions::default())?;
416-
417-
let got: Vec<bool> = (0..n_strings)
418-
.map(|i| result.to_bit_buffer().value(i))
419-
.collect();
420-
421-
for (i, (e, g)) in expected.iter().zip(got.iter()).enumerate() {
422-
assert_eq!(
423-
e, g,
424-
"mismatch at index {i}: string={:?}, prefix={prefix:?}, expected={e}, got={g}",
425-
&owned[i],
426-
);
427-
}
428-
Ok(())
429-
}
430-
431-
/// Fuzz contains with short needles (1-7 chars) -> BranchlessShiftDfa
432-
#[test]
433-
fn fuzz_contains_short_needle() -> VortexResult<()> {
434-
for seed in 0..50 {
435-
for needle_len in 1..=7 {
436-
fuzz_contains(seed, needle_len, 200)?;
437-
}
438-
}
439-
Ok(())
440-
}
441-
442-
/// Fuzz contains with medium needles (8-14 chars) -> FlatBranchlessDfa
443-
#[test]
444-
fn fuzz_contains_medium_needle() -> VortexResult<()> {
445-
for seed in 0..50 {
446-
for needle_len in [8, 10, 14] {
447-
fuzz_contains(seed, needle_len, 200)?;
448-
}
449-
}
450-
Ok(())
451-
}
452-
453-
/// Fuzz contains with long needles (>14 chars) -> FsstContainsDfa
454-
#[test]
455-
fn fuzz_contains_long_needle() -> VortexResult<()> {
456-
for seed in 0..30 {
457-
for needle_len in [15, 20, 30] {
458-
fuzz_contains(seed, needle_len, 200)?;
459-
}
460-
}
461-
Ok(())
462-
}
463-
464-
/// Fuzz prefix matching
465-
#[test]
466-
fn fuzz_prefix_matching() -> VortexResult<()> {
467-
for seed in 0..50 {
468-
for prefix_len in [1, 3, 5, 10, 13, 20, 40] {
469-
fuzz_prefix(seed, prefix_len, 200)?;
470-
}
471-
}
472-
Ok(())
473-
}
474354
}

encodings/fsst/src/dfa/tests.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,9 @@ fn run_like(array: FSSTArray, pattern: &str) -> VortexResult<BoolArray> {
237237
#[case(&[""], "aaaa%", &[false])]
238238
#[case(&[""], "%aaaa%", &[false])]
239239
#[case(&[""], "%", &[true])]
240+
#[case(&[""], "%%", &[true])]
240241
#[case(&["", "", ""], "%", &[true, true, true])]
242+
#[case(&["", "abc", ""], "%%", &[true, true, true])]
241243
// Single-char patterns
242244
#[case(&["a", "b", ""], "a%", &[true, false, false])]
243245
#[case(&["a", "b", ""], "%a%", &[true, false, false])]

0 commit comments

Comments
 (0)