Skip to content

Commit db2e2af

Browse files
authored
Fix: dont do lazy filters on write (#6194)
This should fix the CI failures right now. More detailed discussion that I'll write incoming... Basically we shouldn't be eagerly decompressing validity when filtering it, but for now to make sure things aren't broken this will have to do. Edit: This should fix the break on spiral as well @delta003, as reported by @danking --------- Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent 0981ab1 commit db2e2af

6 files changed

Lines changed: 26 additions & 20 deletions

File tree

vortex-array/src/arrays/filter/execute/mod.rs

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,22 +31,15 @@ mod struct_;
3131
mod varbinview;
3232

3333
/// Reconstruct a [`Mask`] from an [`Arc<MaskValues>`].
34-
#[inline]
3534
fn values_to_mask(values: &Arc<MaskValues>) -> Mask {
3635
Mask::Values(values.clone())
3736
}
3837

39-
/// A helper function that lazily filters a [`Validity`] with a selection mask.
40-
///
41-
/// If the validity is a [`Validity::Array`], then this wraps it in a `FilterArray` instead of
42-
/// eagerly filtering the values immediately.
38+
/// A helper function that lazily filters a [`Validity`] with selection mask values.
4339
fn filter_validity(validity: Validity, mask: &Arc<MaskValues>) -> Validity {
44-
match validity {
45-
v @ (Validity::NonNullable | Validity::AllValid | Validity::AllInvalid) => v,
46-
Validity::Array(arr) => {
47-
Validity::Array(FilterArray::new(arr.clone(), values_to_mask(mask)).into_array())
48-
}
49-
}
40+
validity
41+
.filter(&values_to_mask(mask))
42+
.vortex_expect("Somehow unable to wrap filter around a validity array")
5043
}
5144

5245
/// Check for some fast-path execution conditions before calling [`execute_filter`].

vortex-array/src/compute/filter.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ pub(crate) fn warm_up_vtable() -> usize {
7676
/// not the case.
7777
pub fn filter(array: &dyn Array, mask: &Mask) -> VortexResult<ArrayRef> {
7878
// TODO(connor): Remove this function completely!!!
79-
array.filter(mask.clone())
79+
Ok(array.filter(mask.clone())?.to_canonical()?.into_array())
8080
}
8181

8282
struct Filter;

vortex-array/src/patches.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ use crate::IntoArray;
4040
use crate::ToCanonical;
4141
use crate::arrays::PrimitiveArray;
4242
use crate::compute::cast;
43+
use crate::compute::filter;
4344
use crate::compute::is_sorted;
4445
use crate::compute::take;
4546
use crate::search_sorted::SearchResult;
@@ -626,8 +627,8 @@ impl Patches {
626627
}
627628

628629
// SAFETY: filtering indices/values with same mask maintains their 1:1 relationship
629-
let filtered_indices = self.indices.filter(filter_mask.clone())?;
630-
let filtered_values = self.values.filter(filter_mask)?;
630+
let filtered_indices = filter(&self.indices, &filter_mask)?;
631+
let filtered_values = filter(&self.values, &filter_mask)?;
631632

632633
Ok(Some(Self {
633634
array_len: self.array_len,
@@ -1148,8 +1149,10 @@ fn filter_patches_with_mask<T: IntegerPType>(
11481149
}
11491150

11501151
let new_patch_indices = new_patch_indices.into_array();
1151-
let new_patch_values =
1152-
patch_values.filter(Mask::from_indices(patch_values.len(), new_mask_indices))?;
1152+
let new_patch_values = filter(
1153+
patch_values,
1154+
&Mask::from_indices(patch_values.len(), new_mask_indices),
1155+
)?;
11531156

11541157
Ok(Some(Patches::new(
11551158
true_count,

vortex-array/src/validity.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,17 +181,27 @@ impl Validity {
181181
}
182182
}
183183

184-
/// Keep only the entries for which the mask is true.
184+
/// Lazily filters a [`Validity`] with a selection mask, which keeps only the entries for which
185+
/// the mask is true.
185186
///
186187
/// The result has length equal to the number of true values in mask.
188+
///
189+
/// If the validity is a [`Validity::Array`], then this lazily wraps it in a `FilterArray`
190+
/// instead of eagerly filtering the values immediately.
187191
pub fn filter(&self, mask: &Mask) -> VortexResult<Self> {
188192
// NOTE(ngates): we take the mask as a reference to avoid the caller cloning unnecessarily
189193
// if we happen to be NonNullable, AllValid, or AllInvalid.
190194
match self {
191195
v @ (Validity::NonNullable | Validity::AllValid | Validity::AllInvalid) => {
192196
Ok(v.clone())
193197
}
194-
Validity::Array(arr) => Ok(Validity::Array(arr.filter(mask.clone())?)),
198+
Validity::Array(arr) => Ok(Validity::Array(
199+
arr.filter(mask.clone())?
200+
// TODO(connor): This is wrong!!! We should not be eagerly decompressing the
201+
// validity array.
202+
.to_canonical()?
203+
.into_array(),
204+
)),
195205
}
196206
}
197207

vortex-btrblocks/src/float.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -571,7 +571,7 @@ mod tests {
571571
.display_as(DisplayOptions::MetadataOnly)
572572
.to_string()
573573
.to_lowercase();
574-
assert_eq!(display, "vortex.primitive(f32?, len=96)");
574+
assert_eq!(display, "vortex.sparse(f32?, len=96)");
575575

576576
Ok(())
577577
}

vortex-btrblocks/src/string.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,7 @@ mod tests {
502502
.display_as(DisplayOptions::MetadataOnly)
503503
.to_string()
504504
.to_lowercase();
505-
assert_eq!(display, "vortex.varbinview(utf8?, len=100)");
505+
assert_eq!(display, "vortex.sparse(utf8?, len=100)");
506506

507507
Ok(())
508508
}

0 commit comments

Comments
 (0)