Skip to content

Commit 8150d5a

Browse files
committed
fix[vortex-array]: don't destroy encodings in filter grandchild
I believe this is a regression. A Filter(Slice(Ree)) is pretty common and would eagerly canonicalize its child preventing execute_parent kernels like RunEnd's FilterKernel from firing. This is an issue with dict encoding too. This commit executes its child one step so that execute_parent kernels may match. Signed-off-by: Alfonso Subiotto Marques <alfonso.subiotto@polarsignals.com>
1 parent ac87fe0 commit 8150d5a

2 files changed

Lines changed: 53 additions & 7 deletions

File tree

encodings/runend/src/compute/filter.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ fn filter_run_end_primitive<R: NativePType + AddAssign + From<bool> + AsPrimitiv
115115
#[cfg(test)]
116116
mod tests {
117117
use vortex_array::IntoArray;
118+
use vortex_array::LEGACY_SESSION;
119+
use vortex_array::VortexSessionExecute;
118120
use vortex_array::arrays::PrimitiveArray;
119121
use vortex_array::assert_arrays_eq;
120122
use vortex_error::VortexResult;
@@ -142,4 +144,43 @@ mod tests {
142144
);
143145
Ok(())
144146
}
147+
148+
/// Regression: Filter(Slice(RunEnd)) must preserve RunEnd after execution.
149+
/// Previously Filter.execute() forced its child to canonical, decoding
150+
/// Slice(RunEnd) → Primitive and destroying run structure. The fix lets
151+
/// Filter unwrap one layer at a time so RunEnd's FilterKernel can fire.
152+
#[test]
153+
fn filter_sliced_run_end_preserves_encoding() -> VortexResult<()> {
154+
// 4 runs of 32 each = 128 rows. Large enough that FilterKernel takes
155+
// the run-preserving path (true_count >= 25).
156+
let values: Vec<i32> = [10, 20, 30, 40]
157+
.iter()
158+
.flat_map(|&v| std::iter::repeat_n(v, 32))
159+
.collect();
160+
let arr = RunEnd::encode(PrimitiveArray::from_iter(values).into_array())?;
161+
162+
// Slice off the first 16 rows. Slice(RunEnd), 112 rows, 4 runs.
163+
let sliced = arr.into_array().slice(16..128)?;
164+
165+
// Keep every other row = 112/2 = 56 rows.
166+
let mask = Mask::from_iter((0..sliced.len()).map(|i| i % 2 == 0));
167+
let filtered = sliced.filter(mask)?;
168+
169+
let mut ctx = LEGACY_SESSION.create_execution_ctx();
170+
let executed = filtered.execute_until::<RunEnd>(&mut ctx)?;
171+
assert_eq!(
172+
executed.encoding_id().as_ref(),
173+
"vortex.runend",
174+
"Filter(Slice(RunEnd)) should preserve RunEnd encoding"
175+
);
176+
177+
let expected: Vec<i32> = std::iter::repeat_n(10, 8)
178+
.chain(std::iter::repeat_n(20, 16))
179+
.chain(std::iter::repeat_n(30, 16))
180+
.chain(std::iter::repeat_n(40, 16))
181+
.collect();
182+
assert_arrays_eq!(executed, PrimitiveArray::from_iter(expected));
183+
184+
Ok(())
185+
}
145186
}

vortex-array/src/arrays/filter/vtable.rs

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ use crate::buffer::BufferHandle;
3737
use crate::dtype::DType;
3838
use crate::executor::ExecutionCtx;
3939
use crate::executor::ExecutionResult;
40-
use crate::require_child;
4140
use crate::scalar::Scalar;
4241
use crate::serde::ArrayChildren;
4342
use crate::validity::Validity;
@@ -154,14 +153,20 @@ impl VTable for Filter {
154153
_ => unreachable!("`execute_filter_fast_paths` handles AllTrue and AllFalse"),
155154
};
156155

157-
let array = require_child!(array, array.child(), CHILD_SLOT => AnyCanonical);
156+
if array.child().is_canonical() {
157+
// If the child is already canonical, apply the filter directly.
158+
// TODO(joe): fix the ownership of AnyCanonical
159+
let child = Canonical::from(array.child().as_::<AnyCanonical>());
160+
return Ok(ExecutionResult::done(
161+
execute_filter(child, &mask_values).into_array(),
162+
));
163+
}
158164

159-
// We rely on the optimization pass that runs prior to this execution for filter pushdown,
160-
// so now we can just execute the filter without worrying.
161-
// TODO(joe): fix the ownership of AnyCanonical
162-
let child = Canonical::from(array.child().as_::<AnyCanonical>());
165+
// Execute one step and re-wrap rather than using require_child!(=> AnyCanonical),
166+
// which would decode past intermediate encodings that have their own FilterKernels.
167+
let child = array.child().clone().execute::<ArrayRef>(ctx)?;
163168
Ok(ExecutionResult::done(
164-
execute_filter(child, &mask_values).into_array(),
169+
child.filter(Mask::Values(mask_values))?,
165170
))
166171
}
167172

0 commit comments

Comments
 (0)