Skip to content

Commit d021152

Browse files
authored
fix[file]: read the one-row pruning result in can_prune instead of re… (#8369)
## Summary

Closes: #8366

`VortexFile::can_prune` stopped pruning for every `and`/`or` and `eq` predicate after #7575 removed bottom-up constant-folding: the falsification expression for those predicates now evaluates to a one-row `Canonical` array rather than a `Constant`, and `can_prune` treated every `Canonical` result as "cannot prune". This change makes `can_prune` read out that one-row result (execute to `Canonical`, take the row-0 scalar) and adds an end-to-end regression test covering bare, `and`/`or`, and `eq` predicates with non-falsifiable controls.

---------

Signed-off-by: Thomas Santerre <thomas@santerre.xyz>
1 parent 0dd63f0 commit d021152

2 files changed

Lines changed: 44 additions & 1 deletion

File tree

vortex-file/src/file.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,13 @@ impl VortexFile {
247247
let mut ctx = self.session.create_execution_ctx();
248248
Ok(match applied.execute::<Columnar>(&mut ctx)? {
249249
Columnar::Constant(s) => s.scalar().as_bool().value() == Some(true),
250-
Columnar::Canonical(_) => false,
250+
Columnar::Canonical(c) => {
251+
c.into_array()
252+
.execute_scalar(0, &mut ctx)?
253+
.as_bool()
254+
.value()
255+
== Some(true)
256+
}
251257
})
252258
}
253259

vortex-file/src/tests.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ use vortex_array::dtype::PType::I32;
3535
use vortex_array::dtype::StructFields;
3636
use vortex_array::expr::and;
3737
use vortex_array::expr::cast;
38+
use vortex_array::expr::col;
3839
use vortex_array::expr::eq;
3940
use vortex_array::expr::get_item;
4041
use vortex_array::expr::gt;
@@ -1953,3 +1954,39 @@ async fn test_segment_ordering_zonemaps_after_data() -> VortexResult<()> {
19531954

19541955
Ok(())
19551956
}
1957+
1958+
#[tokio::test]
1959+
#[cfg_attr(miri, ignore)]
1960+
async fn test_can_prune_composite_predicates() -> VortexResult<()> {
1961+
// Regression test for `can_prune` after `ScalarFnConstantRule` was removed
1962+
// (#7575): composite falsification trees no longer constant-fold during
1963+
// execution, so `can_prune` must read the one-row evaluated result instead
1964+
// of requiring a `Columnar::Constant`. `Eq` is affected too: its
1965+
// falsification is internally `or(min > lit, lit > max)`.
1966+
let st = StructArray::from_fields(&[
1967+
("age", buffer![15i32, 18, 22, 25].into_array()),
1968+
("price", buffer![120i32, 130, 140, 150].into_array()),
1969+
])?;
1970+
let mut buf = ByteBufferMut::empty();
1971+
SESSION
1972+
.write_options()
1973+
.write(&mut buf, st.into_array().to_array_stream())
1974+
.await?;
1975+
let file = SESSION.open_options().open_buffer(buf)?;
1976+
1977+
// Bare comparisons: falsified directly by min/max stats.
1978+
assert!(file.can_prune(&gt(col("age"), lit(30)))?);
1979+
assert!(file.can_prune(&lt(col("price"), lit(100)))?);
1980+
1981+
// Composite predicates whose falsifications are boolean trees.
1982+
assert!(file.can_prune(&and(gt(col("age"), lit(30)), lt(col("price"), lit(100))))?);
1983+
assert!(file.can_prune(&or(gt(col("age"), lit(30)), lt(col("age"), lit(10))))?);
1984+
assert!(file.can_prune(&eq(col("age"), lit(5)))?);
1985+
1986+
// Non-falsifiable controls: rows may match, so pruning must refuse.
1987+
assert!(!file.can_prune(&gt(col("age"), lit(20)))?);
1988+
assert!(!file.can_prune(&eq(col("age"), lit(18)))?);
1989+
assert!(!file.can_prune(&and(gt(col("age"), lit(20)), gt(col("price"), lit(100))))?);
1990+
1991+
Ok(())
1992+
}

0 commit comments

Comments
 (0)