Skip to content

Commit a1a9d77

Browse files
authored
CaseWhen uses forward pass with a remaining mask (#6804)
1 parent 15926c3 commit a1a9d77

5 files changed

Lines changed: 563 additions & 130 deletions

File tree

vortex-array/benches/expr/case_when_bench.rs

Lines changed: 81 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,13 +11,15 @@ use vortex_array::ArrayRef;
1111
use vortex_array::Canonical;
1212
use vortex_array::IntoArray;
1313
use vortex_array::VortexSessionExecute;
14+
use vortex_array::arrays::BoolArray;
1415
use vortex_array::arrays::StructArray;
1516
use vortex_array::expr::case_when;
1617
use vortex_array::expr::case_when_no_else;
1718
use vortex_array::expr::eq;
1819
use vortex_array::expr::get_item;
1920
use vortex_array::expr::gt;
2021
use vortex_array::expr::lit;
22+
use vortex_array::expr::lt;
2123
use vortex_array::expr::nested_case_when;
2224
use vortex_array::expr::root;
2325
use vortex_array::session::ArraySession;
@@ -39,6 +41,22 @@ fn make_struct_array(size: usize) -> ArrayRef {
3941
.into_array()
4042
}
4143

44+
/// Builds a struct array with two boolean fields, `c0` and `c1`, each `size` rows long.
///
/// `c0[i]` is `i % 3 == 0` and `c1[i]` is `i % 3 == 1`, so consecutive rows cycle
/// through "only `c0` set", "only `c1` set", and "neither set" (when `i % 3 == 2`).
/// No two adjacent rows share the same pattern, i.e. every run has length 1.
///
/// Panics (via `unwrap`) if `StructArray::from_fields` reports a failure.
45+
fn make_fragmented_array(size: usize) -> ArrayRef {
46+
StructArray::from_fields(&[
47+
(
48+
"c0",
49+
BoolArray::from_iter((0..size).map(|i| i % 3 == 0)).into_array(),
50+
),
51+
(
52+
"c1",
53+
BoolArray::from_iter((0..size).map(|i| i % 3 == 1)).into_array(),
54+
),
55+
])
56+
.unwrap()
57+
.into_array()
58+
}
59+
4260
/// Benchmark a simple binary CASE WHEN with varying array sizes.
4361
#[divan::bench(args = [1000, 10000, 100000])]
4462
fn case_when_simple(bencher: Bencher, size: usize) {
@@ -64,7 +82,7 @@ fn case_when_simple(bencher: Bencher, size: usize) {
6482
}
6583

6684
/// Benchmark n-ary CASE WHEN with 3 conditions.
67-
#[divan::bench(args = [1000, 10000, 100000])]
85+
#[divan::bench(args = [1000, 10000])]
6886
fn case_when_nary_3_conditions(bencher: Bencher, size: usize) {
6987
let array = make_struct_array(size);
7088

@@ -91,7 +109,7 @@ fn case_when_nary_3_conditions(bencher: Bencher, size: usize) {
91109
}
92110

93111
/// Benchmark n-ary CASE WHEN with 10 conditions.
94-
#[divan::bench(args = [1000, 10000, 100000])]
112+
#[divan::bench(args = [1000, 10000])]
95113
fn case_when_nary_10_conditions(bencher: Bencher, size: usize) {
96114
let array = make_struct_array(size);
97115

@@ -119,7 +137,7 @@ fn case_when_nary_10_conditions(bencher: Bencher, size: usize) {
119137
}
120138

121139
/// Benchmark n-ary CASE WHEN with equality conditions (lookup-table style).
122-
#[divan::bench(args = [1000, 10000, 100000])]
140+
#[divan::bench(args = [1000, 10000])]
123141
fn case_when_nary_equality_lookup(bencher: Bencher, size: usize) {
124142
let array = make_struct_array(size);
125143

@@ -185,6 +203,39 @@ fn case_when_all_true(bencher: Bencher, size: usize) {
185203
});
186204
}
187205

206+
/// Benchmark n-ary CASE WHEN where the first branch is intended to dominate (~90% of rows).
207+
/// The three conditions are `value < t` for thresholds at 90%, 95% and 97.5% of `size`,
/// with a literal ELSE, so each later condition can only apply to rows the earlier ones
/// left unmatched.
208+
/// NOTE(review): the original note said subsequent conditions "match no remaining rows and
/// are skipped entirely". That depends on the `value` column built by `make_struct_array`
/// (not visible here); if `value` spans `0..size`, the later branches still match a few
/// rows. Confirm which behaviour this benchmark is meant to exercise.
209+
#[divan::bench(args = [1000, 10000])]
210+
fn case_when_nary_early_dominant(bencher: Bencher, size: usize) {
211+
let array = make_struct_array(size);
212+
213+
// CASE WHEN value < 90% THEN 1 WHEN value < 95% THEN 2 WHEN value < 97.5% THEN 3 ELSE 4
// Thresholds are computed in `i32`; the benchmark sizes above (1000, 10000) are small
// enough that the `as i32` cast and the multiplications cannot overflow.
214+
let t1 = (size as i32 * 9) / 10;
215+
let t2 = (size as i32 * 19) / 20;
216+
let t3 = (size as i32 * 39) / 40;
217+
218+
let expr = nested_case_when(
219+
vec![
220+
(lt(get_item("value", root()), lit(t1)), lit(1i32)),
221+
(lt(get_item("value", root()), lit(t2)), lit(2i32)),
222+
(lt(get_item("value", root()), lit(t3)), lit(3i32)),
223+
],
224+
Some(lit(4i32)),
225+
);
226+
227+
// The expression and input array are built once, outside the measured closure, and
// handed to it by reference.
bencher
228+
.with_inputs(|| (&expr, &array))
229+
.bench_refs(|(expr, array)| {
230+
// A new execution context is created on every invocation of the closure.
let mut ctx = SESSION.create_execution_ctx();
231+
array
232+
.apply(expr)
233+
.unwrap()
234+
.execute::<Canonical>(&mut ctx)
235+
.unwrap()
236+
});
237+
}
238+
188239
/// Benchmark CASE WHEN where all conditions are false.
189240
#[divan::bench(args = [1000, 10000, 100000])]
190241
fn case_when_all_false(bencher: Bencher, size: usize) {
@@ -208,3 +259,30 @@ fn case_when_all_false(bencher: Bencher, size: usize) {
208259
.unwrap()
209260
});
210261
}
262+
263+
/// Benchmark CASE WHEN where the selected branch changes on every row, cycling through
/// three outcomes: branch 0 (`c0`), branch 1 (`c1`), then the ELSE fallback.
/// NOTE(review): the original note says this triggers `merge_row_by_row`; that function is
/// not visible here, so confirm against the CASE WHEN implementation.
264+
/// Run length = 1: rows with `i % 3 == 0` take branch 0, `i % 3 == 1` take branch 1, and
/// every third row (`i % 3 == 2`) matches neither condition and falls through to ELSE.
265+
#[divan::bench(args = [100, 1000])]
266+
fn case_when_fragmented(bencher: Bencher, size: usize) {
267+
let array = make_fragmented_array(size);
268+
269+
// CASE WHEN c0 THEN 0 WHEN c1 THEN 1 ELSE 2 END
// The boolean fields are used directly as the WHEN conditions.
270+
let expr = nested_case_when(
271+
vec![
272+
(get_item("c0", root()), lit(0i32)),
273+
(get_item("c1", root()), lit(1i32)),
274+
],
275+
Some(lit(2i32)),
276+
);
277+
278+
// The expression and input array are built once, outside the measured closure, and
// handed to it by reference.
bencher
279+
.with_inputs(|| (&expr, &array))
280+
.bench_refs(|(expr, array)| {
281+
// A new execution context is created on every invocation of the closure.
let mut ctx = SESSION.create_execution_ctx();
282+
array
283+
.apply(expr)
284+
.unwrap()
285+
.execute::<Canonical>(&mut ctx)
286+
.unwrap()
287+
});
288+
}

0 commit comments

Comments
 (0)