@@ -11,13 +11,15 @@ use vortex_array::ArrayRef;
1111use vortex_array:: Canonical ;
1212use vortex_array:: IntoArray ;
1313use vortex_array:: VortexSessionExecute ;
14+ use vortex_array:: arrays:: BoolArray ;
1415use vortex_array:: arrays:: StructArray ;
1516use vortex_array:: expr:: case_when;
1617use vortex_array:: expr:: case_when_no_else;
1718use vortex_array:: expr:: eq;
1819use vortex_array:: expr:: get_item;
1920use vortex_array:: expr:: gt;
2021use vortex_array:: expr:: lit;
22+ use vortex_array:: expr:: lt;
2123use vortex_array:: expr:: nested_case_when;
2224use vortex_array:: expr:: root;
2325use vortex_array:: session:: ArraySession ;
@@ -39,6 +41,22 @@ fn make_struct_array(size: usize) -> ArrayRef {
3941 . into_array ( )
4042}
4143
44+ /// Array with boolean columns cycling through thirds: `c0[i] = i%3==0`, `c1[i] = i%3==1`.
45+ fn make_fragmented_array ( size : usize ) -> ArrayRef {
46+ StructArray :: from_fields ( & [
47+ (
48+ "c0" ,
49+ BoolArray :: from_iter ( ( 0 ..size) . map ( |i| i % 3 == 0 ) ) . into_array ( ) ,
50+ ) ,
51+ (
52+ "c1" ,
53+ BoolArray :: from_iter ( ( 0 ..size) . map ( |i| i % 3 == 1 ) ) . into_array ( ) ,
54+ ) ,
55+ ] )
56+ . unwrap ( )
57+ . into_array ( )
58+ }
59+
4260/// Benchmark a simple binary CASE WHEN with varying array sizes.
4361#[ divan:: bench( args = [ 1000 , 10000 , 100000 ] ) ]
4462fn case_when_simple ( bencher : Bencher , size : usize ) {
@@ -64,7 +82,7 @@ fn case_when_simple(bencher: Bencher, size: usize) {
6482}
6583
6684/// Benchmark n-ary CASE WHEN with 3 conditions.
67- #[ divan:: bench( args = [ 1000 , 10000 , 100000 ] ) ]
85+ #[ divan:: bench( args = [ 1000 , 10000 ] ) ]
6886fn case_when_nary_3_conditions ( bencher : Bencher , size : usize ) {
6987 let array = make_struct_array ( size) ;
7088
@@ -91,7 +109,7 @@ fn case_when_nary_3_conditions(bencher: Bencher, size: usize) {
91109}
92110
93111/// Benchmark n-ary CASE WHEN with 10 conditions.
94- #[ divan:: bench( args = [ 1000 , 10000 , 100000 ] ) ]
112+ #[ divan:: bench( args = [ 1000 , 10000 ] ) ]
95113fn case_when_nary_10_conditions ( bencher : Bencher , size : usize ) {
96114 let array = make_struct_array ( size) ;
97115
@@ -119,7 +137,7 @@ fn case_when_nary_10_conditions(bencher: Bencher, size: usize) {
119137}
120138
121139/// Benchmark n-ary CASE WHEN with equality conditions (lookup-table style).
122- #[ divan:: bench( args = [ 1000 , 10000 , 100000 ] ) ]
140+ #[ divan:: bench( args = [ 1000 , 10000 ] ) ]
123141fn case_when_nary_equality_lookup ( bencher : Bencher , size : usize ) {
124142 let array = make_struct_array ( size) ;
125143
@@ -185,6 +203,39 @@ fn case_when_all_true(bencher: Bencher, size: usize) {
185203 } ) ;
186204}
187205
206+ /// Benchmark n-ary CASE WHEN where the first branch dominates (~90% of rows).
207+ /// This highlights the early-exit and deferred-merge optimizations: subsequent conditions
208+ /// match no remaining rows and are skipped entirely.
209+ #[ divan:: bench( args = [ 1000 , 10000 ] ) ]
210+ fn case_when_nary_early_dominant ( bencher : Bencher , size : usize ) {
211+ let array = make_struct_array ( size) ;
212+
213+ // CASE WHEN value < 90% THEN 1 WHEN value < 95% THEN 2 WHEN value < 97.5% THEN 3 ELSE 4
214+ let t1 = ( size as i32 * 9 ) / 10 ;
215+ let t2 = ( size as i32 * 19 ) / 20 ;
216+ let t3 = ( size as i32 * 39 ) / 40 ;
217+
218+ let expr = nested_case_when (
219+ vec ! [
220+ ( lt( get_item( "value" , root( ) ) , lit( t1) ) , lit( 1i32 ) ) ,
221+ ( lt( get_item( "value" , root( ) ) , lit( t2) ) , lit( 2i32 ) ) ,
222+ ( lt( get_item( "value" , root( ) ) , lit( t3) ) , lit( 3i32 ) ) ,
223+ ] ,
224+ Some ( lit ( 4i32 ) ) ,
225+ ) ;
226+
227+ bencher
228+ . with_inputs ( || ( & expr, & array) )
229+ . bench_refs ( |( expr, array) | {
230+ let mut ctx = SESSION . create_execution_ctx ( ) ;
231+ array
232+ . apply ( expr)
233+ . unwrap ( )
234+ . execute :: < Canonical > ( & mut ctx)
235+ . unwrap ( )
236+ } ) ;
237+ }
238+
188239/// Benchmark CASE WHEN where all conditions are false.
189240#[ divan:: bench( args = [ 1000 , 10000 , 100000 ] ) ]
190241fn case_when_all_false ( bencher : Bencher , size : usize ) {
@@ -208,3 +259,30 @@ fn case_when_all_false(bencher: Bencher, size: usize) {
208259 . unwrap ( )
209260 } ) ;
210261}
262+
263+ /// Benchmark CASE WHEN cycling through 3 branches per row (triggers merge_row_by_row).
264+ /// Run length = 1; exercises branch 0, branch 1, and the else fallback at every 3rd row.
265+ #[ divan:: bench( args = [ 100 , 1000 ] ) ]
266+ fn case_when_fragmented ( bencher : Bencher , size : usize ) {
267+ let array = make_fragmented_array ( size) ;
268+
269+ // CASE WHEN c0 THEN 0 WHEN c1 THEN 1 ELSE 2 END
270+ let expr = nested_case_when (
271+ vec ! [
272+ ( get_item( "c0" , root( ) ) , lit( 0i32 ) ) ,
273+ ( get_item( "c1" , root( ) ) , lit( 1i32 ) ) ,
274+ ] ,
275+ Some ( lit ( 2i32 ) ) ,
276+ ) ;
277+
278+ bencher
279+ . with_inputs ( || ( & expr, & array) )
280+ . bench_refs ( |( expr, array) | {
281+ let mut ctx = SESSION . create_execution_ctx ( ) ;
282+ array
283+ . apply ( expr)
284+ . unwrap ( )
285+ . execute :: < Canonical > ( & mut ctx)
286+ . unwrap ( )
287+ } ) ;
288+ }
0 commit comments