@@ -8,7 +8,6 @@ use vortex_buffer::BitBuffer;
88use vortex_buffer:: BitBufferMut ;
99use vortex_error:: VortexResult ;
1010use vortex_error:: vortex_ensure;
11- use vortex_mask:: Mask ;
1211
1312use super :: super :: Interleave ;
1413use super :: super :: InterleaveArrayExt ;
@@ -21,62 +20,44 @@ use crate::executor::ExecutionCtx;
2120use crate :: executor:: ExecutionResult ;
2221use crate :: match_each_unsigned_integer_ptype;
2322use crate :: require_child;
24- use crate :: validity:: Validity ;
2523
2624/// Gathers `N` boolean values under unsigned `array_indices` / `row_indices` selectors, scattering
27- /// each selected bit (and its validity) into the output position it routes to.
25+ /// each selected bit into the output position it routes to.
2826pub ( super ) fn execute (
2927 array : Array < Interleave > ,
30- ctx : & mut ExecutionCtx ,
28+ _ctx : & mut ExecutionCtx ,
3129) -> VortexResult < ExecutionResult > {
3230 let num_values = array. num_values ( ) ;
3331
34- // Drive every value and both selectors to canonical encodings so we can operate on raw bits.
32+ // Drive both selectors and every value to canonical encodings so we can operate on raw bits.
3533 let mut array = array;
34+ array = require_child ! ( array, array. array_indices( ) , 0 => Primitive ) ;
35+ array = require_child ! ( array, array. row_indices( ) , 1 => Primitive ) ;
3636 for i in 0 ..num_values {
37- array = require_child ! ( array, array. value( i) , i => Bool ) ;
37+ array = require_child ! ( array, array. value( i) , i + 2 => Bool ) ;
3838 }
39- array = require_child ! ( array, array. array_indices( ) , num_values => Primitive ) ;
40- array = require_child ! ( array, array. row_indices( ) , num_values + 1 => Primitive ) ;
4139
42- let dtype = array. as_ref ( ) . dtype ( ) . clone ( ) ;
43- let len = array. as_ref ( ) . len ( ) ;
44- let nullable = dtype. is_nullable ( ) ;
45-
46- // Materialize each value's bits, and its validity mask only when the output can be null.
40+ // Materialize each value's bits; the selectors gather one bit per output below.
4741 let mut value_bits = Vec :: with_capacity ( num_values) ;
48- let mut value_validity = Vec :: with_capacity ( num_values) ;
4942 for i in 0 ..num_values {
50- let value = array. value ( i) . as_ :: < Bool > ( ) ;
51- let bits = value. to_bit_buffer ( ) ;
52- let validity = nullable
53- . then ( || value. validity ( ) ?. execute_mask ( bits. len ( ) , ctx) )
54- . transpose ( ) ?;
55- value_bits. push ( bits) ;
56- value_validity. push ( validity) ;
43+ value_bits. push ( array. value ( i) . as_ :: < Bool > ( ) . to_bit_buffer ( ) ) ;
5744 }
5845
46+ let validity = array. as_ref ( ) . validity ( ) ?;
47+
5948 // Scatter directly from the typed selector buffers — no intermediate `usize` materialization.
6049 let array_indices = array. array_indices ( ) . as_ :: < Primitive > ( ) ;
6150 let row_indices = array. row_indices ( ) . as_ :: < Primitive > ( ) ;
62- let ( values, validity ) = match_each_unsigned_integer_ptype ! ( array_indices. ptype( ) , |A | {
51+ let values = match_each_unsigned_integer_ptype ! ( array_indices. ptype( ) , |A | {
6352 match_each_unsigned_integer_ptype!( row_indices. ptype( ) , |R | {
6453 gather(
65- len,
66- num_values,
6754 & value_bits,
68- & value_validity,
6955 array_indices. as_slice:: <A >( ) ,
7056 row_indices. as_slice:: <R >( ) ,
71- nullable,
7257 ) ?
7358 } )
7459 } ) ;
7560
76- let validity = match validity {
77- Some ( bits) => Validity :: from ( bits. freeze ( ) ) ,
78- None => Validity :: NonNullable ,
79- } ;
8061 Ok ( ExecutionResult :: done ( BoolArray :: try_new (
8162 values. freeze ( ) ,
8263 validity,
@@ -85,43 +66,70 @@ pub(super) fn execute(
8566
8667/// The scatter, monomorphized on the selector integer widths so each `(array_index, row_index)`
8768/// pair is read straight from its packed buffer.
88- ///
89- /// Output bits (and validity) are produced with [`BitBufferMut::collect_bool`], which packs 64
90- /// results per word: every output bit is written branchlessly, avoiding a per-row `set`/`unset`
91- /// (each of which would bounds-check and branch on the random bit value).
92- #[ allow( clippy:: too_many_arguments) ]
9369fn gather < A : AsPrimitive < usize > , R : AsPrimitive < usize > > (
94- len : usize ,
95- num_values : usize ,
9670 value_bits : & [ BitBuffer ] ,
97- value_validity : & [ Option < Mask > ] ,
9871 branches : & [ A ] ,
9972 rows : & [ R ] ,
100- nullable : bool ,
101- ) -> VortexResult < ( BitBufferMut , Option < BitBufferMut > ) > {
102- // Validate the per-row bounds once up front (returning an error rather than panicking), so the
103- // word-packing passes below are tight branchless loops.
73+ ) -> VortexResult < BitBufferMut > {
74+ let len = validate_selectors ( value_bits, branches, rows) ?;
75+
76+ // SAFETY: `validate_selectors` proved `branches.len() == rows.len() == len`, and for every
77+ // `i < len` that `branches[i] < value_bits.len()` and `rows[i] < value_bits[branches[i]].len()`.
78+ Ok ( unsafe { gather_bits ( len, value_bits, branches, rows) } )
79+ }
80+
81+ /// Validates the per-row selector bounds, returning the output length (`branches.len()`).
82+ ///
83+ /// On success, `rows.len() == branches.len() == len` and, for every `i < len`,
84+ /// `branches[i] < value_bits.len()` and `rows[i] < value_bits[branches[i]].len()` — exactly the
85+ /// preconditions of [`gather_bits`]. Errors (rather than panics) on any out-of-bounds selector.
86+ fn validate_selectors < A : AsPrimitive < usize > , R : AsPrimitive < usize > > (
87+ value_bits : & [ BitBuffer ] ,
88+ branches : & [ A ] ,
89+ rows : & [ R ] ,
90+ ) -> VortexResult < usize > {
91+ // Both selectors share one length (validated at construction), which is also the output length; it is re-checked here because the unchecked gather relies on it.
92+ let len = branches. len ( ) ;
93+ vortex_ensure ! (
94+ rows. len( ) == len,
95+ "interleave selectors differ in length: array_indices {len}, row_indices {}" ,
96+ rows. len( )
97+ ) ;
98+
10499 for i in 0 ..len {
105100 let branch = branches[ i] . as_ ( ) ;
106- vortex_ensure ! ( branch < num_values, "interleave array index out of bounds" ) ;
101+ vortex_ensure ! (
102+ branch < value_bits. len( ) ,
103+ "interleave array index out of bounds"
104+ ) ;
107105 vortex_ensure ! (
108106 rows[ i] . as_( ) < value_bits[ branch] . len( ) ,
109107 "interleave row index out of bounds"
110108 ) ;
111109 }
112110
113- let values =
114- BitBufferMut :: collect_bool ( len, |i| value_bits[ branches[ i] . as_ ( ) ] . value ( rows[ i] . as_ ( ) ) ) ;
115-
116- // A missing per-value mask means every row of that value is valid; only materialized when the
117- // output can be null.
118- let validity = nullable. then ( || {
119- BitBufferMut :: collect_bool ( len, |i| {
120- value_validity[ branches[ i] . as_ ( ) ]
121- . as_ref ( )
122- . is_none_or ( |mask| mask. value ( rows[ i] . as_ ( ) ) )
123- } )
124- } ) ;
111+ Ok ( len)
112+ }
125113
126- Ok ( ( values, validity) )
114+ /// Gathers one bit per output from `bits[branches[i]]` at position `rows[i]`, packing 64 results per
115+ /// word with [`BitBufferMut::collect_bool`].
116+ ///
117+ /// Selectors and bits are read without bounds checks, since the bounds-checked `BitBuffer::value` is slower here.
118+ ///
119+ /// # Safety
120+ ///
121+ /// `branches` and `rows` must both contain at least `len` elements. For every `i < len`,
122+ /// `branches[i] < bits.len()` and `rows[i] < bits[branches[i]].len()`.
123+ unsafe fn gather_bits < A : AsPrimitive < usize > , R : AsPrimitive < usize > > (
124+ len : usize ,
125+ bits : & [ BitBuffer ] ,
126+ branches : & [ A ] ,
127+ rows : & [ R ] ,
128+ ) -> BitBufferMut {
129+ // SAFETY: `collect_bool` calls this for `i < len`, and the caller guarantees `branches[i]` and
130+ // `rows[i]` are in bounds for `bits` / the selected buffer.
131+ BitBufferMut :: collect_bool ( len, |i| unsafe {
132+ bits. get_unchecked ( branches. get_unchecked ( i) . as_ ( ) )
133+ . value_unchecked ( rows. get_unchecked ( i) . as_ ( ) )
134+ } )
127135}
0 commit comments