@@ -167,30 +167,44 @@ where
167167 T : NativePType + FastLanesComparable < Bitpacked = U > ,
168168 U : UnsignedPType + BitPacking + BitPackingCompare ,
169169{
170- let mut bits = collect_chunk_masks :: < U > ( array, |bit_width, packed_chunk, lower_matches| {
171- let mut upper_matches = [ 0u64 ; 16 ] ;
172-
173- unsafe {
174- U :: unchecked_unpack_cmp (
175- bit_width,
176- packed_chunk,
177- lower_matches,
178- |value, lower_bound| lower_matches_bound ( lower_bound, value, options. lower_strict ) ,
170+ let mut bits = match ( options. lower_strict , options. upper_strict ) {
171+ ( StrictComparison :: Strict , StrictComparison :: Strict ) => {
172+ collect_between_masks :: < U , T , _ , _ > (
173+ array,
179174 lower,
180- ) ;
181- U :: unchecked_unpack_cmp (
182- bit_width,
183- packed_chunk,
184- & mut upper_matches,
185- |value, upper_bound| upper_matches_bound ( value, upper_bound, options. upper_strict ) ,
186175 upper,
187- ) ;
176+ NativePType :: is_lt,
177+ NativePType :: is_lt,
178+ )
188179 }
189-
190- for ( lower_match, upper_match) in lower_matches. iter_mut ( ) . zip ( upper_matches) {
191- * lower_match &= upper_match;
180+ ( StrictComparison :: Strict , StrictComparison :: NonStrict ) => {
181+ collect_between_masks :: < U , T , _ , _ > (
182+ array,
183+ lower,
184+ upper,
185+ NativePType :: is_lt,
186+ NativePType :: is_le,
187+ )
192188 }
193- } ) ;
189+ ( StrictComparison :: NonStrict , StrictComparison :: Strict ) => {
190+ collect_between_masks :: < U , T , _ , _ > (
191+ array,
192+ lower,
193+ upper,
194+ NativePType :: is_le,
195+ NativePType :: is_lt,
196+ )
197+ }
198+ ( StrictComparison :: NonStrict , StrictComparison :: NonStrict ) => {
199+ collect_between_masks :: < U , T , _ , _ > (
200+ array,
201+ lower,
202+ upper,
203+ NativePType :: is_le,
204+ NativePType :: is_le,
205+ )
206+ }
207+ } ;
194208
195209 if let Some ( patches) = array. patches ( ) {
196210 apply_patch_predicate :: < T > ( & mut bits, & patches, ctx, |patched| {
@@ -206,6 +220,31 @@ where
206220 . into_array ( ) )
207221}
208222
223+ fn collect_between_masks < U , T , LF , UF > (
224+ array : & BitPackedData ,
225+ lower : T ,
226+ upper : T ,
227+ lower_matches : LF ,
228+ upper_matches : UF ,
229+ ) -> BitBufferMut
230+ where
231+ T : NativePType + FastLanesComparable < Bitpacked = U > ,
232+ U : UnsignedPType + BitPacking ,
233+ LF : Fn ( T , T ) -> bool + Copy ,
234+ UF : Fn ( T , T ) -> bool + Copy ,
235+ {
236+ collect_unpacked_chunk_masks :: < U > ( array, |unpacked, chunk_matches| {
237+ fill_between_chunk :: < U , T , LF , UF > (
238+ unpacked,
239+ chunk_matches,
240+ lower,
241+ upper,
242+ lower_matches,
243+ upper_matches,
244+ ) ;
245+ } )
246+ }
247+
209248fn collect_chunk_masks < U > (
210249 array : & BitPackedData ,
211250 mut fill_chunk : impl FnMut ( usize , & [ U ] , & mut [ u64 ; 16 ] ) ,
@@ -245,6 +284,79 @@ where
245284 )
246285}
247286
287+ fn collect_unpacked_chunk_masks < U > (
288+ array : & BitPackedData ,
289+ mut fill_chunk : impl FnMut ( & [ U ; 1024 ] , & mut [ u64 ; 16 ] ) ,
290+ ) -> BitBufferMut
291+ where
292+ U : UnsignedPType + BitPacking ,
293+ {
294+ if array. is_empty ( ) {
295+ return BitBufferMut :: empty ( ) ;
296+ }
297+
298+ let bit_width = array. bit_width ( ) as usize ;
299+ let packed = array. packed_slice :: < U > ( ) ;
300+ let elems_per_chunk = 128 * bit_width / size_of :: < U > ( ) ;
301+ let num_chunks = ( array. offset ( ) as usize + array. len ( ) ) . div_ceil ( 1024 ) ;
302+ let mut output = BufferMut :: < u64 > :: with_capacity ( num_chunks * 16 ) ;
303+
304+ for chunk_idx in 0 ..num_chunks {
305+ let packed_chunk = & packed[ chunk_idx * elems_per_chunk..] [ ..elems_per_chunk] ;
306+ let mut unpacked = [ U :: default ( ) ; 1024 ] ;
307+ let mut chunk_matches = [ 0u64 ; 16 ] ;
308+
309+ unsafe {
310+ U :: unchecked_unpack ( bit_width, packed_chunk, & mut unpacked) ;
311+ }
312+
313+ fill_chunk ( & unpacked, & mut chunk_matches) ;
314+ output. extend_from_slice ( & chunk_matches) ;
315+ }
316+
317+ let total_len = num_chunks * 1024 ;
318+ let mut output = BitBufferMut :: from_buffer ( output. into_byte_buffer ( ) , 0 , total_len) ;
319+
320+ if array. offset ( ) == 0 {
321+ output. truncate ( array. len ( ) ) ;
322+ return output;
323+ }
324+
325+ BitBufferMut :: copy_from (
326+ & output
327+ . freeze ( )
328+ . slice ( array. offset ( ) as usize ..array. offset ( ) as usize + array. len ( ) ) ,
329+ )
330+ }
331+
332+ #[ inline]
333+ fn fill_between_chunk < U , T , LF , UF > (
334+ unpacked : & [ U ; 1024 ] ,
335+ chunk_matches : & mut [ u64 ; 16 ] ,
336+ lower : T ,
337+ upper : T ,
338+ lower_matches : LF ,
339+ upper_matches : UF ,
340+ ) where
341+ T : NativePType + FastLanesComparable < Bitpacked = U > ,
342+ U : UnsignedPType ,
343+ LF : Fn ( T , T ) -> bool ,
344+ UF : Fn ( T , T ) -> bool ,
345+ {
346+ for ( word_idx, word) in chunk_matches. iter_mut ( ) . enumerate ( ) {
347+ let start = word_idx * 64 ;
348+ let mut mask = 0u64 ;
349+
350+ for bit_idx in 0 ..64 {
351+ let value = T :: as_unpacked ( unpacked[ start + bit_idx] ) ;
352+ mask |=
353+ u64:: from ( lower_matches ( lower, value) && upper_matches ( value, upper) ) << bit_idx;
354+ }
355+
356+ * word = mask;
357+ }
358+ }
359+
248360fn apply_patch_predicate < T > (
249361 bits : & mut BitBufferMut ,
250362 patches : & Patches ,
0 commit comments