@@ -5,9 +5,8 @@ use std::mem::MaybeUninit;
55
66use crate :: BitBuffer ;
77use crate :: BitBufferMut ;
8- use crate :: Buffer ;
8+ use crate :: BufferMut ;
99use crate :: ByteBufferMut ;
10- use crate :: trusted_len:: TrustedLenExt ;
1110
1211/// Read up to 8 bytes as a little-endian `u64`, zero-padding the high bytes when fewer than 8 are
1312/// supplied. Using [`u64::from_le_bytes`] keeps the bit-numbering identical on little- and
@@ -188,36 +187,52 @@ pub(super) fn bitwise_binary_op<F: FnMut(u64, u64) -> u64>(
188187 mut op : F ,
189188) -> BitBuffer {
190189 assert_eq ! ( left. len( ) , right. len( ) ) ;
191-
192- // If the buffers are aligned, we can use the fast path.
193- if left. offset ( ) . is_multiple_of ( 8 ) && right. offset ( ) . is_multiple_of ( 8 ) {
194- let left_chunks = left. unaligned_chunks ( ) ;
195- let right_chunks = right. unaligned_chunks ( ) ;
196- if left_chunks. lead_padding ( ) == 0
197- && left_chunks. trailing_padding ( ) == 0
198- && right_chunks. lead_padding ( ) == 0
199- && right_chunks. trailing_padding ( ) == 0
200- {
201- let iter = left_chunks
202- . iter ( )
203- . zip ( right_chunks. iter ( ) )
204- . map ( |( l, r) | op ( l, r) ) ;
205- let iter = unsafe { iter. trusted_len ( ) } ;
206- let result = Buffer :: < u64 > :: from_trusted_len_iter ( iter) . into_byte_buffer ( ) ;
207- return BitBuffer :: new ( result, left. len ( ) ) ;
208- }
190+ let len = left. len ( ) ;
191+ if len == 0 {
192+ return BitBuffer :: empty ( ) ;
209193 }
210194
211- let iter = left
212- . chunks ( )
213- . iter_padded ( )
214- . zip ( right. chunks ( ) . iter_padded ( ) )
215- . map ( |( l, r) | op ( l, r) ) ;
216- let iter = unsafe { iter. trusted_len ( ) } ;
195+ let n_bytes = len. div_ceil ( 8 ) ;
196+ let out = if left. offset ( ) . is_multiple_of ( 8 ) && right. offset ( ) . is_multiple_of ( 8 ) {
197+ // Byte-aligned operands: logical bits map onto physical `u64` words, so read the backing
198+ // bytes straight as words and build the result from a `TrustedLen` iterator.
199+ let l_start = left. offset ( ) / 8 ;
200+ let r_start = right. offset ( ) / 8 ;
201+ let lhs = & left. inner ( ) . as_slice ( ) [ l_start..l_start + n_bytes] ;
202+ let rhs = & right. inner ( ) . as_slice ( ) [ r_start..r_start + n_bytes] ;
217203
218- let result = Buffer :: < u64 > :: from_trusted_len_iter ( iter) . into_byte_buffer ( ) ;
204+ let ( lhs_words, lhs_tail) = lhs. as_chunks :: < 8 > ( ) ;
205+ let ( rhs_words, rhs_tail) = rhs. as_chunks :: < 8 > ( ) ;
206+
207+ let mut out = BufferMut :: < u64 > :: from_trusted_len_iter (
208+ lhs_words
209+ . iter ( )
210+ . zip ( rhs_words)
211+ . map ( |( l, r) | op ( u64:: from_le_bytes ( * l) , u64:: from_le_bytes ( * r) ) ) ,
212+ ) ;
213+ if !lhs_tail. is_empty ( ) {
214+ out. push ( op ( read_u64_le ( lhs_tail) , read_u64_le ( rhs_tail) ) ) ;
215+ }
216+ out
217+ } else {
218+ // Sub-byte offset: `iter_padded` realigns the bits and appends one pad word, so take
219+ // exactly `ceil(len / 64)` words.
220+ let n_words = len. div_ceil ( 64 ) ;
221+ let mut out = BufferMut :: < u64 > :: with_capacity ( n_words) ;
222+ for ( l, r) in left
223+ . chunks ( )
224+ . iter_padded ( )
225+ . zip ( right. chunks ( ) . iter_padded ( ) )
226+ . take ( n_words)
227+ {
228+ out. push ( op ( l, r) ) ;
229+ }
230+ out
231+ } ;
219232
220- BitBuffer :: new ( result, left. len ( ) )
233+ let mut bytes = out. into_byte_buffer ( ) ;
234+ bytes. truncate ( n_bytes) ;
235+ BitBuffer :: new ( bytes. freeze ( ) , len)
221236}
222237
223238#[ cfg( test) ]
@@ -317,6 +332,41 @@ mod tests {
317332 assert_eq ! ( result, bitbuffer![ false , true , true , false ] ) ;
318333 }
319334
335+ /// `bitwise_binary_op` must match a naive per-bit reference for every op, offset and length,
336+ /// independent of the chunked kernels.
337+ #[ rstest]
338+ #[ case:: aligned( 0 , 0 ) ]
339+ #[ case:: byte_aligned( 8 , 16 ) ]
340+ #[ case:: byte_aligned_mismatch( 16 , 0 ) ]
341+ #[ case:: sub_byte( 3 , 3 ) ]
342+ #[ case:: sub_byte_mismatch( 0 , 5 ) ]
343+ fn binary_op_matches_naive ( #[ case] left_offset : usize , #[ case] right_offset : usize ) {
344+ #[ allow( clippy:: cast_possible_truncation) ]
345+ let make = |offset : usize , len : usize , salt : u8 | -> BitBuffer {
346+ let bytes: ByteBufferMut = ( 0 ..( offset + len) . div_ceil ( 8 ) . max ( 1 ) )
347+ . map ( |i| ( i as u8 ) . wrapping_mul ( 31 ) . wrapping_add ( salt) )
348+ . collect ( ) ;
349+ BitBufferMut :: from_buffer ( bytes, offset, len) . freeze ( )
350+ } ;
351+ let ops: [ fn ( u64 , u64 ) -> u64 ; 4 ] =
352+ [ |a, b| a & b, |a, b| a | b, |a, b| a ^ b, |a, b| a & !b] ;
353+
354+ for len in [ 1usize , 5 , 8 , 63 , 64 , 65 , 127 , 128 , 200 , 256 ] {
355+ let left = make ( left_offset, len, 0xC3 ) ;
356+ let right = make ( right_offset, len, 0x5A ) ;
357+ for op in ops {
358+ let got = bitwise_binary_op ( & left, & right, op) ;
359+ let expected: BitBuffer = ( 0 ..len)
360+ . map ( |i| op ( u64:: from ( left. value ( i) ) , u64:: from ( right. value ( i) ) ) & 1 == 1 )
361+ . collect ( ) ;
362+ assert_eq ! (
363+ got, expected,
364+ "loff={left_offset} roff={right_offset} len={len}"
365+ ) ;
366+ }
367+ }
368+ }
369+
320370 /// Regression test for a bug where [`bitwise_unary_op`] produced corrupt results when
321371 /// the [`BitBuffer`]'s underlying byte pointer was not u64-aligned. Slicing a buffer by
322372 /// a non-multiple-of-8 number of bytes can cause this misalignment. The bug only
0 commit comments