@@ -29,13 +29,11 @@ use vortex_array::vtable::VTable;
2929use vortex_array:: vtable:: ValidityVTable ;
3030use vortex_array:: vtable:: child_to_validity;
3131use vortex_array:: vtable:: validity_to_child;
32+ use vortex_buffer:: BitBufferMut ;
3233use vortex_buffer:: ByteBuffer ;
33- use vortex_buffer:: { BitBuffer , BitBufferMut } ;
34- use vortex_error:: VortexExpect as _;
3534use vortex_error:: VortexResult ;
3635use vortex_error:: vortex_bail;
3736use vortex_error:: vortex_ensure;
38- use vortex_error:: vortex_ensure_eq;
3937use vortex_error:: vortex_panic;
4038use vortex_session:: VortexSession ;
4139use vortex_session:: registry:: CachedId ;
@@ -133,7 +131,7 @@ impl VTable for ByteBool {
133131 }
134132 let buffer = buffers[ 0 ] . clone ( ) ;
135133
136- let data = ByteBoolData :: try_new ( buffer, validity. clone ( ) ) ? ;
134+ let data = ByteBoolData :: new ( buffer, validity. clone ( ) ) ;
137135 let slots = ByteBoolData :: make_slots ( & validity, len) ;
138136 Ok ( ArrayParts :: new ( self . clone ( ) , dtype. clone ( ) , len, data) . with_slots ( slots) )
139137 }
@@ -200,41 +198,12 @@ impl<T: TypedArrayRef<ByteBool>> ByteBoolArrayExt for T {}
200198pub struct ByteBool ;
201199
202200impl ByteBool {
203- /// Construct a [`ByteBoolArray`] from a raw bytes buffer and validity.
204- ///
205- /// # Panics
206- ///
207- /// Panics if the provided components do not satisfy the invariants documented in
208- /// [`ByteBool::new_unchecked`].
209201 pub fn new ( buffer : BufferHandle , validity : Validity ) -> ByteBoolArray {
210- Self :: try_new ( buffer, validity) . vortex_expect ( "ByteBoolArray construction failed" )
211- }
212-
213- /// Construct a [`ByteBoolArray`] from a raw bytes buffer and validity, returning
214- /// an error if the provided components do not satisfy the invariants documented
215- /// in [`ByteBool::new_unchecked`].
216- pub fn try_new ( buffer : BufferHandle , validity : Validity ) -> VortexResult < ByteBoolArray > {
217202 let dtype = DType :: Bool ( validity. nullability ( ) ) ;
218- let len = buffer. len ( ) ;
219- let slots = ByteBoolData :: make_slots ( & validity, len) ;
220- let data = ByteBoolData :: try_new ( buffer, validity) ?;
221- Array :: try_from_parts ( ArrayParts :: new ( ByteBool , dtype, len, data) . with_slots ( slots) )
222- }
223203
224- /// Construct a [`ByteBoolArray`] without validating the buffer contents.
225- ///
226- /// # Safety
227- ///
228- /// Every byte of `buffer` must be `0x00` or `0x01`. Any other byte value is
229- /// Undefined Behavior because [`ByteBoolData::truthy_bytes`] reinterprets the buffer
230- /// as `&[bool]`, and a `bool` with any bit pattern other than 0 or 1 is UB.
231- /// If `validity` is [`Validity::Array`], its length must equal `buffer.len()`.
232- pub unsafe fn new_unchecked ( buffer : BufferHandle , validity : Validity ) -> ByteBoolArray {
233- let dtype = DType :: Bool ( validity. nullability ( ) ) ;
234- let len = buffer. len ( ) ;
235- let slots = ByteBoolData :: make_slots ( & validity, len) ;
236- // SAFETY: caller guarantees every buffer byte is 0 or 1.
237- let data = unsafe { ByteBoolData :: new_unchecked ( buffer, validity) } ;
204+ let slots = ByteBoolData :: make_slots ( & validity, buffer. len ( ) ) ;
205+ let data = ByteBoolData :: new ( buffer, validity) ;
206+ let len = data. len ( ) ;
238207 unsafe {
239208 Array :: from_parts_unchecked (
240209 ArrayParts :: new ( ByteBool , dtype, len, data) . with_slots ( slots) ,
@@ -294,81 +263,22 @@ impl ByteBoolData {
294263 Ok ( ( ) )
295264 }
296265
297- /// Validate that every byte of `buffer` is `0x00` or `0x01`.
298- ///
299- /// [`ByteBoolData::truthy_bytes`] transmutes the buffer's bytes to `&[bool]`; any byte
300- /// other than `0x00` or `0x01` would produce a `bool` with an invalid bit pattern,
301- /// which is Undefined Behavior per the Rust reference.
302- ///
303- /// Device-resident buffers are not host-readable and are skipped.
304- pub fn validate_bytes ( buffer : & BufferHandle ) -> VortexResult < ( ) > {
305- let Some ( bytes) = buffer. as_host_opt ( ) else {
306- return Ok ( ( ) ) ;
307- } ;
308- // Count over a flat `&[u8]` vectorizes to pcmpgtb/pmovmskb on x86 and
309- // cmhi/addv on aarch64, so the fast path runs at ~16 bytes/cycle.
310- // See https://godbolt.org/z/z797nT1c8
311- let invalid = bytes. as_slice ( ) . iter ( ) . filter ( |& & b| b > 1 ) . count ( ) ;
312- vortex_ensure_eq ! (
313- invalid,
314- 0 ,
315- "ByteBoolArray buffer contains {invalid} bytes that are not 0 or 1" ,
316- ) ;
317- Ok ( ( ) )
318- }
319-
320266 fn make_slots ( validity : & Validity , len : usize ) -> Vec < Option < ArrayRef > > {
321267 vec ! [ validity_to_child( validity, len) ]
322268 }
323269
324- /// Construct [`ByteBoolData`] from a raw bytes buffer and validity.
325- ///
326- /// # Panics
327- ///
328- /// Panics if the provided components do not satisfy the invariants documented in
329- /// [`ByteBoolData::new_unchecked`].
330270 pub fn new ( buffer : BufferHandle , validity : Validity ) -> Self {
331- Self :: try_new ( buffer, validity) . vortex_expect ( "ByteBoolData construction failed" )
332- }
333-
334- /// Construct [`ByteBoolData`] from a raw bytes buffer and validity, returning an
335- /// error if the provided components do not satisfy the invariants documented in
336- /// [`ByteBoolData::new_unchecked`].
337- pub fn try_new ( buffer : BufferHandle , validity : Validity ) -> VortexResult < Self > {
338- Self :: check_validity_len ( & buffer, & validity) ?;
339- Self :: validate_bytes ( & buffer) ?;
340- // SAFETY: buffer bytes and validity length validated above.
341- Ok ( unsafe { Self :: new_unchecked ( buffer, validity) } )
342- }
343-
344- /// Construct [`ByteBoolData`] without validating the buffer contents.
345- ///
346- /// # Safety
347- ///
348- /// Every byte of `buffer` must be `0x00` or `0x01`. Any other byte value is
349- /// Undefined Behavior because [`ByteBoolData::truthy_bytes`] reinterprets the buffer
350- /// as `&[bool]`, and a `bool` with any bit pattern other than 0 or 1 is UB.
351- /// If `validity` is [`Validity::Array`], its length must equal `buffer.len()`.
352- pub unsafe fn new_unchecked ( buffer : BufferHandle , validity : Validity ) -> Self {
353- debug_assert ! (
354- Self :: validate_bytes( & buffer) . is_ok( ) ,
355- "ByteBoolData::new_unchecked called with non-boolean bytes" ,
356- ) ;
357- Self :: check_validity_len ( & buffer, & validity)
358- . vortex_expect ( "ByteBoolData::new_unchecked called with mismatched validity length" ) ;
359- Self { buffer }
360- }
361-
362- fn check_validity_len ( buffer : & BufferHandle , validity : & Validity ) -> VortexResult < ( ) > {
363- if let Some ( vlen) = validity. maybe_len ( ) {
364- vortex_ensure ! (
365- buffer. len( ) == vlen,
271+ let length = buffer. len ( ) ;
272+ if let Some ( vlen) = validity. maybe_len ( )
273+ && length != vlen
274+ {
275+ vortex_panic ! (
366276 "Buffer length ({}) does not match validity length ({})" ,
367- buffer . len ( ) ,
277+ length ,
368278 vlen
369279 ) ;
370280 }
371- Ok ( ( ) )
281+ Self { buffer }
372282 }
373283
374284 /// Returns the number of elements in the array.
@@ -386,9 +296,7 @@ impl ByteBoolData {
386296 let validity = validity. into ( ) ;
387297 // SAFETY: we are transmuting a Vec<bool> into a Vec<u8>
388298 let data: Vec < u8 > = unsafe { std:: mem:: transmute ( data) } ;
389- let buffer = BufferHandle :: new_host ( ByteBuffer :: from ( data) ) ;
390- // SAFETY: bytes came from `Vec<bool>`, which guarantees values of 0 or 1.
391- unsafe { Self :: new_unchecked ( buffer, validity) }
299+ Self :: new ( BufferHandle :: new_host ( ByteBuffer :: from ( data) ) , validity)
392300 }
393301
394302 pub fn buffer ( & self ) -> & BufferHandle {
@@ -489,47 +397,6 @@ mod tests {
489397 }
490398
491399 #[ test]
492- fn try_new_rejects_invalid_byte ( ) {
493- // `ByteBoolData::as_slice` transmutes the underlying bytes into `&[bool]`.
494- // A bool with any bit pattern other than 0 or 1 is Undefined Behavior per
495- // the Rust reference, so `try_new` must reject these buffers.
496- let raw = ByteBuffer :: from ( vec ! [ 0x02u8 , 0x01 , 0xFFu8 ] ) ;
497- let handle = BufferHandle :: new_host ( raw) ;
498- let err = ByteBool :: try_new ( handle, Validity :: NonNullable ) . unwrap_err ( ) ;
499- assert ! (
500- err. to_string( ) . contains( "bytes that are not 0 or 1" ) ,
501- "unexpected error: {err}" ,
502- ) ;
503- }
504-
505- #[ test]
506- #[ should_panic( expected = "bytes that are not 0 or 1" ) ]
507- fn new_panics_on_invalid_byte ( ) {
508- let raw = ByteBuffer :: from ( vec ! [ 0x02u8 ] ) ;
509- let handle = BufferHandle :: new_host ( raw) ;
510- drop ( ByteBool :: new ( handle, Validity :: NonNullable ) ) ;
511- }
512-
513- #[ test]
514- fn new_unchecked_accepts_valid_bytes ( ) {
515- let raw = ByteBuffer :: from ( vec ! [ 0x00u8 , 0x01 , 0x01 , 0x00 ] ) ;
516- let handle = BufferHandle :: new_host ( raw) ;
517- // SAFETY: all bytes are 0 or 1.
518- let arr = unsafe { ByteBool :: new_unchecked ( handle, Validity :: NonNullable ) } ;
519- assert_eq ! ( arr. len( ) , 4 ) ;
520- assert_eq ! ( arr. truthy_bytes( ) , & [ false , true , true , false ] ) ;
521- }
522-
523- #[ test]
524- #[ cfg( debug_assertions) ]
525- #[ should_panic( expected = "non-boolean bytes" ) ]
526- fn new_unchecked_debug_asserts_invalid_bytes ( ) {
527- let raw = ByteBuffer :: from ( vec ! [ 0x02u8 ] ) ;
528- let handle = BufferHandle :: new_host ( raw) ;
529- // SAFETY: intentionally violated to exercise the debug assertion.
530- drop ( unsafe { ByteBool :: new_unchecked ( handle, Validity :: NonNullable ) } ) ;
531- }
532-
533400 #[ test]
534401 fn test_nullable_bytebool_serde_roundtrip ( ) {
535402 let array = ByteBool :: from_option_vec ( vec ! [ Some ( true ) , None , Some ( false ) , None ] ) ;
0 commit comments