Skip to content

Commit 6367c77

Browse files
committed
Revert "fix potential UB in ByteBool constructor"
This reverts commit 655082c.
1 parent 9b08bf2 commit 6367c77

File tree

4 files changed

+21
-163
lines changed

4 files changed

+21
-163
lines changed

encodings/bytebool/public-api.lock

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,6 @@ pub fn vortex_bytebool::ByteBool::from_vec<V: core::convert::Into<vortex_array::
1010

1111
pub fn vortex_bytebool::ByteBool::new(buffer: vortex_array::buffer::BufferHandle, validity: vortex_array::validity::Validity) -> vortex_bytebool::ByteBoolArray
1212

13-
pub unsafe fn vortex_bytebool::ByteBool::new_unchecked(buffer: vortex_array::buffer::BufferHandle, validity: vortex_array::validity::Validity) -> vortex_bytebool::ByteBoolArray
14-
15-
pub fn vortex_bytebool::ByteBool::try_new(buffer: vortex_array::buffer::BufferHandle, validity: vortex_array::validity::Validity) -> vortex_error::VortexResult<vortex_bytebool::ByteBoolArray>
16-
1713
impl core::clone::Clone for vortex_bytebool::ByteBool
1814

1915
pub fn vortex_bytebool::ByteBool::clone(&self) -> vortex_bytebool::ByteBool
@@ -80,8 +76,6 @@ pub struct vortex_bytebool::ByteBoolData
8076

8177
impl vortex_bytebool::ByteBoolData
8278

83-
pub fn vortex_bytebool::ByteBoolData::as_slice(&self) -> &[bool]
84-
8579
pub fn vortex_bytebool::ByteBoolData::buffer(&self) -> &vortex_array::buffer::BufferHandle
8680

8781
pub fn vortex_bytebool::ByteBoolData::from_vec<V: core::convert::Into<vortex_array::validity::Validity>>(data: alloc::vec::Vec<bool>, validity: V) -> Self
@@ -92,14 +86,10 @@ pub fn vortex_bytebool::ByteBoolData::len(&self) -> usize
9286

9387
pub fn vortex_bytebool::ByteBoolData::new(buffer: vortex_array::buffer::BufferHandle, validity: vortex_array::validity::Validity) -> Self
9488

95-
pub unsafe fn vortex_bytebool::ByteBoolData::new_unchecked(buffer: vortex_array::buffer::BufferHandle, validity: vortex_array::validity::Validity) -> Self
96-
97-
pub fn vortex_bytebool::ByteBoolData::try_new(buffer: vortex_array::buffer::BufferHandle, validity: vortex_array::validity::Validity) -> vortex_error::VortexResult<Self>
89+
pub fn vortex_bytebool::ByteBoolData::truthy_bytes(&self) -> &[u8]
9890

9991
pub fn vortex_bytebool::ByteBoolData::validate(buffer: &vortex_array::buffer::BufferHandle, validity: &vortex_array::validity::Validity, dtype: &vortex_array::dtype::DType, len: usize) -> vortex_error::VortexResult<()>
10092

101-
pub fn vortex_bytebool::ByteBoolData::validate_bytes(buffer: &vortex_array::buffer::BufferHandle) -> vortex_error::VortexResult<()>
102-
10393
impl core::clone::Clone for vortex_bytebool::ByteBoolData
10494

10595
pub fn vortex_bytebool::ByteBoolData::clone(&self) -> vortex_bytebool::ByteBoolData

encodings/bytebool/src/array.rs

Lines changed: 13 additions & 146 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,11 @@ use vortex_array::vtable::VTable;
2929
use vortex_array::vtable::ValidityVTable;
3030
use vortex_array::vtable::child_to_validity;
3131
use vortex_array::vtable::validity_to_child;
32+
use vortex_buffer::BitBufferMut;
3233
use vortex_buffer::ByteBuffer;
33-
use vortex_buffer::{BitBuffer, BitBufferMut};
34-
use vortex_error::VortexExpect as _;
3534
use vortex_error::VortexResult;
3635
use vortex_error::vortex_bail;
3736
use vortex_error::vortex_ensure;
38-
use vortex_error::vortex_ensure_eq;
3937
use vortex_error::vortex_panic;
4038
use vortex_session::VortexSession;
4139
use vortex_session::registry::CachedId;
@@ -133,7 +131,7 @@ impl VTable for ByteBool {
133131
}
134132
let buffer = buffers[0].clone();
135133

136-
let data = ByteBoolData::try_new(buffer, validity.clone())?;
134+
let data = ByteBoolData::new(buffer, validity.clone());
137135
let slots = ByteBoolData::make_slots(&validity, len);
138136
Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots))
139137
}
@@ -200,41 +198,12 @@ impl<T: TypedArrayRef<ByteBool>> ByteBoolArrayExt for T {}
200198
pub struct ByteBool;
201199

202200
impl ByteBool {
203-
/// Construct a [`ByteBoolArray`] from a raw bytes buffer and validity.
204-
///
205-
/// # Panics
206-
///
207-
/// Panics if the provided components do not satisfy the invariants documented in
208-
/// [`ByteBool::new_unchecked`].
209201
pub fn new(buffer: BufferHandle, validity: Validity) -> ByteBoolArray {
210-
Self::try_new(buffer, validity).vortex_expect("ByteBoolArray construction failed")
211-
}
212-
213-
/// Construct a [`ByteBoolArray`] from a raw bytes buffer and validity, returning
214-
/// an error if the provided components do not satisfy the invariants documented
215-
/// in [`ByteBool::new_unchecked`].
216-
pub fn try_new(buffer: BufferHandle, validity: Validity) -> VortexResult<ByteBoolArray> {
217202
let dtype = DType::Bool(validity.nullability());
218-
let len = buffer.len();
219-
let slots = ByteBoolData::make_slots(&validity, len);
220-
let data = ByteBoolData::try_new(buffer, validity)?;
221-
Array::try_from_parts(ArrayParts::new(ByteBool, dtype, len, data).with_slots(slots))
222-
}
223203

224-
/// Construct a [`ByteBoolArray`] without validating the buffer contents.
225-
///
226-
/// # Safety
227-
///
228-
/// Every byte of `buffer` must be `0x00` or `0x01`. Any other byte value is
229-
/// Undefined Behavior because [`ByteBoolData::truthy_bytes`] reinterprets the buffer
230-
/// as `&[bool]`, and a `bool` with any bit pattern other than 0 or 1 is UB.
231-
/// If `validity` is [`Validity::Array`], its length must equal `buffer.len()`.
232-
pub unsafe fn new_unchecked(buffer: BufferHandle, validity: Validity) -> ByteBoolArray {
233-
let dtype = DType::Bool(validity.nullability());
234-
let len = buffer.len();
235-
let slots = ByteBoolData::make_slots(&validity, len);
236-
// SAFETY: caller guarantees every buffer byte is 0 or 1.
237-
let data = unsafe { ByteBoolData::new_unchecked(buffer, validity) };
204+
let slots = ByteBoolData::make_slots(&validity, buffer.len());
205+
let data = ByteBoolData::new(buffer, validity);
206+
let len = data.len();
238207
unsafe {
239208
Array::from_parts_unchecked(
240209
ArrayParts::new(ByteBool, dtype, len, data).with_slots(slots),
@@ -294,81 +263,22 @@ impl ByteBoolData {
294263
Ok(())
295264
}
296265

297-
/// Validate that every byte of `buffer` is `0x00` or `0x01`.
298-
///
299-
/// [`ByteBoolData::truthy_bytes`] transmutes the buffer's bytes to `&[bool]`; any byte
300-
/// other than `0x00` or `0x01` would produce a `bool` with an invalid bit pattern,
301-
/// which is Undefined Behavior per the Rust reference.
302-
///
303-
/// Device-resident buffers are not host-readable and are skipped.
304-
pub fn validate_bytes(buffer: &BufferHandle) -> VortexResult<()> {
305-
let Some(bytes) = buffer.as_host_opt() else {
306-
return Ok(());
307-
};
308-
// Count over a flat `&[u8]` vectorizes to pcmpgtb/pmovmskb on x86 and
309-
// cmhi/addv on aarch64, so the fast path runs at ~16 bytes/cycle.
310-
// See https://godbolt.org/z/z797nT1c8
311-
let invalid = bytes.as_slice().iter().filter(|&&b| b > 1).count();
312-
vortex_ensure_eq!(
313-
invalid,
314-
0,
315-
"ByteBoolArray buffer contains {invalid} bytes that are not 0 or 1",
316-
);
317-
Ok(())
318-
}
319-
320266
fn make_slots(validity: &Validity, len: usize) -> Vec<Option<ArrayRef>> {
321267
vec![validity_to_child(validity, len)]
322268
}
323269

324-
/// Construct [`ByteBoolData`] from a raw bytes buffer and validity.
325-
///
326-
/// # Panics
327-
///
328-
/// Panics if the provided components do not satisfy the invariants documented in
329-
/// [`ByteBoolData::new_unchecked`].
330270
pub fn new(buffer: BufferHandle, validity: Validity) -> Self {
331-
Self::try_new(buffer, validity).vortex_expect("ByteBoolData construction failed")
332-
}
333-
334-
/// Construct [`ByteBoolData`] from a raw bytes buffer and validity, returning an
335-
/// error if the provided components do not satisfy the invariants documented in
336-
/// [`ByteBoolData::new_unchecked`].
337-
pub fn try_new(buffer: BufferHandle, validity: Validity) -> VortexResult<Self> {
338-
Self::check_validity_len(&buffer, &validity)?;
339-
Self::validate_bytes(&buffer)?;
340-
// SAFETY: buffer bytes and validity length validated above.
341-
Ok(unsafe { Self::new_unchecked(buffer, validity) })
342-
}
343-
344-
/// Construct [`ByteBoolData`] without validating the buffer contents.
345-
///
346-
/// # Safety
347-
///
348-
/// Every byte of `buffer` must be `0x00` or `0x01`. Any other byte value is
349-
/// Undefined Behavior because [`ByteBoolData::truthy_bytes`] reinterprets the buffer
350-
/// as `&[bool]`, and a `bool` with any bit pattern other than 0 or 1 is UB.
351-
/// If `validity` is [`Validity::Array`], its length must equal `buffer.len()`.
352-
pub unsafe fn new_unchecked(buffer: BufferHandle, validity: Validity) -> Self {
353-
debug_assert!(
354-
Self::validate_bytes(&buffer).is_ok(),
355-
"ByteBoolData::new_unchecked called with non-boolean bytes",
356-
);
357-
Self::check_validity_len(&buffer, &validity)
358-
.vortex_expect("ByteBoolData::new_unchecked called with mismatched validity length");
359-
Self { buffer }
360-
}
361-
362-
fn check_validity_len(buffer: &BufferHandle, validity: &Validity) -> VortexResult<()> {
363-
if let Some(vlen) = validity.maybe_len() {
364-
vortex_ensure!(
365-
buffer.len() == vlen,
271+
let length = buffer.len();
272+
if let Some(vlen) = validity.maybe_len()
273+
&& length != vlen
274+
{
275+
vortex_panic!(
366276
"Buffer length ({}) does not match validity length ({})",
367-
buffer.len(),
277+
length,
368278
vlen
369279
);
370280
}
371-
Ok(())
281+
Self { buffer }
372282
}
373283

374284
/// Returns the number of elements in the array.
@@ -386,9 +296,7 @@ impl ByteBoolData {
386296
let validity = validity.into();
387297
// SAFETY: we are transmuting a Vec<bool> into a Vec<u8>
388298
let data: Vec<u8> = unsafe { std::mem::transmute(data) };
389-
let buffer = BufferHandle::new_host(ByteBuffer::from(data));
390-
// SAFETY: bytes came from `Vec<bool>`, which guarantees values of 0 or 1.
391-
unsafe { Self::new_unchecked(buffer, validity) }
299+
Self::new(BufferHandle::new_host(ByteBuffer::from(data)), validity)
392300
}
393301

394302
pub fn buffer(&self) -> &BufferHandle {
@@ -489,47 +397,6 @@ mod tests {
489397
}
490398

491399
#[test]
492-
fn try_new_rejects_invalid_byte() {
493-
// `ByteBoolData::as_slice` transmutes the underlying bytes into `&[bool]`.
494-
// A bool with any bit pattern other than 0 or 1 is Undefined Behavior per
495-
// the Rust reference, so `try_new` must reject these buffers.
496-
let raw = ByteBuffer::from(vec![0x02u8, 0x01, 0xFFu8]);
497-
let handle = BufferHandle::new_host(raw);
498-
let err = ByteBool::try_new(handle, Validity::NonNullable).unwrap_err();
499-
assert!(
500-
err.to_string().contains("bytes that are not 0 or 1"),
501-
"unexpected error: {err}",
502-
);
503-
}
504-
505-
#[test]
506-
#[should_panic(expected = "bytes that are not 0 or 1")]
507-
fn new_panics_on_invalid_byte() {
508-
let raw = ByteBuffer::from(vec![0x02u8]);
509-
let handle = BufferHandle::new_host(raw);
510-
drop(ByteBool::new(handle, Validity::NonNullable));
511-
}
512-
513-
#[test]
514-
fn new_unchecked_accepts_valid_bytes() {
515-
let raw = ByteBuffer::from(vec![0x00u8, 0x01, 0x01, 0x00]);
516-
let handle = BufferHandle::new_host(raw);
517-
// SAFETY: all bytes are 0 or 1.
518-
let arr = unsafe { ByteBool::new_unchecked(handle, Validity::NonNullable) };
519-
assert_eq!(arr.len(), 4);
520-
assert_eq!(arr.truthy_bytes(), &[false, true, true, false]);
521-
}
522-
523-
#[test]
524-
#[cfg(debug_assertions)]
525-
#[should_panic(expected = "non-boolean bytes")]
526-
fn new_unchecked_debug_asserts_invalid_bytes() {
527-
let raw = ByteBuffer::from(vec![0x02u8]);
528-
let handle = BufferHandle::new_host(raw);
529-
// SAFETY: intentionally violated to exercise the debug assertion.
530-
drop(unsafe { ByteBool::new_unchecked(handle, Validity::NonNullable) });
531-
}
532-
533400
#[test]
534401
fn test_nullable_bytebool_serde_roundtrip() {
535402
let array = ByteBool::from_option_vec(vec![Some(true), None, Some(false), None]);

encodings/bytebool/src/compute.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,9 @@ impl TakeExecute for ByteBool {
7676
.collect::<ByteBuffer>()
7777
});
7878

79-
// SAFETY:
80-
unsafe {
81-
Ok(Some(
82-
ByteBool::new_unchecked(BufferHandle::new_host(taken), validity).into_array(),
83-
))
84-
}
79+
Ok(Some(
80+
ByteBool::new(BufferHandle::new_host(taken), validity).into_array(),
81+
))
8582
}
8683
}
8784

vortex-buffer/public-api.lock

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,10 @@ impl core::convert::From<&[bool]> for vortex_buffer::BitBufferMut
504504

505505
pub fn vortex_buffer::BitBufferMut::from(value: &[bool]) -> Self
506506

507+
impl core::convert::From<&[u8]> for vortex_buffer::BitBufferMut
508+
509+
pub fn vortex_buffer::BitBufferMut::from(value: &[u8]) -> Self
510+
507511
impl core::convert::From<alloc::vec::Vec<bool>> for vortex_buffer::BitBufferMut
508512

509513
pub fn vortex_buffer::BitBufferMut::from(value: alloc::vec::Vec<bool>) -> Self

0 commit comments

Comments
 (0)