diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs
index ee3413e1833d..22a1ba7653ac 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -364,6 +364,250 @@ impl BooleanArray {
         Self::new(values, nulls)
     }
 
+    /// Apply a bitwise operation to this array's values using u64 operations,
+    /// returning a new [`BooleanArray`].
+    ///
+    /// The null buffer is preserved unchanged.
+    ///
+    /// See [`BooleanBuffer::from_bitwise_unary_op`] for details on the operation.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use arrow_array::BooleanArray;
+    /// let array = BooleanArray::from(vec![true, false, true]);
+    /// let result = array.bitwise_unary(|x| !x);
+    /// assert_eq!(result, BooleanArray::from(vec![false, true, false]));
+    /// ```
+    pub fn bitwise_unary<F>(&self, op: F) -> BooleanArray
+    where
+        F: FnMut(u64) -> u64,
+    {
+        let values = BooleanBuffer::from_bitwise_unary_op(
+            self.values.values(),
+            self.values.offset(),
+            self.values.len(),
+            op,
+        );
+        BooleanArray::new(values, self.nulls.clone())
+    }
+
+    /// Try to apply a bitwise operation to this array's values in place using
+    /// u64 operations.
+    ///
+    /// If the underlying buffer is uniquely owned, the operation is applied
+    /// in place and `Ok` is returned. If the buffer is shared, `Err(self)` is
+    /// returned so the caller can fall back to [`bitwise_unary`](Self::bitwise_unary).
+    ///
+    /// The null buffer is preserved unchanged.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use arrow_array::BooleanArray;
+    /// let array = BooleanArray::from(vec![true, false, true]);
+    /// let result = array.bitwise_unary_mut(|x| !x).unwrap();
+    /// assert_eq!(result, BooleanArray::from(vec![false, true, false]));
+    /// ```
+    pub fn bitwise_unary_mut<F>(self, op: F) -> Result<BooleanArray, BooleanArray>
+    where
+        F: FnMut(u64) -> u64,
+    {
+        self.try_bitwise_unary_in_place(op)
+            .map_err(|(array, _op)| array)
+    }
+
+    /// Apply a bitwise operation to this array's values in place if the buffer
+    /// is uniquely owned, or clone and apply if shared.
+    ///
+    /// This is a convenience wrapper around [`bitwise_unary_mut`](Self::bitwise_unary_mut)
+    /// that falls back to [`bitwise_unary`](Self::bitwise_unary) when the buffer is shared.
+    ///
+    /// The null buffer is preserved unchanged.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use arrow_array::BooleanArray;
+    /// let array = BooleanArray::from(vec![true, false, true]);
+    /// let result = array.bitwise_unary_mut_or_clone(|x| !x);
+    /// assert_eq!(result, BooleanArray::from(vec![false, true, false]));
+    /// ```
+    pub fn bitwise_unary_mut_or_clone<F>(self, op: F) -> BooleanArray
+    where
+        F: FnMut(u64) -> u64,
+    {
+        match self.try_bitwise_unary_in_place(op) {
+            Ok(array) => array,
+            Err((array, op)) => array.bitwise_unary(op),
+        }
+    }
+
+    /// Try to apply a unary op in place. Returns `op` back on failure so
+    /// callers can fall back to an allocating path without requiring `F: Clone`.
+    fn try_bitwise_unary_in_place<F>(self, op: F) -> Result<BooleanArray, (BooleanArray, F)>
+    where
+        F: FnMut(u64) -> u64,
+    {
+        let (values, nulls) = self.into_parts();
+        let offset = values.offset();
+        let len = values.len();
+        let buffer = values.into_inner();
+        match buffer.into_mutable() {
+            Ok(mut buf) => {
+                bit_util::apply_bitwise_unary_op(buf.as_slice_mut(), offset, len, op);
+                let values = BooleanBuffer::new(buf.into(), offset, len);
+                Ok(BooleanArray::new(values, nulls))
+            }
+            Err(buffer) => {
+                let values = BooleanBuffer::new(buffer, offset, len);
+                Err((BooleanArray::new(values, nulls), op))
+            }
+        }
+    }
+
+    /// Apply a bitwise binary operation to this array and `rhs` using u64
+    /// operations, returning a new [`BooleanArray`].
+    ///
+    /// Null buffers are unioned: the result is null where either input is null.
+    ///
+    /// See [`BooleanBuffer::from_bitwise_binary_op`] for details on the operation.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `self` and `rhs` have different lengths.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use arrow_array::BooleanArray;
+    /// let a = BooleanArray::from(vec![true, false, true, true]);
+    /// let b = BooleanArray::from(vec![true, true, false, true]);
+    /// let result = a.bitwise_bin_op(&b, |a, b| a & b);
+    /// assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
+    /// ```
+    pub fn bitwise_bin_op<F>(&self, rhs: &BooleanArray, op: F) -> BooleanArray
+    where
+        F: FnMut(u64, u64) -> u64,
+    {
+        assert_eq!(self.len(), rhs.len());
+        let nulls = NullBuffer::union(self.nulls(), rhs.nulls());
+        let values = BooleanBuffer::from_bitwise_binary_op(
+            self.values.values(),
+            self.values.offset(),
+            rhs.values.values(),
+            rhs.values.offset(),
+            self.values.len(),
+            op,
+        );
+        BooleanArray::new(values, nulls)
+    }
+
+    /// Try to apply a bitwise binary operation to this array and `rhs` in
+    /// place using u64 operations.
+    ///
+    /// If this array's underlying buffer is uniquely owned, the operation is
+    /// applied in place and `Ok` is returned. If the buffer is shared,
+    /// `Err(self)` is returned so the caller can fall back to
+    /// [`bitwise_bin_op`](Self::bitwise_bin_op).
+    ///
+    /// On success, null buffers are unioned: the result is null where either input is null.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `self` and `rhs` have different lengths.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use arrow_array::BooleanArray;
+    /// let a = BooleanArray::from(vec![true, false, true, true]);
+    /// let b = BooleanArray::from(vec![true, true, false, true]);
+    /// let result = a.bitwise_bin_op_mut(&b, |a, b| a & b).unwrap();
+    /// assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
+    /// ```
+    pub fn bitwise_bin_op_mut<F>(
+        self,
+        rhs: &BooleanArray,
+        op: F,
+    ) -> Result<BooleanArray, BooleanArray>
+    where
+        F: FnMut(u64, u64) -> u64,
+    {
+        self.try_bitwise_bin_op_in_place(rhs, op)
+            .map_err(|(array, _op)| array)
+    }
+
+    /// Apply a bitwise binary operation to this array and `rhs` in place if the
+    /// buffer is uniquely owned, or clone and apply if shared.
+    ///
+    /// This is a convenience wrapper around [`bitwise_bin_op_mut`](Self::bitwise_bin_op_mut)
+    /// that falls back to [`bitwise_bin_op`](Self::bitwise_bin_op) when the buffer is shared.
+    ///
+    /// Null buffers are unioned: the result is null where either input is null.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `self` and `rhs` have different lengths.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use arrow_array::BooleanArray;
+    /// let a = BooleanArray::from(vec![true, false, true, true]);
+    /// let b = BooleanArray::from(vec![true, true, false, true]);
+    /// let result = a.bitwise_bin_op_mut_or_clone(&b, |a, b| a & b);
+    /// assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
+    /// ```
+    pub fn bitwise_bin_op_mut_or_clone<F>(self, rhs: &BooleanArray, op: F) -> BooleanArray
+    where
+        F: FnMut(u64, u64) -> u64,
+    {
+        match self.try_bitwise_bin_op_in_place(rhs, op) {
+            Ok(array) => array,
+            Err((array, op)) => array.bitwise_bin_op(rhs, op),
+        }
+    }
+
+    /// Try to apply a binary op in place. Returns `op` back on failure so
+    /// callers can fall back to an allocating path without requiring `F: Clone`.
+    fn try_bitwise_bin_op_in_place<F>(
+        self,
+        rhs: &BooleanArray,
+        op: F,
+    ) -> Result<BooleanArray, (BooleanArray, F)>
+    where
+        F: FnMut(u64, u64) -> u64,
+    {
+        assert_eq!(self.len(), rhs.len());
+        let (values, nulls) = self.into_parts();
+        let offset = values.offset();
+        let len = values.len();
+        let buffer = values.into_inner();
+        match buffer.into_mutable() {
+            Ok(mut buf) => {
+                bit_util::apply_bitwise_binary_op(
+                    buf.as_slice_mut(),
+                    offset,
+                    rhs.values.values(),
+                    rhs.values.offset(),
+                    len,
+                    op,
+                );
+                // Defer null union to the success path so the Err path returns
+                // self's original nulls, avoiding a redundant union in callers
+                // that fall back to bitwise_bin_op.
+                let nulls = NullBuffer::union(nulls.as_ref(), rhs.nulls());
+                let values = BooleanBuffer::new(buf.into(), offset, len);
+                Ok(BooleanArray::new(values, nulls))
+            }
+            Err(buffer) => {
+                let values = BooleanBuffer::new(buffer, offset, len);
+                Err((BooleanArray::new(values, nulls), op))
+            }
+        }
+    }
+
     /// Deconstruct this array into its constituent parts
     pub fn into_parts(self) -> (BooleanBuffer, Option<NullBuffer>) {
         (self.values, self.nulls)
@@ -643,6 +887,41 @@ impl From for BooleanArray {
 #[cfg(test)]
 mod tests {
     use super::*;
+
+    // Captures the values-buffer identity for a BooleanArray so tests can assert
+    // whether an operation reused the original allocation or produced a new one.
+    struct PointerInfo {
+        ptr: *const u8,
+        offset: usize,
+        len: usize,
+    }
+
+    impl PointerInfo {
+        // Record the current values buffer pointer plus bit offset/length. The
+        // offset/length checks ensure a logically equivalent slice wasn't rebuilt
+        // with a different view over the same allocation.
+        fn new(array: &BooleanArray) -> Self {
+            Self {
+                ptr: array.values().inner().as_ptr(),
+                offset: array.values().offset(),
+                len: array.values().len(),
+            }
+        }
+
+        // Assert that the array still points at the exact same values buffer and
+        // preserves the same bit view.
+        fn assert_same(&self, array: &BooleanArray) {
+            assert_eq!(array.values().inner().as_ptr(), self.ptr);
+            assert_eq!(array.values().offset(), self.offset);
+            assert_eq!(array.values().len(), self.len);
+        }
+
+        // Assert that the array now points at a different values allocation,
+        // indicating the operation fell back to an allocating path.
+        fn assert_different(&self, array: &BooleanArray) {
+            assert_ne!(array.values().inner().as_ptr(), self.ptr);
+        }
+    }
     use arrow_buffer::Buffer;
     use rand::{Rng, rng};
 
@@ -1062,4 +1341,293 @@ mod tests {
             assert_eq!(arr.has_false(), expected_has_false, "len={len}");
         }
     }
+
+    #[test]
+    fn test_bitwise_unary_not() {
+        let arr = BooleanArray::from(vec![true, false, true, false]);
+        let result = arr.bitwise_unary(|x| !x);
+        let expected = BooleanArray::from(vec![false, true, false, true]);
+        assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn test_bitwise_unary_preserves_nulls() {
+        let arr = BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]);
+        let result = arr.bitwise_unary(|x| !x);
+
+        assert_eq!(result.null_count(), 1);
+        assert!(result.is_null(1));
+        assert!(!result.value(0));
+        assert!(result.value(2));
+        assert!(!result.value(3));
+    }
+
+    #[test]
+    fn test_bitwise_unary_mut_unshared() {
+        let arr = BooleanArray::from(vec![true, false, true, false]);
+        let info = PointerInfo::new(&arr);
+        let result = arr.bitwise_unary_mut(|x| !x).unwrap();
+        let expected = BooleanArray::from(vec![false, true, false, true]);
+        assert_eq!(result, expected);
+        info.assert_same(&result);
+    }
+
+    #[test]
+    fn test_bitwise_unary_mut_shared() {
+        let arr = BooleanArray::from(vec![true, false, true, false]);
+        let info = PointerInfo::new(&arr);
+        let _shared = arr.clone();
+        let result = arr.bitwise_unary_mut(|x| !x);
+        assert!(result.is_err());
+
+        let returned = result.unwrap_err();
+        assert_eq!(returned, BooleanArray::from(vec![true, false, true, false]));
+        info.assert_same(&returned);
+    }
+
+    #[test]
+    fn test_bitwise_unary_mut_with_nulls() {
+        let arr = BooleanArray::from(vec![Some(true), None, Some(false)]);
+        let result = arr.bitwise_unary_mut(|x| !x).unwrap();
+
+        assert_eq!(result.null_count(), 1);
+        assert!(result.is_null(1));
+        assert!(!result.value(0));
+        assert!(result.value(2));
+    }
+
+    #[test]
+    fn test_bitwise_unary_mut_or_clone_shared() {
+        let arr = BooleanArray::from(vec![true, false, true]);
+        let info = PointerInfo::new(&arr);
+        let _shared = arr.clone();
+        let result = arr.bitwise_unary_mut_or_clone(|x| !x);
+        assert_eq!(result, BooleanArray::from(vec![false, true, false]));
+        info.assert_different(&result);
+    }
+
+    #[test]
+    fn test_bitwise_unary_mut_or_clone_unshared() {
+        // Covers the uniquely-owned fast path in bitwise_unary_mut_or_clone.
+        let arr = BooleanArray::from(vec![true, false, true]);
+        let info = PointerInfo::new(&arr);
+        let result = arr.bitwise_unary_mut_or_clone(|x| !x);
+        assert_eq!(result, BooleanArray::from(vec![false, true, false]));
+        info.assert_same(&result);
+    }
+
+    #[test]
+    fn test_bitwise_bin_op_and() {
+        let a = BooleanArray::from(vec![true, false, true, true]);
+        let b = BooleanArray::from(vec![true, true, false, true]);
+        let result = a.bitwise_bin_op(&b, |a, b| a & b);
+        assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
+    }
+
+    #[test]
+    fn test_bitwise_bin_op_or() {
+        let a = BooleanArray::from(vec![true, false, true, false]);
+        let b = BooleanArray::from(vec![false, true, false, false]);
+        let result = a.bitwise_bin_op(&b, |a, b| a | b);
+        assert_eq!(result, BooleanArray::from(vec![true, true, true, false]));
+    }
+
+    #[test]
+    fn test_bitwise_bin_op_null_union() {
+        let a = BooleanArray::from(vec![Some(true), None, Some(true), Some(false)]);
+        let b = BooleanArray::from(vec![Some(true), Some(true), None, Some(true)]);
+        let result = a.bitwise_bin_op(&b, |a, b| a & b);
+
+        assert_eq!(result.null_count(), 2);
+        assert!(result.is_null(1));
+        assert!(result.is_null(2));
+        assert!(result.value(0));
+        assert!(!result.value(3));
+    }
+
+    #[test]
+    fn test_bitwise_bin_op_one_nullable() {
+        let a = BooleanArray::from(vec![Some(true), None, Some(true)]);
+        let b = BooleanArray::from(vec![false, true, true]);
+        let result = a.bitwise_bin_op(&b, |a, b| a & b);
+
+        assert_eq!(result.null_count(), 1);
+        assert!(result.is_null(1));
+        assert!(!result.value(0));
+        assert!(result.value(2));
+    }
+
+    #[test]
+    fn test_bitwise_bin_op_no_nulls() {
+        let a = BooleanArray::from(vec![true, false, true]);
+        let b = BooleanArray::from(vec![false, true, true]);
+        let result = a.bitwise_bin_op(&b, |a, b| a | b);
+
+        assert!(result.nulls().is_none());
+        assert_eq!(result, BooleanArray::from(vec![true, true, true]));
+    }
+
+    #[test]
+    fn test_bitwise_bin_op_mut_unshared() {
+        let a = BooleanArray::from(vec![true, false, true, true]);
+        let info = PointerInfo::new(&a);
+        let b = BooleanArray::from(vec![true, true, false, true]);
+        let result = a.bitwise_bin_op_mut(&b, |a, b| a & b).unwrap();
+        assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
+        info.assert_same(&result);
+    }
+
+    #[test]
+    fn test_bitwise_bin_op_mut_shared() {
+        let a = BooleanArray::from(vec![true, false, true, true]);
+        let info = PointerInfo::new(&a);
+        let _shared = a.clone();
+        let result = a.bitwise_bin_op_mut(
+            &BooleanArray::from(vec![true, true, false, true]),
+            |a, b| a & b,
+        );
+        assert!(result.is_err());
+        let returned = result.unwrap_err();
+        info.assert_same(&returned);
+    }
+
+    #[test]
+    fn test_bitwise_bin_op_mut_with_nulls() {
+        let a = BooleanArray::from(vec![Some(true), None, Some(true), Some(false)]);
+        let b = BooleanArray::from(vec![Some(true), Some(true), None, Some(true)]);
+        let result = a.bitwise_bin_op_mut(&b, |a, b| a & b).unwrap();
+
+        assert_eq!(result.null_count(), 2);
+        assert!(result.is_null(1));
+        assert!(result.is_null(2));
+        assert!(result.value(0));
+        assert!(!result.value(3));
+    }
+
+    #[test]
+    fn test_bitwise_bin_op_mut_or_clone_shared() {
+        let a = BooleanArray::from(vec![true, false, true, true]);
+        let info = PointerInfo::new(&a);
+        let _shared = a.clone();
+        let b = BooleanArray::from(vec![true, true, false, true]);
+        let result = a.bitwise_bin_op_mut_or_clone(&b, |a, b| a & b);
+        assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
+        info.assert_different(&result);
+    }
+
+    #[test]
+    fn test_bitwise_bin_op_mut_or_clone_shared_with_nulls() {
+        // When the buffer is shared, _mut_or_clone falls back to bitwise_bin_op.
+        // The null union must only be applied once, not double-applied.
+        let a = BooleanArray::from(vec![Some(true), None, Some(true), Some(false)]);
+        let info = PointerInfo::new(&a);
+        let _shared = a.clone();
+        let b = BooleanArray::from(vec![Some(true), Some(true), None, Some(true)]);
+
+        let expected = a.bitwise_bin_op(&b, |a, b| a & b);
+        let result = a.bitwise_bin_op_mut_or_clone(&b, |a, b| a & b);
+
+        assert_eq!(result, expected);
+        assert_eq!(result.null_count(), 2);
+        assert!(result.is_null(1));
+        assert!(result.is_null(2));
+        info.assert_different(&result);
+    }
+
+    #[test]
+    fn test_bitwise_bin_op_mut_or_clone_unshared_with_nulls() {
+        // Covers the uniquely-owned fast path in bitwise_bin_op_mut_or_clone,
+        // including null union on the in-place path.
+        let a = BooleanArray::from(vec![Some(true), None, Some(true), Some(false)]);
+        let info = PointerInfo::new(&a);
+        let b = BooleanArray::from(vec![Some(true), Some(true), None, Some(true)]);
+        let result = a.bitwise_bin_op_mut_or_clone(&b, |a, b| a & b);
+
+        assert_eq!(result.null_count(), 2);
+        assert!(result.is_null(1));
+        assert!(result.is_null(2));
+        assert!(result.value(0));
+        assert!(!result.value(3));
+        info.assert_same(&result);
+    }
+
+    #[test]
+    fn test_bitwise_unary_empty() {
+        let arr = BooleanArray::from(Vec::<bool>::new());
+        let result = arr.bitwise_unary(|x| !x);
+        assert_eq!(result.len(), 0);
+    }
+
+    #[test]
+    fn test_bitwise_bin_op_empty() {
+        let a = BooleanArray::from(Vec::<bool>::new());
+        let b = BooleanArray::from(Vec::<bool>::new());
+        let result = a.bitwise_bin_op(&b, |a, b| a & b);
+        assert_eq!(result.len(), 0);
+    }
+
+    #[test]
+    fn test_bitwise_unary_sliced() {
+        // Slicing creates a non-zero offset into the underlying buffer.
+        let arr = BooleanArray::from(vec![true, false, true, true, false]);
+        let sliced = arr.slice(1, 3); // [false, true, true]
+
+        let result = sliced.bitwise_unary(|x| !x);
+        assert_eq!(result.len(), 3);
+        assert!(result.value(0));
+        assert!(!result.value(1));
+        assert!(!result.value(2));
+    }
+
+    #[test]
+    fn test_bitwise_unary_mut_sliced() {
+        // `sliced` shares its buffer with `arr`, so the in-place attempt must return Err.
+        let arr = BooleanArray::from(vec![true, false, true, true, false]);
+        let sliced = arr.slice(1, 3);
+        assert!(sliced.bitwise_unary_mut(|x| !x).is_err());
+    }
+
+    #[test]
+    fn test_bitwise_unary_mut_or_clone_sliced() {
+        // `sliced` shares its buffer with `arr`, so _mut_or_clone falls back to allocating.
+        let arr = BooleanArray::from(vec![true, false, true, true, false]);
+        let sliced = arr.slice(1, 3); // [false, true, true]
+
+        let result = sliced.bitwise_unary_mut_or_clone(|x| !x);
+        assert_eq!(result.len(), 3);
+        assert!(result.value(0));
+        assert!(!result.value(1));
+        assert!(!result.value(2));
+    }
+
+    #[test]
+    fn test_bitwise_bin_op_different_offsets() {
+        // Left and right sliced to different offsets exercises misaligned
+        // bit handling in from_bitwise_binary_op.
+        let left_full = BooleanArray::from(vec![false, true, false, true, true]);
+        let right_full = BooleanArray::from(vec![true, true, true, false, true, false]);
+
+        let left = left_full.slice(1, 3); // [true, false, true]
+        let right = right_full.slice(2, 3); // [true, false, true]
+
+        let result = left.bitwise_bin_op(&right, |a, b| a & b);
+        assert_eq!(result.len(), 3);
+        assert!(result.value(0));
+        assert!(!result.value(1));
+        assert!(result.value(2));
+    }
+
+    #[test]
+    fn test_bitwise_bin_op_mut_or_clone_different_offsets() {
+        // Both sliced (shared buffers), so falls back to allocating path.
+        let left_full = BooleanArray::from(vec![false, true, true, false, true]);
+        let right_full = BooleanArray::from(vec![true, true, false, false, true, false]);
+
+        let left = left_full.slice(1, 3); // [true, true, false]
+        let right = right_full.slice(2, 3); // [false, false, true]
+
+        let expected = left.bitwise_bin_op(&right, |a, b| a & b);
+        let result = left.bitwise_bin_op_mut_or_clone(&right, |a, b| a & b);
+        assert_eq!(result, expected);
+    }
 }
diff --git a/arrow-buffer/src/buffer/null.rs b/arrow-buffer/src/buffer/null.rs
index 6046369c62a7..729beaa0612f 100644
--- a/arrow-buffer/src/buffer/null.rs
+++ b/arrow-buffer/src/buffer/null.rs
@@ -84,6 +84,22 @@ impl NullBuffer {
         }
     }
 
+    /// Computes the union of the nulls in multiple optional [`NullBuffer`]s
+    ///
+    /// Returns `None` when no input is `Some`. See [`union`](Self::union)
+    pub fn union_many<'a>(
+        nulls: impl IntoIterator<Item = Option<&'a NullBuffer>>,
+    ) -> Option<NullBuffer> {
+        // Unwrap to BooleanBuffer because BitAndAssign is not implemented for NullBuffer
+        let mut buffers = nulls.into_iter().filter_map(|nb| nb.map(NullBuffer::inner));
+        let first = buffers.next()?;
+        let mut result = first.clone();
+        for buf in buffers {
+            result &= buf;
+        }
+        Some(Self::new(result))
+    }
+
     /// Returns true if all nulls in `other` also exist in self
     pub fn contains(&self, other: &NullBuffer) -> bool {
         if other.null_count == 0 {
@@ -336,4 +352,51 @@ mod tests {
         let result = NullBuffer::from_unsliced_buffer(buf, 0);
         assert!(result.is_none());
     }
+
+    #[test]
+    fn test_union_many_all_none() {
+        let result = NullBuffer::union_many([None, None, None]);
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn test_union_many_single_some() {
+        let a = NullBuffer::from(&[true, false, true, true]);
+        let result = NullBuffer::union_many([Some(&a)]);
+        assert_eq!(result, Some(a));
+    }
+
+    #[test]
+    fn test_union_many_two_inputs() {
+        let a = NullBuffer::from(&[true, false, true, true]);
+        let b = NullBuffer::from(&[true, true, false, true]);
+        let result = NullBuffer::union_many([Some(&a), Some(&b)]);
+        let expected = NullBuffer::union(Some(&a), Some(&b));
+        assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn test_union_many_three_inputs() {
+        let a = NullBuffer::from(&[true, false, true, true]);
+        let b = NullBuffer::from(&[true, true, false, true]);
+        let c = NullBuffer::from(&[false, true, true, true]);
+        let result = NullBuffer::union_many([Some(&a), Some(&b), Some(&c)]);
+        let expected = NullBuffer::from(&[false, false, false, true]);
+        assert_eq!(result, Some(expected));
+    }
+
+    #[test]
+    fn test_union_many_mixed_none() {
+        let a = NullBuffer::from(&[true, false, true, true]);
+        let b = NullBuffer::from(&[false, true, true, true]);
+        let result = NullBuffer::union_many([Some(&a), None, Some(&b)]);
+        let expected = NullBuffer::union(Some(&a), Some(&b));
+        assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn test_union_many_empty_slice() {
+        let result = NullBuffer::union_many([] as [Option<&NullBuffer>; 0]);
+        assert!(result.is_none());
+    }
 }