Skip to content

Commit 73ac072

Browse files
committed
reset validity instead of filling
Signed-off-by: Mikhail Kot <to@myrrc.dev>
1 parent bab7798 commit 73ac072

6 files changed

Lines changed: 203 additions & 218 deletions

File tree

vortex-buffer/src/bit/buf.rs

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,17 @@ impl PartialEq for BitBuffer {
6464
}
6565
}
6666

67+
/// Whether the bits in a BitBuffer are all zero, all one, or mixed
68+
#[derive(PartialEq, Eq, Debug)]
69+
pub enum ZeroesCount {
70+
/// All bits are unset
71+
AllZeroes,
72+
/// All bits are set
73+
AllOnes,
74+
/// Some bits are set and some are unset
75+
Unknown,
76+
}
77+
6778
impl BitBuffer {
6879
/// Create a new `BoolBuffer` backed by a [`ByteBuffer`] with `len` bits in view.
6980
///
@@ -319,6 +330,36 @@ impl BitBuffer {
319330
count_ones(self.buffer.as_slice(), self.offset, self.len)
320331
}
321332

333+
/// Copy bits to target slice, returning whether all bits are zero, all are one, or mixed.
334+
/// `target` must hold at least `len.div_ceil(64)` words, where `len` is this buffer's length in bits.
335+
pub fn copy_to_slice(&self, target: &mut [u64]) -> ZeroesCount {
336+
let mut sum_or = 0u64;
337+
let mut sum_and = u64::MAX;
338+
let chunks = self.chunks();
339+
let remainder_bits = chunks.remainder_bits();
340+
let remainder_len = chunks.remainder_len();
341+
let chunk_len = chunks.chunk_len();
342+
for (slot, chunk) in target.iter_mut().zip(chunks) {
343+
*slot = chunk;
344+
sum_or |= chunk;
345+
sum_and &= chunk;
346+
}
347+
if remainder_len > 0 {
348+
target[chunk_len] = remainder_bits;
349+
sum_or |= remainder_bits;
350+
let mask = (1u64 << remainder_len) - 1;
351+
sum_and &= remainder_bits | !mask;
352+
}
353+
354+
if sum_or == 0 {
355+
ZeroesCount::AllZeroes
356+
} else if sum_and == u64::MAX {
357+
ZeroesCount::AllOnes
358+
} else {
359+
ZeroesCount::Unknown
360+
}
361+
}
362+
322363
/// Get the number of unset bits in the buffer.
323364
pub fn false_count(&self) -> usize {
324365
self.len - self.true_count()
@@ -571,6 +612,7 @@ mod tests {
571612
use rstest::rstest;
572613

573614
use crate::ByteBuffer;
615+
use crate::ZeroesCount;
574616
use crate::bit::BitBuffer;
575617
use crate::buffer;
576618

@@ -781,4 +823,104 @@ mod tests {
781823
assert_eq!(mapped.value(i), expected, "Mismatch at index {}", i);
782824
}
783825
}
826+
827+
fn copy_equal(source: BitBuffer, expected_count: ZeroesCount) {
828+
let n = source.len().div_ceil(64);
829+
let mut target = vec![0u64; n];
830+
let result = source.copy_to_slice(&mut target);
831+
assert_eq!(result, expected_count);
832+
for i in 0..source.len() {
833+
let copied = (target[i / 64] >> (i % 64)) & 1 == 1;
834+
assert_eq!(copied, source.value(i), "Bit mismatch at index {i}");
835+
}
836+
}
837+
838+
#[rstest]
839+
#[case(10)]
840+
#[case(64)]
841+
#[case(65)]
842+
#[case(128)]
843+
#[case(1024)]
844+
fn test_copy_to_slice_all_true(#[case] len_bits: usize) {
845+
copy_equal(BitBuffer::new_set(len_bits), ZeroesCount::AllOnes);
846+
}
847+
848+
#[rstest]
849+
#[case(10)]
850+
#[case(64)]
851+
#[case(65)]
852+
#[case(128)]
853+
#[case(1024)]
854+
fn test_copy_to_slice_all_false(#[case] len_bits: usize) {
855+
copy_equal(BitBuffer::new_unset(len_bits), ZeroesCount::AllZeroes);
856+
}
857+
858+
#[rstest]
859+
#[case(10)]
860+
#[case(64)]
861+
#[case(65)]
862+
#[case(128)]
863+
#[case(1024)]
864+
fn test_copy_to_slice_all_true_unaligned(#[case] len_bits: usize) {
865+
let buffer = buffer![u8::MAX; 8 * 20];
866+
copy_equal(
867+
BitBuffer::new_with_offset(buffer, len_bits, 1),
868+
ZeroesCount::AllOnes,
869+
);
870+
}
871+
872+
#[rstest]
873+
#[case(10)]
874+
#[case(64)]
875+
#[case(65)]
876+
#[case(128)]
877+
#[case(1024)]
878+
fn test_copy_to_slice_all_false_unaligned(#[case] len_bits: usize) {
879+
let buffer = buffer![0; 8 * 20];
880+
copy_equal(
881+
BitBuffer::new_with_offset(buffer, len_bits, 1),
882+
ZeroesCount::AllZeroes,
883+
);
884+
}
885+
886+
#[test]
887+
fn test_copy_to_slice_some() {
888+
let mut target = vec![0u64];
889+
let source = BitBuffer::new(buffer![1u8, 2, 3, 50, 51, 52, 100, 101], 64);
890+
let zero_count = source.copy_to_slice(&mut target);
891+
assert_eq!(
892+
target[0], 0x65_64_34_33_32_03_02_01_u64,
893+
"{:#08x} == {:#08x}",
894+
target[0], 0x65_64_34_33_32_03_02_01_u64,
895+
);
896+
assert_eq!(zero_count, ZeroesCount::Unknown);
897+
}
898+
899+
#[test]
900+
fn test_copy_to_slice_some_unaligned() {
901+
let source = BitBuffer::new_with_offset(buffer![1u8, 2, 3, 50, 51, 52, 100, 101, 0], 64, 1);
902+
let mut target = vec![0u64; 1];
903+
let zero_count = source.copy_to_slice(&mut target);
904+
assert_eq!(zero_count, ZeroesCount::Unknown);
905+
for i in 0..64 {
906+
let copied = (target[0] >> i) & 1 == 1;
907+
assert_eq!(copied, source.value(i), "Bit mismatch at index {i}");
908+
}
909+
}
910+
911+
#[test]
912+
fn test_copy_to_slice_some_65() {
913+
let mut target = vec![0u64; 2];
914+
let source = BitBuffer::new(buffer![0u8, 0, 0, 0, 0, 0, 0, 0, 1], 65);
915+
let zero_count = source.copy_to_slice(&mut target);
916+
assert_eq!(zero_count, ZeroesCount::Unknown);
917+
}
918+
919+
#[test]
920+
fn test_copy_to_slice_some_65_unaligned() {
921+
let mut target = vec![0u64; 2];
922+
let source = BitBuffer::new_with_offset(buffer![0u8, 1, 0, 0, 0, 0, 0, 0, 0], 65, 1);
923+
let zero_count = source.copy_to_slice(&mut target);
924+
assert_eq!(zero_count, ZeroesCount::Unknown);
925+
}
784926
}

vortex-duckdb/cpp/include/duckdb_vx/vector.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ void duckdb_vx_string_vector_add_vector_data_buffer(duckdb_vector ffi_vector, du
4747
// valid.
4848
void duckdb_vx_vector_set_vector_data_buffer(duckdb_vector ffi_vector, duckdb_vx_vector_buffer buffer);
4949

50+
// Reset vector's validity mask to nullptr, making all vector's elements valid.
51+
// vector must not be a DictionaryVector or a SequenceVector
52+
void duckdb_vx_vector_set_all_valid(duckdb_vector ffi_vector);
53+
5054
// Set the data pointer for the vector. This is the start of the values array in the vector.
5155
void duckdb_vx_vector_set_data_ptr(duckdb_vector ffi_vector, void *ptr);
5256

vortex-duckdb/cpp/vector.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
#include "include/duckdb_vx/vector.h"
45
#include "duckdb_vx/duckdb_diagnostics.h"
56

67
DUCKDB_INCLUDES_BEGIN
@@ -106,3 +107,20 @@ const char *duckdb_vector_to_string(duckdb_vector vector, unsigned long len, duc
106107
return nullptr;
107108
}
108109
}
110+
111+
void duckdb_vx_vector_set_all_valid(duckdb_vector ffi_vector) {
112+
using enum VectorType;
113+
Vector &vector = *reinterpret_cast<Vector *>(ffi_vector);
114+
const VectorType type = vector.GetVectorType();
115+
D_ASSERT(type != DICTIONARY_VECTOR && type != SEQUENCE_VECTOR);
116+
switch (type) {
117+
case CONSTANT_VECTOR:
118+
return ConstantVector::Validity(vector).Reset();
119+
case FLAT_VECTOR:
120+
return FlatVector::Validity(vector).Reset();
121+
case FSST_VECTOR:
122+
return FSSTVector::Validity(vector).Reset();
123+
default:
124+
__builtin_unreachable();
125+
}
126+
}

vortex-duckdb/src/duckdb/vector.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,11 @@ impl VectorRef {
207207
///
208208
/// The provided capacity *must* be the actual capacity of this vector.
209209
pub unsafe fn validity_bitslice_mut(&mut self, capacity: usize) -> Option<&mut BitSlice<u64>> {
210-
unsafe { self.validity_slice_mut(capacity) }.map(|slice| slice.view_bits_mut())
210+
// SAFETY: capacity is always less than BitSlice<u64>::MAX_ELTS
211+
unsafe {
212+
self.validity_slice_mut(capacity)
213+
.map(|slice| BitSlice::from_slice_unchecked_mut(slice))
214+
}
211215
}
212216

213217
pub fn validity_ref(&self, len: usize) -> ValidityRef<'_> {

0 commit comments

Comments
 (0)