Skip to content

Commit f7f88fe

Browse files
committed
reset validity instead of filling
Signed-off-by: Mikhail Kot <to@myrrc.dev>
1 parent bab7798 commit f7f88fe

6 files changed

Lines changed: 154 additions & 218 deletions

File tree

vortex-buffer/src/bit/buf.rs

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,30 @@ impl BitBuffer {
319319
count_ones(self.buffer.as_slice(), self.offset, self.len)
320320
}
321321

322+
/// Copy bits to "target", counting ones.
323+
pub fn copy_with_true_count(&self, target: &mut [u64]) -> usize {
324+
let mut ones = 0usize;
325+
let iter = target.iter_mut();
326+
if self.offset == 0 {
327+
let chunks = self.chunks();
328+
let remainder = chunks.remainder_bits();
329+
let remainder_len = chunks.remainder_len();
330+
for (slot, chunk) in iter.zip(chunks) {
331+
*slot = chunk;
332+
ones += chunk.count_ones() as usize;
333+
}
334+
if remainder_len > 0 {
335+
ones += remainder.count_ones() as usize;
336+
}
337+
} else {
338+
for (slot, chunk) in iter.zip(self.unaligned_chunks().iter()) {
339+
*slot = chunk;
340+
ones += chunk.count_ones() as usize;
341+
}
342+
}
343+
ones
344+
}
345+
322346
/// Get the number of unset bits in the buffer.
323347
pub fn false_count(&self) -> usize {
324348
self.len - self.true_count()
@@ -781,4 +805,77 @@ mod tests {
781805
assert_eq!(mapped.value(i), expected, "Mismatch at index {}", i);
782806
}
783807
}
808+
809+
/// A 10-bit buffer with every bit set must report a true count of 10.
#[test]
fn test_copy_with_true_count_all_true() {
    let source = BitBuffer::new_set(10);
    let mut target = [0u64; 10];
    let true_count = source.copy_with_true_count(&mut target);

    // NOTE(review): a 10-bit buffer is presumably backed by fewer than 8 bytes, in which case
    // `chunks_exact(8)` yields nothing and this comparison checks no words at all. Confirm
    // and consider asserting on `target[0]` directly.
    let bytes = source.inner().as_slice();
    let words_match = target
        .iter()
        .zip(bytes.chunks_exact(8))
        .all(|(word, chunk)| word.to_le_bytes() == chunk);
    assert!(words_match);

    assert_eq!(true_count, 10);
}
823+
824+
/// A 16-bit view starting at bit offset 1 of the bytes `[10, 11, 12]` contains five set bits.
#[test]
fn test_copy_with_true_count_unaligned() {
    let source = BitBuffer::new_with_offset(buffer![10u8, 11u8, 12u8], 16, 1);
    let mut target = [0u64; 10];
    let true_count = source.copy_with_true_count(&mut target);

    // NOTE(review): the backing buffer is built from three bytes, so `chunks_exact(8)`
    // presumably yields nothing and this comparison checks no words at all. Confirm and
    // consider asserting on `target[0]` directly.
    let bytes = source.inner().as_slice();
    let words_match = target
        .iter()
        .zip(bytes.chunks_exact(8))
        .all(|(word, chunk)| word.to_le_bytes() == chunk);
    assert!(words_match);

    assert_eq!(true_count, 5);
}
838+
839+
/// A 10-bit buffer with every bit cleared must report a true count of 0.
#[test]
fn test_copy_with_true_count_all_false() {
    let source = BitBuffer::new_unset(10);
    let mut target = [0u64; 10];
    let true_count = source.copy_with_true_count(&mut target);

    // NOTE(review): a 10-bit buffer is presumably backed by fewer than 8 bytes, in which case
    // `chunks_exact(8)` yields nothing and this comparison checks no words at all. Confirm
    // and consider asserting on `target[0]` directly.
    let bytes = source.inner().as_slice();
    let words_match = target
        .iter()
        .zip(bytes.chunks_exact(8))
        .all(|(word, chunk)| word.to_le_bytes() == chunk);
    assert!(words_match);

    assert_eq!(true_count, 0);
}
853+
854+
/// A 16-bit buffer over the bytes `[0, 1]` contains exactly one set bit.
#[test]
fn test_copy_with_true_count_one() {
    let source = BitBuffer::new(buffer![0u8, 1], 16);
    let mut target = [0u64; 10];
    let true_count = source.copy_with_true_count(&mut target);

    // NOTE(review): the backing buffer is built from two bytes, so `chunks_exact(8)`
    // presumably yields nothing and this comparison checks no words at all. Confirm and
    // consider asserting on `target[0]` directly.
    let bytes = source.inner().as_slice();
    let words_match = target
        .iter()
        .zip(bytes.chunks_exact(8))
        .all(|(word, chunk)| word.to_le_bytes() == chunk);
    assert!(words_match);

    assert_eq!(true_count, 1);
}
868+
869+
/// A 64-bit buffer is copied as one little-endian word and its set bits are counted.
#[test]
fn test_copy_with_true_count() {
    // The eight source bytes, read as a little-endian u64.
    const EXPECTED_WORD: u64 = 0x65_64_34_33_32_03_02_01_u64;

    let source = BitBuffer::new(buffer![1u8, 2, 3, 50, 51, 52, 100, 101], 64);
    let mut target = vec![0u64];
    let true_count = source.copy_with_true_count(&mut target);

    let copied = target[0];
    assert_eq!(
        copied, EXPECTED_WORD,
        "{:#08x} == {:#08x}",
        copied, EXPECTED_WORD,
    );
    // Popcounts of the source bytes: 1 + 1 + 2 + 3 + 4 + 3 + 3 + 4.
    assert_eq!(true_count, 21);
}
784881
}

vortex-duckdb/cpp/include/duckdb_vx/vector.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ void duckdb_vx_string_vector_add_vector_data_buffer(duckdb_vector ffi_vector, du
4747
// valid.
4848
void duckdb_vx_vector_set_vector_data_buffer(duckdb_vector ffi_vector, duckdb_vx_vector_buffer buffer);
4949

50+
// Reset the vector's validity mask to nullptr, marking all of the vector's elements as valid.
51+
// The vector must be a flat, constant, or FSST vector; in particular, it must not be a dictionary or sequence vector.
52+
void duckdb_vx_vector_set_all_valid(duckdb_vector ffi_vector);
53+
5054
// Set the data pointer for the vector. This is the start of the values array in the vector.
5155
void duckdb_vx_vector_set_data_ptr(duckdb_vector ffi_vector, void *ptr);
5256

vortex-duckdb/cpp/vector.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
#include "include/duckdb_vx/vector.h"
45
#include "duckdb_vx/duckdb_diagnostics.h"
56

67
DUCKDB_INCLUDES_BEGIN
@@ -106,3 +107,20 @@ const char *duckdb_vector_to_string(duckdb_vector vector, unsigned long len, duc
106107
return nullptr;
107108
}
108109
}
110+
111+
// Resets the validity mask of a flat, constant, or FSST vector.
//
// Dictionary and sequence vectors are rejected by the assertion below. Any vector type other
// than the three handled ones reaches __builtin_unreachable(), so callers must never pass one.
void duckdb_vx_vector_set_all_valid(duckdb_vector ffi_vector) {
    using enum VectorType;
    auto *vec = reinterpret_cast<Vector *>(ffi_vector);
    const VectorType type = vec->GetVectorType();
    // NOTE(review): D_ASSERT is presumably compiled out of release builds; confirm, because
    // in that case a violated precondition falls through to the unreachable branch.
    D_ASSERT(type != DICTIONARY_VECTOR && type != SEQUENCE_VECTOR);
    if (type == CONSTANT_VECTOR) {
        ConstantVector::Validity(*vec).Reset();
    } else if (type == FLAT_VECTOR) {
        FlatVector::Validity(*vec).Reset();
    } else if (type == FSST_VECTOR) {
        FSSTVector::Validity(*vec).Reset();
    } else {
        __builtin_unreachable();
    }
}

vortex-duckdb/src/duckdb/vector.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,11 @@ impl VectorRef {
207207
///
208208
/// The provided capacity *must* be the actual capacity of this vector.
209209
pub unsafe fn validity_bitslice_mut(&mut self, capacity: usize) -> Option<&mut BitSlice<u64>> {
210-
unsafe { self.validity_slice_mut(capacity) }.map(|slice| slice.view_bits_mut())
210+
// capacity is always less than BitSlice<u64>::MAX_ELTS
211+
unsafe {
212+
self.validity_slice_mut(capacity)
213+
.map(|slice| BitSlice::from_slice_unchecked_mut(slice))
214+
}
211215
}
212216

213217
pub fn validity_ref(&self, len: usize) -> ValidityRef<'_> {

vortex-duckdb/src/exporter/mod.rs

Lines changed: 0 additions & 190 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@ mod validity;
1919
mod varbinview;
2020
mod vector;
2121

22-
use bitvec::prelude::Lsb0;
23-
use bitvec::view::BitView;
2422
pub use cache::ConversionCache;
2523
pub use decimal::precision_to_duckdb_storage_size;
2624
use vortex::array::ArrayRef;
@@ -187,19 +185,6 @@ fn new_array_exporter_with_flatten(
187185
}
188186
}
189187

190-
/// Copy the sliced bits from source into target.
191-
///
192-
/// Offset and length are a _bit_ offset and a _bit_ length into source.
193-
///
194-
/// `target.len()` must equal `len`.
195-
fn copy_from_slice(target: &mut [u64], source: &[u8], offset: usize, len: usize) {
196-
let (start, middle, end) = unsafe { target.align_to_mut::<u8>() };
197-
assert!(start.is_empty());
198-
assert!(end.is_empty());
199-
let target = &mut middle.view_bits_mut::<Lsb0>()[..len];
200-
target.copy_from_bitslice(&source.view_bits()[offset..][..len]);
201-
}
202-
203188
#[cfg(test)]
204189
mod tests {
205190
use vortex::buffer::BitBuffer;
@@ -208,7 +193,6 @@ mod tests {
208193
use crate::cpp::DUCKDB_TYPE;
209194
use crate::duckdb::LogicalType;
210195
use crate::duckdb::Vector;
211-
use crate::exporter::copy_from_slice;
212196

213197
#[test]
214198
fn test_set_validity_all_true() {
@@ -355,178 +339,4 @@ mod tests {
355339
}
356340
}
357341

358-
#[test]
359-
fn test_copy_from_slice_empty_to_empty() {
360-
let target = &mut [];
361-
let source = Vec::<u8>::new();
362-
copy_from_slice(target, &source, 0, 0);
363-
}
364-
365-
#[test]
366-
fn test_copy_from_slice_64_to_empty() {
367-
let target = &mut [];
368-
let source = [1u8, 2, 3, 50, 51, 52, 100, 101];
369-
copy_from_slice(target, &source, 0, 0);
370-
copy_from_slice(target, &source, 5, 0);
371-
copy_from_slice(target, &source, 8, 0);
372-
}
373-
374-
#[test]
375-
fn test_copy_from_slice_64_to_64() {
376-
let mut target = vec![0u64];
377-
let source = [1u8, 2, 3, 50, 51, 52, 100, 101];
378-
copy_from_slice(&mut target, &source, 0, 64);
379-
assert_eq!(
380-
target[0], 0x65_64_34_33_32_03_02_01_u64,
381-
"{:#08x} == {:#08x}",
382-
target[0], 0x65_64_34_33_32_03_02_01_u64,
383-
);
384-
}
385-
386-
#[test]
387-
fn test_copy_from_slice_80_to_0() {
388-
let target = &mut [];
389-
let source = [1u8, 2, 3, 50, 51, 52, 100, 101, 254, 255];
390-
copy_from_slice(target, &source, 0, 0);
391-
copy_from_slice(target, &source, 8, 0);
392-
copy_from_slice(target, &source, 10, 0);
393-
}
394-
395-
#[test]
396-
fn test_copy_from_slice_80_to_64_case_1() {
397-
let mut target = [0u64];
398-
let source = [1u8, 2, 3, 50, 51, 52, 100, 101, 254, 255];
399-
copy_from_slice(&mut target, &source, 16, 64);
400-
assert_eq!(
401-
target[0], 0xff_fe_65_64_34_33_32_03_u64,
402-
"{:#08x} == {:#08x}",
403-
target[0], 0xff_fe_65_64_34_33_32_03_u64,
404-
);
405-
}
406-
407-
#[test]
408-
fn test_copy_from_slice_80_to_64_case_2() {
409-
let mut target = [0u64];
410-
let source = [1u8, 2, 3, 50, 51, 52, 100, 101, 254, 255];
411-
copy_from_slice(&mut target, &source, 8, 64);
412-
assert_eq!(
413-
target[0], 0xfe_65_64_34_33_32_03_02_u64,
414-
"{:#08x} == {:#08x}",
415-
target[0], 0xfe_65_64_34_33_32_03_02_u64,
416-
);
417-
}
418-
419-
#[test]
420-
fn test_copy_from_slice_80_to_64_case_3() {
421-
let mut target = [0u64];
422-
let source = [1u8, 2, 3, 50, 51, 52, 100, 101, 254, 255];
423-
copy_from_slice(&mut target, &source, 0, 64);
424-
assert_eq!(
425-
target[0], 0x65_64_34_33_32_03_02_01_u64,
426-
"{:#08x} == {:#08x}",
427-
target[0], 0x65_64_34_33_32_03_02_01_u64,
428-
);
429-
}
430-
431-
#[test]
432-
fn test_copy_from_slice_80_to_64_case_4() {
433-
let mut target = [0u64];
434-
let source = [1u8, 2, 3, 50, 51, 52, 100, 101, 254, 255];
435-
copy_from_slice(&mut target, &source, 10, 64);
436-
assert_eq!(
437-
target[0],
438-
0xff_99_59_0d_0c_cc_80_c0_u64, // Python: hex(0xff_fe_65_64_34_33_32_03_02 >> 2), then remove the high two hexits
439-
"{:#08x} == {:#08x}",
440-
target[0],
441-
0xff_99_59_0d_0c_cc_80_c0_u64
442-
);
443-
}
444-
445-
#[test]
446-
fn test_copy_from_slice_248_to_128_middle_non_empty() {
447-
let mut target = [0u64, 0u64];
448-
let source: [u8; 31] = [
449-
0x01, 0x02, 0x03, 0x04, 0xff, 0xfe, 0xfd, 0xfc, 0x05, 0x06, 0x07, 0x08, 0xfc, 0xfb,
450-
0xfa, 0xf9, 0x01, 0x02, 0x03, 0x04, 0xff, 0xfe, 0xfd, 0xfc, 0x05, 0x06, 0x07, 0x08,
451-
0xfc, 0xfb, 0xfa,
452-
];
453-
// In a span of 248 bits (31 bytes) there should be at least one 8-byte aligned span.
454-
let (_, middle, _) = unsafe { source.align_to::<u64>() };
455-
assert!(!middle.is_empty());
456-
457-
copy_from_slice(&mut target, &source, 0, 128);
458-
assert_eq!(
459-
target[0], 0xfc_fd_fe_ff_04_03_02_01_u64,
460-
"{:#08x} == {:#08x}",
461-
target[0], 0xfc_fd_fe_ff_04_03_02_01_u64,
462-
);
463-
assert_eq!(
464-
target[1], 0xf9_fa_fb_fc_08_07_06_05_u64,
465-
"{:#08x} == {:#08x}",
466-
target[1], 0xf9_fa_fb_fc_08_07_06_05_u64,
467-
);
468-
469-
copy_from_slice(&mut target, &source, 8, 128);
470-
assert_eq!(
471-
target[0], 0x05_fc_fd_fe_ff_04_03_02_u64,
472-
"{:#08x} == {:#08x}",
473-
target[0], 0x05_fc_fd_fe_ff_04_03_02_u64,
474-
);
475-
assert_eq!(
476-
target[1], 0x01_f9_fa_fb_fc_08_07_06_u64,
477-
"{:#08x} == {:#08x}",
478-
target[1], 0x01_f9_fa_fb_fc_08_07_06_u64,
479-
);
480-
481-
copy_from_slice(&mut target, &source, 8 * 8, 128);
482-
assert_eq!(
483-
target[0], 0xf9_fa_fb_fc_08_07_06_05_u64,
484-
"{:#08x} == {:#08x}",
485-
target[0], 0xf9_fa_fb_fc_08_07_06_05_u64,
486-
);
487-
assert_eq!(
488-
target[1], 0xfc_fd_fe_ff_04_03_02_01_u64,
489-
"{:#08x} == {:#08x}",
490-
target[1], 0xfc_fd_fe_ff_04_03_02_01_u64,
491-
);
492-
493-
copy_from_slice(&mut target, &source, 8 * 12, 128);
494-
assert_eq!(
495-
target[0], 0x04_03_02_01_f9_fa_fb_fc_u64,
496-
"{:#08x} == {:#08x}",
497-
target[0], 0x04_03_02_01_f9_fa_fb_fc_u64,
498-
);
499-
assert_eq!(
500-
target[1], 0x08_07_06_05_fc_fd_fe_ff_u64,
501-
"{:#08x} == {:#08x}",
502-
target[1], 0x08_07_06_05_fc_fd_fe_ff_u64,
503-
);
504-
505-
copy_from_slice(&mut target, &source, 8 * 12 + 4, 128);
506-
// Find the 12th byte, skip the first hexit, take the next 32 hexits (i.e. 16 bytesor 128
507-
// bits).
508-
assert_eq!(
509-
target[0], 0xf0_40_30_20_1f_9f_af_bf_u64,
510-
"{:#08x} == {:#08x}",
511-
target[0], 0xf0_40_30_20_1f_9f_af_bf_u64,
512-
);
513-
assert_eq!(
514-
target[1], 0xc0_80_70_60_5f_cf_df_ef_u64,
515-
"{:#08x} == {:#08x}",
516-
target[1], 0xc0_80_70_60_5f_cf_df_ef_u64,
517-
);
518-
519-
// Take the above and shift one bit towards the right-hand-side.
520-
copy_from_slice(&mut target, &source, 8 * 12 + 4 + 1, 128);
521-
assert_eq!(
522-
target[0], 0xf8_20_18_10_0f_cf_d7_df_u64,
523-
"{:#08x} == {:#08x}",
524-
target[0], 0xf8_20_18_10_0f_cf_d7_df_u64,
525-
);
526-
assert_eq!(
527-
target[1], 0xe0_40_38_30_2f_e7_ef_f7_u64,
528-
"{:#08x} == {:#08x}",
529-
target[1], 0xe0_40_38_30_2f_e7_ef_f7_u64,
530-
);
531-
}
532342
}

0 commit comments

Comments
 (0)