Skip to content

Commit 7658463

Browse files
committed
reset validity instead of filling
Signed-off-by: Mikhail Kot <to@myrrc.dev>
1 parent bab7798 commit 7658463

5 files changed

Lines changed: 111 additions & 56 deletions

File tree

vortex-duckdb/cpp/include/duckdb_vx/vector.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ void duckdb_vx_string_vector_add_vector_data_buffer(duckdb_vector ffi_vector, du
4747
// valid.
4848
void duckdb_vx_vector_set_vector_data_buffer(duckdb_vector ffi_vector, duckdb_vx_vector_buffer buffer);
4949

50+
// Reset vector's validity mask to nullptr, making all vector's elements valid.
51+
// vector must not be a DictionaryVector or a SequenceVector
52+
void duckdb_vx_vector_set_all_valid(duckdb_vector ffi_vector);
53+
5054
// Set the data pointer for the vector. This is the start of the values array in the vector.
5155
void duckdb_vx_vector_set_data_ptr(duckdb_vector ffi_vector, void *ptr);
5256

vortex-duckdb/cpp/vector.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
#include "include/duckdb_vx/vector.h"
45
#include "duckdb_vx/duckdb_diagnostics.h"
56

67
DUCKDB_INCLUDES_BEGIN
@@ -106,3 +107,20 @@ const char *duckdb_vector_to_string(duckdb_vector vector, unsigned long len, duc
106107
return nullptr;
107108
}
108109
}
110+
111+
void duckdb_vx_vector_set_all_valid(duckdb_vector ffi_vector) {
112+
using enum VectorType;
113+
Vector &vector = *reinterpret_cast<Vector *>(ffi_vector);
114+
const VectorType type = vector.GetVectorType();
115+
D_ASSERT(type != DICTIONARY_VECTOR && type != SEQUENCE_VECTOR);
116+
switch (type) {
117+
case CONSTANT_VECTOR:
118+
return ConstantVector::Validity(vector).Reset();
119+
case FLAT_VECTOR:
120+
return FlatVector::Validity(vector).Reset();
121+
case FSST_VECTOR:
122+
return FSSTVector::Validity(vector).Reset();
123+
default:
124+
__builtin_unreachable();
125+
}
126+
}

vortex-duckdb/src/duckdb/vector.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,11 @@ impl VectorRef {
207207
///
208208
/// The provided capacity *must* be the actual capacity of this vector.
209209
pub unsafe fn validity_bitslice_mut(&mut self, capacity: usize) -> Option<&mut BitSlice<u64>> {
210-
unsafe { self.validity_slice_mut(capacity) }.map(|slice| slice.view_bits_mut())
210+
// capacity is always less than BitSlice<u64>::MAX_ELTS
211+
unsafe {
212+
self.validity_slice_mut(capacity)
213+
.map(|slice| BitSlice::from_slice_unchecked_mut(slice))
214+
}
211215
}
212216

213217
pub fn validity_ref(&self, len: usize) -> ValidityRef<'_> {

vortex-duckdb/src/exporter/mod.rs

Lines changed: 54 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@ mod validity;
1919
mod varbinview;
2020
mod vector;
2121

22-
use bitvec::prelude::Lsb0;
23-
use bitvec::view::BitView;
2422
pub use cache::ConversionCache;
2523
pub use decimal::precision_to_duckdb_storage_size;
2624
use vortex::array::ArrayRef;
@@ -32,6 +30,7 @@ use vortex::array::arrays::List;
3230
use vortex::array::arrays::StructArray;
3331
use vortex::array::arrays::TemporalArray;
3432
use vortex::array::arrays::struct_::StructArrayExt;
33+
use vortex::buffer::BitChunks;
3534
use vortex::encodings::runend::RunEnd;
3635
use vortex::encodings::sequence::Sequence;
3736
use vortex::error::VortexResult;
@@ -187,17 +186,33 @@ fn new_array_exporter_with_flatten(
187186
}
188187
}
189188

190-
/// Copy the sliced bits from source into target.
189+
/// Copy the sliced bits from source into target, returning whether all copied bits are zero,
190+
/// all are one, or mixed.
191191
///
192-
/// Offset and length are a _bit_ offset and a _bit_ length into source.
192+
/// offset and len are a _bit_ offset and a _bit_ length into `source`.
193193
///
194-
/// `target.len()` must equal `len`.
195-
fn copy_from_slice(target: &mut [u64], source: &[u8], offset: usize, len: usize) {
196-
let (start, middle, end) = unsafe { target.align_to_mut::<u8>() };
197-
assert!(start.is_empty());
198-
assert!(end.is_empty());
199-
let target = &mut middle.view_bits_mut::<Lsb0>()[..len];
200-
target.copy_from_bitslice(&source.view_bits()[offset..][..len]);
194+
/// target must have at least len.div_ceil(64) elements.
195+
/// Returns the number of ones in copied slice.
196+
fn copy_from_slice(target: &mut [u64], source: &[u8], offset: usize, len: usize) -> usize {
197+
if len == 0 {
198+
return 0;
199+
}
200+
201+
let mut ones = 0usize;
202+
let chunks = BitChunks::new(source, offset, len);
203+
let chunk_len = chunks.chunk_len();
204+
let remainder_len = chunks.remainder_len();
205+
let remainder = chunks.remainder_bits();
206+
for (slot, chunk) in target.iter_mut().zip(chunks) {
207+
*slot = chunk;
208+
ones += chunk.count_ones() as usize;
209+
}
210+
211+
if remainder_len > 0 {
212+
target[chunk_len] = remainder;
213+
ones += remainder.count_ones() as usize;
214+
}
215+
ones
201216
}
202217

203218
#[cfg(test)]
@@ -359,44 +374,51 @@ mod tests {
359374
fn test_copy_from_slice_empty_to_empty() {
360375
let target = &mut [];
361376
let source = Vec::<u8>::new();
362-
copy_from_slice(target, &source, 0, 0);
377+
assert_eq!(copy_from_slice(target, &source, 0, 0), 0);
363378
}
364379

365380
#[test]
366381
fn test_copy_from_slice_64_to_empty() {
367382
let target = &mut [];
368383
let source = [1u8, 2, 3, 50, 51, 52, 100, 101];
369-
copy_from_slice(target, &source, 0, 0);
370-
copy_from_slice(target, &source, 5, 0);
371-
copy_from_slice(target, &source, 8, 0);
384+
assert_eq!(copy_from_slice(target, &source, 0, 0), 0);
385+
assert_eq!(copy_from_slice(target, &source, 5, 0), 0);
386+
assert_eq!(copy_from_slice(target, &source, 8, 0), 0);
372387
}
373388

374389
#[test]
375390
fn test_copy_from_slice_64_to_64() {
376391
let mut target = vec![0u64];
377392
let source = [1u8, 2, 3, 50, 51, 52, 100, 101];
378-
copy_from_slice(&mut target, &source, 0, 64);
393+
assert_eq!(copy_from_slice(&mut target, &source, 0, 64), 21);
379394
assert_eq!(
380395
target[0], 0x65_64_34_33_32_03_02_01_u64,
381396
"{:#08x} == {:#08x}",
382397
target[0], 0x65_64_34_33_32_03_02_01_u64,
383398
);
384399
}
385400

401+
#[test]
402+
fn test_copy_from_slice_64_ones() {
403+
let mut target = [0u64];
404+
let source = [u8::MAX; 8];
405+
assert_eq!(copy_from_slice(&mut target, &source, 0, 64), 64);
406+
}
407+
386408
#[test]
387409
fn test_copy_from_slice_80_to_0() {
388410
let target = &mut [];
389411
let source = [1u8, 2, 3, 50, 51, 52, 100, 101, 254, 255];
390-
copy_from_slice(target, &source, 0, 0);
391-
copy_from_slice(target, &source, 8, 0);
392-
copy_from_slice(target, &source, 10, 0);
412+
assert_eq!(copy_from_slice(target, &source, 0, 0), 0);
413+
assert_eq!(copy_from_slice(target, &source, 8, 0), 0);
414+
assert_eq!(copy_from_slice(target, &source, 10, 0), 0,);
393415
}
394416

395417
#[test]
396418
fn test_copy_from_slice_80_to_64_case_1() {
397419
let mut target = [0u64];
398420
let source = [1u8, 2, 3, 50, 51, 52, 100, 101, 254, 255];
399-
copy_from_slice(&mut target, &source, 16, 64);
421+
assert_eq!(copy_from_slice(&mut target, &source, 16, 64), 34);
400422
assert_eq!(
401423
target[0], 0xff_fe_65_64_34_33_32_03_u64,
402424
"{:#08x} == {:#08x}",
@@ -408,7 +430,7 @@ mod tests {
408430
fn test_copy_from_slice_80_to_64_case_2() {
409431
let mut target = [0u64];
410432
let source = [1u8, 2, 3, 50, 51, 52, 100, 101, 254, 255];
411-
copy_from_slice(&mut target, &source, 8, 64);
433+
assert_eq!(copy_from_slice(&mut target, &source, 8, 64), 27);
412434
assert_eq!(
413435
target[0], 0xfe_65_64_34_33_32_03_02_u64,
414436
"{:#08x} == {:#08x}",
@@ -420,7 +442,7 @@ mod tests {
420442
fn test_copy_from_slice_80_to_64_case_3() {
421443
let mut target = [0u64];
422444
let source = [1u8, 2, 3, 50, 51, 52, 100, 101, 254, 255];
423-
copy_from_slice(&mut target, &source, 0, 64);
445+
assert_eq!(copy_from_slice(&mut target, &source, 0, 64), 21,);
424446
assert_eq!(
425447
target[0], 0x65_64_34_33_32_03_02_01_u64,
426448
"{:#08x} == {:#08x}",
@@ -432,7 +454,7 @@ mod tests {
432454
fn test_copy_from_slice_80_to_64_case_4() {
433455
let mut target = [0u64];
434456
let source = [1u8, 2, 3, 50, 51, 52, 100, 101, 254, 255];
435-
copy_from_slice(&mut target, &source, 10, 64);
457+
assert_eq!(copy_from_slice(&mut target, &source, 10, 64), 28,);
436458
assert_eq!(
437459
target[0],
438460
0xff_99_59_0d_0c_cc_80_c0_u64, // Python: hex(0xff_fe_65_64_34_33_32_03_02 >> 2), then remove the high two hexits
@@ -454,7 +476,7 @@ mod tests {
454476
let (_, middle, _) = unsafe { source.align_to::<u64>() };
455477
assert!(!middle.is_empty());
456478

457-
copy_from_slice(&mut target, &source, 0, 128);
479+
assert_eq!(copy_from_slice(&mut target, &source, 0, 128), 66,);
458480
assert_eq!(
459481
target[0], 0xfc_fd_fe_ff_04_03_02_01_u64,
460482
"{:#08x} == {:#08x}",
@@ -466,7 +488,7 @@ mod tests {
466488
target[1], 0xf9_fa_fb_fc_08_07_06_05_u64,
467489
);
468490

469-
copy_from_slice(&mut target, &source, 8, 128);
491+
assert_eq!(copy_from_slice(&mut target, &source, 8, 128), 66);
470492
assert_eq!(
471493
target[0], 0x05_fc_fd_fe_ff_04_03_02_u64,
472494
"{:#08x} == {:#08x}",
@@ -478,7 +500,7 @@ mod tests {
478500
target[1], 0x01_f9_fa_fb_fc_08_07_06_u64,
479501
);
480502

481-
copy_from_slice(&mut target, &source, 8 * 8, 128);
503+
assert_eq!(copy_from_slice(&mut target, &source, 8 * 8, 128), 66,);
482504
assert_eq!(
483505
target[0], 0xf9_fa_fb_fc_08_07_06_05_u64,
484506
"{:#08x} == {:#08x}",
@@ -490,7 +512,7 @@ mod tests {
490512
target[1], 0xfc_fd_fe_ff_04_03_02_01_u64,
491513
);
492514

493-
copy_from_slice(&mut target, &source, 8 * 12, 128);
515+
assert_eq!(copy_from_slice(&mut target, &source, 8 * 12, 128), 66,);
494516
assert_eq!(
495517
target[0], 0x04_03_02_01_f9_fa_fb_fc_u64,
496518
"{:#08x} == {:#08x}",
@@ -502,7 +524,7 @@ mod tests {
502524
target[1], 0x08_07_06_05_fc_fd_fe_ff_u64,
503525
);
504526

505-
copy_from_slice(&mut target, &source, 8 * 12 + 4, 128);
527+
assert_eq!(copy_from_slice(&mut target, &source, 8 * 12 + 4, 128), 66,);
506528
// Find the 12th byte, skip the first hexit, take the next 32 hexits (i.e. 16 bytesor 128
507529
// bits).
508530
assert_eq!(
@@ -517,7 +539,10 @@ mod tests {
517539
);
518540

519541
// Take the above and shift one bit towards the right-hand-side.
520-
copy_from_slice(&mut target, &source, 8 * 12 + 4 + 1, 128);
542+
assert_eq!(
543+
copy_from_slice(&mut target, &source, 8 * 12 + 4 + 1, 128),
544+
66,
545+
);
521546
assert_eq!(
522547
target[0], 0xf8_20_18_10_0f_cf_d7_df_u64,
523548
"{:#08x} == {:#08x}",

vortex-duckdb/src/exporter/vector.rs

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use vortex::mask::Mask;
5+
use vortex::mask::MaskValues;
56

7+
use crate::cpp::duckdb_vx_vector_set_all_valid;
68
use crate::duckdb::Value;
79
use crate::duckdb::VectorRef;
810
use crate::exporter::copy_from_slice;
@@ -11,44 +13,46 @@ impl VectorRef {
1113
pub(super) unsafe fn set_validity(&mut self, mask: &Mask, offset: usize, len: usize) -> bool {
1214
match mask {
1315
Mask::AllTrue(_) => {
14-
// We only need to blank out validity if there is already a slice allocated.
15-
// SAFETY: Caller guarantees this.
16-
unsafe { self.set_all_true_validity(len) }
16+
self.set_all_true_validity();
1717
false
1818
}
1919
Mask::AllFalse(_) => {
20-
// SAFETY: Caller guarantees this.
2120
self.set_all_false_validity();
2221
true
2322
}
24-
Mask::Values(arr) => {
25-
let true_count = arr.bit_buffer().slice(offset..(offset + len)).true_count();
26-
if true_count == len {
27-
unsafe { self.set_all_true_validity(len) }
28-
} else if true_count == 0 {
29-
self.set_all_false_validity()
30-
} else {
31-
let source = arr.bit_buffer().inner().as_slice();
32-
copy_from_slice(
33-
unsafe { self.ensure_validity_slice(len) },
34-
source,
35-
offset,
36-
len,
37-
);
38-
}
39-
40-
true_count == 0
41-
}
23+
Mask::Values(arr) => self.set_validity_with_array(arr, len, offset),
4224
}
4325
}
4426

45-
pub(super) unsafe fn set_all_true_validity(&mut self, len: usize) {
46-
if let Some(validity) = unsafe { self.validity_bitslice_mut(len) } {
47-
validity.fill(true);
27+
fn set_validity_with_array(&mut self, arr: &MaskValues, len: usize, offset: usize) -> bool {
28+
let true_count = arr.true_count();
29+
if true_count == arr.len() {
30+
self.set_all_true_validity();
31+
return false;
32+
} else if true_count == 0 {
33+
self.set_all_false_validity();
34+
return true;
35+
}
36+
37+
let dest = unsafe { self.ensure_validity_slice(len) };
38+
let source = arr.bit_buffer().inner().as_slice();
39+
let ones = copy_from_slice(dest, source, offset, len);
40+
if ones == 0 {
41+
self.set_all_false_validity();
42+
true
43+
} else if ones == len {
44+
self.set_all_true_validity();
45+
false
46+
} else {
47+
false
4848
}
4949
}
5050

51-
pub(super) fn set_all_false_validity(&mut self) {
51+
fn set_all_true_validity(&mut self) {
52+
unsafe { duckdb_vx_vector_set_all_valid(self.as_ptr()) };
53+
}
54+
55+
fn set_all_false_validity(&mut self) {
5256
self.reference_value(&Value::null(&self.logical_type()));
5357
}
5458
}

0 commit comments

Comments
 (0)