Skip to content

Commit 30483d5

Browse files
committed
reset validity instead of filling
Signed-off-by: Mikhail Kot <to@myrrc.dev>
1 parent bab7798 commit 30483d5

5 files changed

Lines changed: 72 additions & 36 deletions

File tree

vortex-duckdb/cpp/include/duckdb_vx/vector.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@ void duckdb_vx_string_vector_add_vector_data_buffer(duckdb_vector ffi_vector, du
4747
// valid.
4848
void duckdb_vx_vector_set_vector_data_buffer(duckdb_vector ffi_vector, duckdb_vx_vector_buffer buffer);
4949

50+
// Reset vector's validity mask to nullptr, making all vector's elements valid.
51+
// vector must not be a DictionaryVector or a SequenceVector
52+
void duckdb_vx_vector_set_all_valid(duckdb_vector ffi_vector);
53+
5054
// Set the data pointer for the vector. This is the start of the values array in the vector.
5155
void duckdb_vx_vector_set_data_ptr(duckdb_vector ffi_vector, void *ptr);
5256

vortex-duckdb/cpp/vector.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
#include "include/duckdb_vx/vector.h"
45
#include "duckdb_vx/duckdb_diagnostics.h"
56

67
DUCKDB_INCLUDES_BEGIN
@@ -106,3 +107,20 @@ const char *duckdb_vector_to_string(duckdb_vector vector, unsigned long len, duc
106107
return nullptr;
107108
}
108109
}
110+
111+
void duckdb_vx_vector_set_all_valid(duckdb_vector ffi_vector) {
112+
using enum VectorType;
113+
Vector &vector = *reinterpret_cast<Vector *>(ffi_vector);
114+
const VectorType type = vector.GetVectorType();
115+
D_ASSERT(type != DICTIONARY_VECTOR && type != SEQUENCE_VECTOR);
116+
switch (type) {
117+
case CONSTANT_VECTOR:
118+
return ConstantVector::Validity(vector).Reset();
119+
case FLAT_VECTOR:
120+
return FlatVector::Validity(vector).Reset();
121+
case FSST_VECTOR:
122+
return FSSTVector::Validity(vector).Reset();
123+
default:
124+
__builtin_unreachable();
125+
}
126+
}

vortex-duckdb/src/duckdb/vector.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,13 @@ impl VectorRef {
207207
///
208208
/// The provided capacity *must* be the actual capacity of this vector.
209209
pub unsafe fn validity_bitslice_mut(&mut self, capacity: usize) -> Option<&mut BitSlice<u64>> {
210-
unsafe { self.validity_slice_mut(capacity) }.map(|slice| slice.view_bits_mut())
210+
// from_slice_unchecked_mut implies capacity must be less than BitSlice<u64>::MAX_ELTS
211+
// but this is always true since validity is set per VectorRef/vector and its side doesn't
212+
// exceed 2048
213+
unsafe {
214+
self.validity_slice_mut(capacity)
215+
.map(|slice| BitSlice::from_slice_unchecked_mut(slice))
216+
}
211217
}
212218

213219
pub fn validity_ref(&self, len: usize) -> ValidityRef<'_> {

vortex-duckdb/src/exporter/mod.rs

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@ mod validity;
1919
mod varbinview;
2020
mod vector;
2121

22-
use bitvec::prelude::Lsb0;
23-
use bitvec::view::BitView;
2422
pub use cache::ConversionCache;
2523
pub use decimal::precision_to_duckdb_storage_size;
2624
use vortex::array::ArrayRef;
@@ -32,6 +30,7 @@ use vortex::array::arrays::List;
3230
use vortex::array::arrays::StructArray;
3331
use vortex::array::arrays::TemporalArray;
3432
use vortex::array::arrays::struct_::StructArrayExt;
33+
use vortex::buffer::BitChunks;
3534
use vortex::encodings::runend::RunEnd;
3635
use vortex::encodings::sequence::Sequence;
3736
use vortex::error::VortexResult;
@@ -187,17 +186,24 @@ fn new_array_exporter_with_flatten(
187186
}
188187
}
189188

190-
/// Copy the sliced bits from source into target.
189+
/// Copy the sliced bits from source into target, counting ones.
191190
///
192191
/// Offset and length are a _bit_ offset and a _bit_ length into source.
193192
///
194193
/// `target.len()` must equal `len`.
195-
fn copy_from_slice(target: &mut [u64], source: &[u8], offset: usize, len: usize) {
196-
let (start, middle, end) = unsafe { target.align_to_mut::<u8>() };
197-
assert!(start.is_empty());
198-
assert!(end.is_empty());
199-
let target = &mut middle.view_bits_mut::<Lsb0>()[..len];
200-
target.copy_from_bitslice(&source.view_bits()[offset..][..len]);
194+
fn copy_from_slice(target: &mut [u64], source: &[u8], offset: usize, len: usize) -> usize {
195+
let chunks = BitChunks::new(source, offset, len);
196+
let mut count = 0usize;
197+
for (i, chunk) in chunks.iter().enumerate() {
198+
target[i] = chunk;
199+
count += chunk.count_ones() as usize;
200+
}
201+
if chunks.remainder_len() > 0 {
202+
let remainder = chunks.remainder_bits();
203+
target[chunks.chunk_len()] = remainder;
204+
count += remainder.count_ones() as usize;
205+
}
206+
count
201207
}
202208

203209
#[cfg(test)]

vortex-duckdb/src/exporter/vector.rs

Lines changed: 28 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
use vortex::mask::Mask;
5+
use vortex::mask::MaskValues;
56

7+
use crate::cpp::duckdb_vx_vector_set_all_valid;
68
use crate::duckdb::Value;
79
use crate::duckdb::VectorRef;
810
use crate::exporter::copy_from_slice;
@@ -11,44 +13,44 @@ impl VectorRef {
1113
pub(super) unsafe fn set_validity(&mut self, mask: &Mask, offset: usize, len: usize) -> bool {
1214
match mask {
1315
Mask::AllTrue(_) => {
14-
// We only need to blank out validity if there is already a slice allocated.
15-
// SAFETY: Caller guarantees this.
16-
unsafe { self.set_all_true_validity(len) }
16+
self.set_all_true_validity();
1717
false
1818
}
1919
Mask::AllFalse(_) => {
20-
// SAFETY: Caller guarantees this.
2120
self.set_all_false_validity();
2221
true
2322
}
24-
Mask::Values(arr) => {
25-
let true_count = arr.bit_buffer().slice(offset..(offset + len)).true_count();
26-
if true_count == len {
27-
unsafe { self.set_all_true_validity(len) }
28-
} else if true_count == 0 {
29-
self.set_all_false_validity()
30-
} else {
31-
let source = arr.bit_buffer().inner().as_slice();
32-
copy_from_slice(
33-
unsafe { self.ensure_validity_slice(len) },
34-
source,
35-
offset,
36-
len,
37-
);
38-
}
39-
40-
true_count == 0
41-
}
23+
Mask::Values(arr) => self.set_validity_with_array(&arr, len, offset),
4224
}
4325
}
4426

45-
pub(super) unsafe fn set_all_true_validity(&mut self, len: usize) {
46-
if let Some(validity) = unsafe { self.validity_bitslice_mut(len) } {
47-
validity.fill(true);
27+
fn set_validity_with_array(&mut self, arr: &MaskValues, len: usize, offset: usize) -> bool {
28+
let true_count = arr.true_count();
29+
if true_count == arr.len() {
30+
self.set_all_true_validity();
31+
return false;
32+
} else if true_count == 0 {
33+
self.set_all_false_validity();
34+
return true;
35+
}
36+
37+
let source = arr.bit_buffer().inner().as_slice();
38+
let dest = unsafe { self.ensure_validity_slice(len) };
39+
let true_count = copy_from_slice(dest, source, offset, len);
40+
if true_count == len {
41+
self.set_all_true_validity()
42+
} else if true_count == 0 {
43+
self.set_all_false_validity()
4844
}
45+
46+
true_count == 0
47+
}
48+
49+
fn set_all_true_validity(&mut self) {
50+
unsafe { duckdb_vx_vector_set_all_valid(self.as_ptr()) };
4951
}
5052

51-
pub(super) fn set_all_false_validity(&mut self) {
53+
fn set_all_false_validity(&mut self) {
5254
self.reference_value(&Value::null(&self.logical_type()));
5355
}
5456
}

0 commit comments

Comments
 (0)