Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions vortex-duckdb/cpp/include/duckdb_vx/vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ void duckdb_vx_string_vector_add_vector_data_buffer(duckdb_vector ffi_vector, du
// valid.
void duckdb_vx_vector_set_vector_data_buffer(duckdb_vector ffi_vector, duckdb_vx_vector_buffer buffer);

// Reset vector's validity mask to nullptr, making all vector's elements valid.
// vector must not be a DictionaryVector or a SequenceVector
void duckdb_vx_vector_set_all_valid(duckdb_vector ffi_vector);

// Set the data pointer for the vector. This is the start of the values array in the vector.
void duckdb_vx_vector_set_data_ptr(duckdb_vector ffi_vector, void *ptr);

Expand Down
18 changes: 18 additions & 0 deletions vortex-duckdb/cpp/vector.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#include "include/duckdb_vx/vector.h"
#include "duckdb_vx/duckdb_diagnostics.h"

DUCKDB_INCLUDES_BEGIN
Expand Down Expand Up @@ -106,3 +107,20 @@ const char *duckdb_vector_to_string(duckdb_vector vector, unsigned long len, duc
return nullptr;
}
}

void duckdb_vx_vector_set_all_valid(duckdb_vector ffi_vector) {
using enum VectorType;
Vector &vector = *reinterpret_cast<Vector *>(ffi_vector);
const VectorType type = vector.GetVectorType();
D_ASSERT(type != DICTIONARY_VECTOR && type != SEQUENCE_VECTOR);
switch (type) {
case CONSTANT_VECTOR:
return ConstantVector::Validity(vector).Reset();
case FLAT_VECTOR:
return FlatVector::Validity(vector).Reset();
case FSST_VECTOR:
return FSSTVector::Validity(vector).Reset();
default:
Comment thread
myrrc marked this conversation as resolved.
__builtin_unreachable();
}
}
6 changes: 5 additions & 1 deletion vortex-duckdb/src/duckdb/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,11 @@ impl VectorRef {
///
/// The provided capacity *must* be the actual capacity of this vector.
pub unsafe fn validity_bitslice_mut(&mut self, capacity: usize) -> Option<&mut BitSlice<u64>> {
unsafe { self.validity_slice_mut(capacity) }.map(|slice| slice.view_bits_mut())
// capacity is always less than BitSlice<u64>::MAX_ELTS
unsafe {
self.validity_slice_mut(capacity)
.map(|slice| BitSlice::from_slice_unchecked_mut(slice))
}
}

pub fn validity_ref(&self, len: usize) -> ValidityRef<'_> {
Expand Down
83 changes: 54 additions & 29 deletions vortex-duckdb/src/exporter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ mod validity;
mod varbinview;
mod vector;

use bitvec::prelude::Lsb0;
use bitvec::view::BitView;
pub use cache::ConversionCache;
pub use decimal::precision_to_duckdb_storage_size;
use vortex::array::ArrayRef;
Expand All @@ -32,6 +30,7 @@ use vortex::array::arrays::List;
use vortex::array::arrays::StructArray;
use vortex::array::arrays::TemporalArray;
use vortex::array::arrays::struct_::StructArrayExt;
use vortex::buffer::BitChunks;
use vortex::encodings::runend::RunEnd;
use vortex::encodings::sequence::Sequence;
use vortex::error::VortexResult;
Expand Down Expand Up @@ -187,17 +186,33 @@ fn new_array_exporter_with_flatten(
}
}

/// Copy the sliced bits from source into target.
/// Copy the sliced bits from source into target, returning whether all copied bits are zero,
/// all are one, or mixed.
///
/// Offset and length are a _bit_ offset and a _bit_ length into source.
/// offset and len are a _bit_ offset and a _bit_ length into `source`.
///
/// `target.len()` must equal `len`.
fn copy_from_slice(target: &mut [u64], source: &[u8], offset: usize, len: usize) {
let (start, middle, end) = unsafe { target.align_to_mut::<u8>() };
assert!(start.is_empty());
assert!(end.is_empty());
let target = &mut middle.view_bits_mut::<Lsb0>()[..len];
target.copy_from_bitslice(&source.view_bits()[offset..][..len]);
/// target must have at least len.div_ceil(64) elements.
/// Returns the number of ones in copied slice.
fn copy_from_slice(target: &mut [u64], source: &[u8], offset: usize, len: usize) -> usize {
if len == 0 {
return 0;
}

let mut ones = 0usize;
let chunks = BitChunks::new(source, offset, len);
let chunk_len = chunks.chunk_len();
let remainder_len = chunks.remainder_len();
let remainder = chunks.remainder_bits();
for (slot, chunk) in target.iter_mut().zip(chunks) {
*slot = chunk;
ones += chunk.count_ones() as usize;
}

if remainder_len > 0 {
target[chunk_len] = remainder;
ones += remainder.count_ones() as usize;
}
ones
}

#[cfg(test)]
Expand Down Expand Up @@ -359,44 +374,51 @@ mod tests {
fn test_copy_from_slice_empty_to_empty() {
let target = &mut [];
let source = Vec::<u8>::new();
copy_from_slice(target, &source, 0, 0);
assert_eq!(copy_from_slice(target, &source, 0, 0), 0);
}

#[test]
fn test_copy_from_slice_64_to_empty() {
let target = &mut [];
let source = [1u8, 2, 3, 50, 51, 52, 100, 101];
copy_from_slice(target, &source, 0, 0);
copy_from_slice(target, &source, 5, 0);
copy_from_slice(target, &source, 8, 0);
assert_eq!(copy_from_slice(target, &source, 0, 0), 0);
assert_eq!(copy_from_slice(target, &source, 5, 0), 0);
assert_eq!(copy_from_slice(target, &source, 8, 0), 0);
}

#[test]
fn test_copy_from_slice_64_to_64() {
let mut target = vec![0u64];
let source = [1u8, 2, 3, 50, 51, 52, 100, 101];
copy_from_slice(&mut target, &source, 0, 64);
assert_eq!(copy_from_slice(&mut target, &source, 0, 64), 21);
assert_eq!(
target[0], 0x65_64_34_33_32_03_02_01_u64,
"{:#08x} == {:#08x}",
target[0], 0x65_64_34_33_32_03_02_01_u64,
);
}

#[test]
fn test_copy_from_slice_64_ones() {
let mut target = [0u64];
let source = [u8::MAX; 8];
assert_eq!(copy_from_slice(&mut target, &source, 0, 64), 64);
}

#[test]
fn test_copy_from_slice_80_to_0() {
let target = &mut [];
let source = [1u8, 2, 3, 50, 51, 52, 100, 101, 254, 255];
copy_from_slice(target, &source, 0, 0);
copy_from_slice(target, &source, 8, 0);
copy_from_slice(target, &source, 10, 0);
assert_eq!(copy_from_slice(target, &source, 0, 0), 0);
assert_eq!(copy_from_slice(target, &source, 8, 0), 0);
assert_eq!(copy_from_slice(target, &source, 10, 0), 0,);
}

#[test]
fn test_copy_from_slice_80_to_64_case_1() {
let mut target = [0u64];
let source = [1u8, 2, 3, 50, 51, 52, 100, 101, 254, 255];
copy_from_slice(&mut target, &source, 16, 64);
assert_eq!(copy_from_slice(&mut target, &source, 16, 64), 34);
assert_eq!(
target[0], 0xff_fe_65_64_34_33_32_03_u64,
"{:#08x} == {:#08x}",
Expand All @@ -408,7 +430,7 @@ mod tests {
fn test_copy_from_slice_80_to_64_case_2() {
let mut target = [0u64];
let source = [1u8, 2, 3, 50, 51, 52, 100, 101, 254, 255];
copy_from_slice(&mut target, &source, 8, 64);
assert_eq!(copy_from_slice(&mut target, &source, 8, 64), 27);
assert_eq!(
target[0], 0xfe_65_64_34_33_32_03_02_u64,
"{:#08x} == {:#08x}",
Expand All @@ -420,7 +442,7 @@ mod tests {
fn test_copy_from_slice_80_to_64_case_3() {
let mut target = [0u64];
let source = [1u8, 2, 3, 50, 51, 52, 100, 101, 254, 255];
copy_from_slice(&mut target, &source, 0, 64);
assert_eq!(copy_from_slice(&mut target, &source, 0, 64), 21,);
assert_eq!(
target[0], 0x65_64_34_33_32_03_02_01_u64,
"{:#08x} == {:#08x}",
Expand All @@ -432,7 +454,7 @@ mod tests {
fn test_copy_from_slice_80_to_64_case_4() {
let mut target = [0u64];
let source = [1u8, 2, 3, 50, 51, 52, 100, 101, 254, 255];
copy_from_slice(&mut target, &source, 10, 64);
assert_eq!(copy_from_slice(&mut target, &source, 10, 64), 28,);
assert_eq!(
target[0],
0xff_99_59_0d_0c_cc_80_c0_u64, // Python: hex(0xff_fe_65_64_34_33_32_03_02 >> 2), then remove the high two hexits
Expand All @@ -454,7 +476,7 @@ mod tests {
let (_, middle, _) = unsafe { source.align_to::<u64>() };
assert!(!middle.is_empty());

copy_from_slice(&mut target, &source, 0, 128);
assert_eq!(copy_from_slice(&mut target, &source, 0, 128), 66,);
assert_eq!(
target[0], 0xfc_fd_fe_ff_04_03_02_01_u64,
"{:#08x} == {:#08x}",
Expand All @@ -466,7 +488,7 @@ mod tests {
target[1], 0xf9_fa_fb_fc_08_07_06_05_u64,
);

copy_from_slice(&mut target, &source, 8, 128);
assert_eq!(copy_from_slice(&mut target, &source, 8, 128), 66);
assert_eq!(
target[0], 0x05_fc_fd_fe_ff_04_03_02_u64,
"{:#08x} == {:#08x}",
Expand All @@ -478,7 +500,7 @@ mod tests {
target[1], 0x01_f9_fa_fb_fc_08_07_06_u64,
);

copy_from_slice(&mut target, &source, 8 * 8, 128);
assert_eq!(copy_from_slice(&mut target, &source, 8 * 8, 128), 66,);
assert_eq!(
target[0], 0xf9_fa_fb_fc_08_07_06_05_u64,
"{:#08x} == {:#08x}",
Expand All @@ -490,7 +512,7 @@ mod tests {
target[1], 0xfc_fd_fe_ff_04_03_02_01_u64,
);

copy_from_slice(&mut target, &source, 8 * 12, 128);
assert_eq!(copy_from_slice(&mut target, &source, 8 * 12, 128), 66,);
assert_eq!(
target[0], 0x04_03_02_01_f9_fa_fb_fc_u64,
"{:#08x} == {:#08x}",
Expand All @@ -502,7 +524,7 @@ mod tests {
target[1], 0x08_07_06_05_fc_fd_fe_ff_u64,
);

copy_from_slice(&mut target, &source, 8 * 12 + 4, 128);
assert_eq!(copy_from_slice(&mut target, &source, 8 * 12 + 4, 128), 66,);
// Find the 12th byte, skip the first hexit, take the next 32 hexits (i.e. 16 bytesor 128
// bits).
assert_eq!(
Expand All @@ -517,7 +539,10 @@ mod tests {
);

// Take the above and shift one bit towards the right-hand-side.
copy_from_slice(&mut target, &source, 8 * 12 + 4 + 1, 128);
assert_eq!(
copy_from_slice(&mut target, &source, 8 * 12 + 4 + 1, 128),
66,
);
assert_eq!(
target[0], 0xf8_20_18_10_0f_cf_d7_df_u64,
"{:#08x} == {:#08x}",
Expand Down
56 changes: 30 additions & 26 deletions vortex-duckdb/src/exporter/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex::mask::Mask;
use vortex::mask::MaskValues;

use crate::cpp::duckdb_vx_vector_set_all_valid;
use crate::duckdb::Value;
use crate::duckdb::VectorRef;
use crate::exporter::copy_from_slice;
Expand All @@ -11,44 +13,46 @@ impl VectorRef {
pub(super) unsafe fn set_validity(&mut self, mask: &Mask, offset: usize, len: usize) -> bool {
match mask {
Mask::AllTrue(_) => {
// We only need to blank out validity if there is already a slice allocated.
// SAFETY: Caller guarantees this.
unsafe { self.set_all_true_validity(len) }
self.set_all_true_validity();
false
}
Mask::AllFalse(_) => {
// SAFETY: Caller guarantees this.
self.set_all_false_validity();
true
}
Mask::Values(arr) => {
let true_count = arr.bit_buffer().slice(offset..(offset + len)).true_count();
if true_count == len {
unsafe { self.set_all_true_validity(len) }
} else if true_count == 0 {
self.set_all_false_validity()
} else {
let source = arr.bit_buffer().inner().as_slice();
copy_from_slice(
unsafe { self.ensure_validity_slice(len) },
source,
offset,
len,
);
}

true_count == 0
}
Mask::Values(arr) => self.set_validity_with_array(arr, len, offset),
}
}

pub(super) unsafe fn set_all_true_validity(&mut self, len: usize) {
if let Some(validity) = unsafe { self.validity_bitslice_mut(len) } {
validity.fill(true);
fn set_validity_with_array(&mut self, arr: &MaskValues, len: usize, offset: usize) -> bool {
let true_count = arr.true_count();
if true_count == arr.len() {
self.set_all_true_validity();
return false;
} else if true_count == 0 {
self.set_all_false_validity();
return true;
}
Comment thread
myrrc marked this conversation as resolved.

let dest = unsafe { self.ensure_validity_slice(len) };
let source = arr.bit_buffer().inner().as_slice();
let ones = copy_from_slice(dest, source, offset, len);
if ones == 0 {
self.set_all_false_validity();
true
} else if ones == len {
self.set_all_true_validity();
false
} else {
false
}
}

pub(super) fn set_all_false_validity(&mut self) {
fn set_all_true_validity(&mut self) {
unsafe { duckdb_vx_vector_set_all_valid(self.as_ptr()) };
}

fn set_all_false_validity(&mut self) {
self.reference_value(&Value::null(&self.logical_type()));
}
}
Loading