diff --git a/vortex-duckdb/src/exporter/all_invalid.rs b/vortex-duckdb/src/exporter/all_invalid.rs index 873dde3c089..cc5d5da9efb 100644 --- a/vortex-duckdb/src/exporter/all_invalid.rs +++ b/vortex-duckdb/src/exporter/all_invalid.rs @@ -3,39 +3,25 @@ use vortex::array::ExecutionCtx; use vortex::error::VortexResult; -use vortex::error::vortex_ensure; -use crate::duckdb::LogicalTypeRef; -use crate::duckdb::Value; use crate::duckdb::VectorRef; use crate::exporter::ColumnExporter; -struct AllInvalidExporter { - len: usize, - null_value: Value, -} +struct AllInvalidExporter; -pub(crate) fn new_exporter(len: usize, logical_type: &LogicalTypeRef) -> Box { - Box::new(AllInvalidExporter { - len, - null_value: Value::null(logical_type), - }) +pub(crate) fn new_exporter() -> Box { + Box::new(AllInvalidExporter {}) } impl ColumnExporter for AllInvalidExporter { fn export( &self, - offset: usize, - len: usize, + _offset: usize, + _len: usize, vector: &mut VectorRef, _ctx: &mut ExecutionCtx, ) -> VortexResult<()> { - vortex_ensure!( - offset + len <= self.len, - "invalid exporter: offset + len must be less than or equal to len" - ); - - vector.reference_value(&self.null_value); + vector.set_all_false_validity(); Ok(()) } } @@ -43,7 +29,6 @@ impl ColumnExporter for AllInvalidExporter { #[cfg(test)] mod tests { use vortex::array::VortexSessionExecute; - use vortex::array::arrays::PrimitiveArray; use super::*; use crate::SESSION; @@ -52,12 +37,10 @@ mod tests { #[test] fn all_null_array() { - let arr = PrimitiveArray::from_option_iter::([None, None, None]); let ltype = LogicalType::int32(); + let mut chunk = DataChunk::new([ltype]); - let mut chunk = DataChunk::new([ltype.clone()]); - - new_exporter(arr.len(), <ype) + new_exporter() .export( 0, 3, diff --git a/vortex-duckdb/src/exporter/bool.rs b/vortex-duckdb/src/exporter/bool.rs index cb488ce5297..84fd17f0789 100644 --- a/vortex-duckdb/src/exporter/bool.rs +++ b/vortex-duckdb/src/exporter/bool.rs @@ -4,11 +4,11 @@ use vortex::array::ExecutionCtx; use vortex::array::arrays::BoolArray; use vortex::array::arrays::bool::BoolArrayExt; +use vortex::array::validity::Validity; use vortex::buffer::BitBuffer; use vortex::error::VortexResult; use vortex::mask::Mask; -use crate::duckdb::LogicalType; use crate::duckdb::VectorRef; use crate::exporter::ColumnExporter; use crate::exporter::all_invalid; @@ -24,11 +24,12 @@ pub(crate) fn new_exporter( ) -> VortexResult> { let len = array.len(); let bits = array.to_bit_buffer(); - let validity = array.validity()?.to_array(len).execute::(ctx)?; - if validity.all_false() { - return Ok(all_invalid::new_exporter(len, &LogicalType::bool())); + let validity = array.validity()?; + if matches!(validity, Validity::AllInvalid) { + return Ok(all_invalid::new_exporter()); } + let validity = validity.to_array(len).execute::(ctx)?; Ok(validity::new_exporter( validity, diff --git a/vortex-duckdb/src/exporter/constant.rs b/vortex-duckdb/src/exporter/constant.rs index 8bb05ac1edc..ade5fece825 100644 --- a/vortex-duckdb/src/exporter/constant.rs +++ b/vortex-duckdb/src/exporter/constant.rs @@ -65,14 +65,14 @@ impl ColumnExporter for ConstantExporter { fn export( &self, _offset: usize, - len: usize, + _len: usize, vector: &mut VectorRef, _ctx: &mut ExecutionCtx, ) -> VortexResult<()> { match self.value.as_ref() { None => { // TODO(ngates): would be good if DuckDB supported constant null vectors. - unsafe { vector.set_validity(&Mask::AllFalse(len), 0, len) }; + vector.set_all_false_validity(); } Some(value) => { vector.reference_value(value); diff --git a/vortex-duckdb/src/exporter/decimal.rs b/vortex-duckdb/src/exporter/decimal.rs index 2fe226b7f31..5b52f6d6c53 100644 --- a/vortex-duckdb/src/exporter/decimal.rs +++ b/vortex-duckdb/src/exporter/decimal.rs @@ -8,9 +8,9 @@ use vortex::array::ExecutionCtx; use vortex::array::arrays::DecimalArray; use vortex::array::arrays::decimal::DecimalDataParts; use vortex::array::match_each_decimal_value_type; +use vortex::array::validity::Validity; use vortex::buffer::Buffer; use vortex::dtype::BigCast; -use vortex::dtype::DType; use vortex::dtype::DecimalDType; use vortex::dtype::DecimalType; use vortex::dtype::NativeDecimalType; @@ -19,7 +19,6 @@ use vortex::error::VortexResult; use vortex::error::vortex_bail; use vortex::mask::Mask; -use crate::duckdb::LogicalType; use crate::duckdb::VectorBuffer; use crate::duckdb::VectorRef; use crate::exporter::ColumnExporter; @@ -49,13 +48,11 @@ pub(crate) fn new_exporter( values, } = array.into_data_parts(); let dest_values_type = precision_to_duckdb_storage_size(&decimal_dtype)?; - let nullability = validity.nullability(); - let validity = validity.to_array(len).execute::(ctx)?; - if validity.all_false() { - let ltype = LogicalType::try_from(DType::Decimal(decimal_dtype, nullability))?; - return Ok(all_invalid::new_exporter(len, <ype)); + if matches!(validity, Validity::AllInvalid) { + return Ok(all_invalid::new_exporter()); } + let validity = validity.to_array(len).execute::(ctx)?; let exporter = if values_type == dest_values_type { match_each_decimal_value_type!(values_type, |D| { diff --git a/vortex-duckdb/src/exporter/dict.rs b/vortex-duckdb/src/exporter/dict.rs index 961c7e35b84..6377906940c 100644 --- a/vortex-duckdb/src/exporter/dict.rs +++ b/vortex-duckdb/src/exporter/dict.rs @@ -17,7 +17,6 @@ use vortex::dtype::IntegerPType; use vortex::error::VortexResult; use vortex::mask::Mask; -use crate::duckdb::LogicalType; use crate::duckdb::ReusableDict; use crate::duckdb::SelectionVector; use crate::duckdb::VectorRef; @@ -43,7 +42,6 @@ pub(crate) fn new_exporter_with_flatten( ) -> VortexResult> { // Grab the cache dictionary values. let values = array.values(); - let values_type: LogicalType = values.dtype().try_into()?; if let Some(constant) = values.as_opt::() { return constant::new_exporter_with_mask( ConstantArray::new(constant.scalar().clone(), array.codes().len()), @@ -57,7 +55,7 @@ pub(crate) fn new_exporter_with_flatten( match codes_mask { Mask::AllTrue(_) => {} - Mask::AllFalse(len) => return Ok(all_invalid::new_exporter(len, &values_type)), + Mask::AllFalse(_) => return Ok(all_invalid::new_exporter()), Mask::Values(_) => { // duckdb cannot have a dictionary with validity in the codes, so flatten the array and // apply the validity mask there. diff --git a/vortex-duckdb/src/exporter/fixed_size_list.rs b/vortex-duckdb/src/exporter/fixed_size_list.rs index d7fbfa64647..ed93ad2b9c1 100644 --- a/vortex-duckdb/src/exporter/fixed_size_list.rs +++ b/vortex-duckdb/src/exporter/fixed_size_list.rs @@ -11,6 +11,7 @@ use vortex::array::ExecutionCtx; use vortex::array::arrays::FixedSizeListArray; use vortex::array::arrays::fixed_size_list::FixedSizeListArrayExt; +use vortex::array::validity::Validity; use vortex::error::VortexResult; use vortex::mask::Mask; @@ -18,7 +19,6 @@ use super::ConversionCache; use super::all_invalid; use super::new_array_exporter_with_flatten; use super::validity; -use crate::duckdb::LogicalType; use crate::duckdb::VectorRef; use crate::exporter::ColumnExporter; @@ -42,15 +42,14 @@ pub(crate) fn new_exporter( let parts = array.into_data_parts(); let elements = parts.elements; let validity = parts.validity; - let dtype = parts.dtype; - let mask = validity.to_array(len).execute::(ctx)?; - let elements_exporter = new_array_exporter_with_flatten(elements, cache, ctx, true)?; - if mask.all_false() { - let ltype = LogicalType::try_from(dtype)?; - return Ok(all_invalid::new_exporter(len, <ype)); + if matches!(validity, Validity::AllInvalid) { + return Ok(all_invalid::new_exporter()); } + let mask = validity.to_array(len).execute::(ctx)?; + let elements_exporter = new_array_exporter_with_flatten(elements, cache, ctx, true)?; + Ok(validity::new_exporter( mask, Box::new(FixedSizeListExporter { diff --git a/vortex-duckdb/src/exporter/list.rs b/vortex-duckdb/src/exporter/list.rs index 1ae14869908..c532242fae7 100644 --- a/vortex-duckdb/src/exporter/list.rs +++ b/vortex-duckdb/src/exporter/list.rs @@ -10,6 +10,7 @@ use vortex::array::arrays::ListArray; use vortex::array::arrays::PrimitiveArray; use vortex::array::arrays::list::ListDataParts; use vortex::array::match_each_integer_ptype; +use vortex::array::validity::Validity; use vortex::dtype::IntegerPType; use vortex::error::VortexResult; use vortex::error::vortex_err; @@ -49,15 +50,14 @@ pub(crate) fn new_exporter( elements, offsets, validity, - dtype, + dtype: _dtype, } = array.into_data_parts(); let num_elements = elements.len(); - let validity = validity.to_array(array_len).execute::(ctx)?; - if validity.all_false() { - let ltype = LogicalType::try_from(dtype)?; - return Ok(all_invalid::new_exporter(array_len, <ype)); + if matches!(validity, Validity::AllInvalid) { + return Ok(all_invalid::new_exporter()); } + let validity = validity.to_array(array_len).execute::(ctx)?; let values_key = elements.addr(); // Check if we have a cached vector and extract it if we do. diff --git a/vortex-duckdb/src/exporter/list_view.rs b/vortex-duckdb/src/exporter/list_view.rs index dcc7c66ce31..4e88d8f4199 100644 --- a/vortex-duckdb/src/exporter/list_view.rs +++ b/vortex-duckdb/src/exporter/list_view.rs @@ -10,7 +10,7 @@ use vortex::array::arrays::ListViewArray; use vortex::array::arrays::PrimitiveArray; use vortex::array::arrays::listview::ListViewDataParts; use vortex::array::match_each_integer_ptype; -use vortex::dtype::DType; +use vortex::array::validity::Validity; use vortex::dtype::IntegerPType; use vortex::error::VortexResult; use vortex::error::vortex_err; @@ -56,13 +56,11 @@ pub(crate) fn new_exporter( } = array.into_data_parts(); // Cache an `elements` vector up front so that future exports can reference it. let num_elements = elements.len(); - let nullability = validity.nullability(); - let validity = validity.to_array(len).execute::(ctx)?; - if validity.all_false() { - let ltype = LogicalType::try_from(DType::List(elements_dtype, nullability))?; - return Ok(all_invalid::new_exporter(len, <ype)); + if matches!(validity, Validity::AllInvalid) { + return Ok(all_invalid::new_exporter()); } + let validity = validity.to_array(len).execute::(ctx)?; let values_key = elements.addr(); // Check if we have a cached vector and extract it if we do. diff --git a/vortex-duckdb/src/exporter/mod.rs b/vortex-duckdb/src/exporter/mod.rs index b5b26340efe..f76b3e41124 100644 --- a/vortex-duckdb/src/exporter/mod.rs +++ b/vortex-duckdb/src/exporter/mod.rs @@ -37,7 +37,6 @@ use vortex::error::VortexResult; use vortex::error::vortex_bail; use crate::duckdb::DataChunkRef; -use crate::duckdb::LogicalType; use crate::duckdb::VectorRef; use crate::duckdb::duckdb_vector_size; @@ -166,7 +165,7 @@ fn new_array_exporter_with_flatten( // Otherwise, we fall back to canonical match array.execute::(ctx)? { - Canonical::Null(array) => Ok(all_invalid::new_exporter(array.len(), &LogicalType::null())), + Canonical::Null(_) => Ok(all_invalid::new_exporter()), Canonical::Bool(array) => bool::new_exporter(array, ctx), Canonical::Primitive(array) => primitive::new_exporter(array, ctx), Canonical::Decimal(array) => decimal::new_exporter(array, ctx), diff --git a/vortex-duckdb/src/exporter/primitive.rs b/vortex-duckdb/src/exporter/primitive.rs index 7506cbdb93c..1bbec2d79c8 100644 --- a/vortex-duckdb/src/exporter/primitive.rs +++ b/vortex-duckdb/src/exporter/primitive.rs @@ -6,11 +6,11 @@ use std::marker::PhantomData; use vortex::array::ExecutionCtx; use vortex::array::arrays::PrimitiveArray; use vortex::array::match_each_native_ptype; +use vortex::array::validity::Validity; use vortex::dtype::NativePType; use vortex::error::VortexResult; use vortex::mask::Mask; -use crate::duckdb::LogicalType; use crate::duckdb::VectorBuffer; use crate::duckdb::VectorRef; use crate::exporter::ColumnExporter; @@ -28,15 +28,11 @@ pub fn new_exporter( array: PrimitiveArray, ctx: &mut ExecutionCtx, ) -> VortexResult> { - let validity = array - .validity()? - .to_array(array.len()) - .execute::(ctx)?; - - if validity.all_false() { - let ltype = LogicalType::try_from(array.ptype())?; - return Ok(all_invalid::new_exporter(array.len(), <ype)); - } + let validity = array.validity()?; + if matches!(validity, Validity::AllInvalid) { + return Ok(all_invalid::new_exporter()); + }; + let validity = validity.to_array(array.len()).execute::(ctx)?; match_each_native_ptype!(array.ptype(), |T| { let buffer = array.to_buffer::(); diff --git a/vortex-duckdb/src/exporter/sequence.rs b/vortex-duckdb/src/exporter/sequence.rs index a9a569d2a06..bc4b2731b40 100644 --- a/vortex-duckdb/src/exporter/sequence.rs +++ b/vortex-duckdb/src/exporter/sequence.rs @@ -35,6 +35,7 @@ impl ColumnExporter for SequenceExporter { ) -> VortexResult<()> { let offset = offset.as_i64(); let start = (offset * self.step) + self.start; + // TODO why don't we apply validity mask here? vector.to_sequence(start, self.step, len.as_u64()); Ok(()) diff --git a/vortex-duckdb/src/exporter/struct_.rs b/vortex-duckdb/src/exporter/struct_.rs index f53ec437bf2..9a07028686b 100644 --- a/vortex-duckdb/src/exporter/struct_.rs +++ b/vortex-duckdb/src/exporter/struct_.rs @@ -8,9 +8,9 @@ use vortex::array::arrays::StructArray; use vortex::array::arrays::bool::BoolArrayExt; use vortex::array::arrays::struct_::StructDataParts; use vortex::array::builtins::ArrayBuiltins; +use vortex::array::validity::Validity; use vortex::error::VortexResult; -use crate::duckdb::LogicalType; use crate::duckdb::VectorRef; use crate::exporter::ColumnExporter; use crate::exporter::ConversionCache; @@ -30,16 +30,15 @@ pub(crate) fn new_exporter( let len = array.len(); let StructDataParts { validity, - struct_fields, + struct_fields: _struct_fields, fields, .. } = array.into_data_parts(); - let validity = validity.to_array(len).execute::(ctx)?; - if validity.to_bit_buffer().true_count() == 0 { - let ltype = LogicalType::try_from(struct_fields)?; - return Ok(all_invalid::new_exporter(len, <ype)); - } + if matches!(validity, Validity::AllInvalid) { + return Ok(all_invalid::new_exporter()); + }; + let validity = validity.to_array(len).execute::(ctx)?; let children = fields .iter() diff --git a/vortex-duckdb/src/exporter/varbinview.rs b/vortex-duckdb/src/exporter/varbinview.rs index 8067b08a856..e17f63e2e2a 100644 --- a/vortex-duckdb/src/exporter/varbinview.rs +++ b/vortex-duckdb/src/exporter/varbinview.rs @@ -9,12 +9,12 @@ use vortex::array::arrays::VarBinViewArray; use vortex::array::arrays::varbinview::BinaryView; use vortex::array::arrays::varbinview::Inlined; use vortex::array::arrays::varbinview::VarBinViewDataParts; +use vortex::array::validity::Validity; use vortex::buffer::Buffer; use vortex::buffer::ByteBuffer; use vortex::error::VortexResult; use vortex::mask::Mask; -use crate::duckdb::LogicalType; use crate::duckdb::VectorBuffer; use crate::duckdb::VectorRef; use crate::exporter::ColumnExporter; @@ -34,15 +34,15 @@ pub(crate) fn new_exporter( let len = array.len(); let VarBinViewDataParts { validity, - dtype, + dtype: _dtype, views, buffers, } = array.into_data_parts(); - let validity = validity.to_array(len).execute::(ctx)?; - if validity.all_false() { - let ltype = LogicalType::try_from(dtype)?; - return Ok(all_invalid::new_exporter(len, <ype)); + + if matches!(validity, Validity::AllInvalid) { + return Ok(all_invalid::new_exporter()); } + let validity = validity.to_array(len).execute::(ctx)?; let buffers: Vec<_> = buffers.iter().cloned().map(|b| b.unwrap_host()).collect(); let buffers: Arc<[ByteBuffer]> = Arc::from(buffers); diff --git a/vortex-duckdb/src/exporter/vector.rs b/vortex-duckdb/src/exporter/vector.rs index c8b9e36f2eb..e6ada5c7dd7 100644 --- a/vortex-duckdb/src/exporter/vector.rs +++ b/vortex-duckdb/src/exporter/vector.rs @@ -48,11 +48,11 @@ impl VectorRef { } } - fn set_all_true_validity(&mut self) { + pub fn set_all_true_validity(&mut self) { unsafe { duckdb_vx_vector_set_all_valid(self.as_ptr()) }; } - fn set_all_false_validity(&mut self) { + pub fn set_all_false_validity(&mut self) { self.reference_value(&Value::null(&self.logical_type())); } }