Skip to content

Commit 44c511d

Browse files
authored
duckdb: Exit earlier on Validity::AllFalse (#7411)
1. Make the DuckDB all-invalid exporter a unit type: if we set the underlying vector's validity to "all false", DuckDB won't read the underlying values, so we don't need to fill them. 2. Exit early in exporter branches if the validity is all false; this saves a ConstantArray creation and an execute call. Signed-off-by: Mikhail Kot <to@myrrc.dev>
1 parent a004afa commit 44c511d

14 files changed

Lines changed: 57 additions & 86 deletions

File tree

vortex-duckdb/src/exporter/all_invalid.rs

Lines changed: 8 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,47 +3,32 @@
33

44
use vortex::array::ExecutionCtx;
55
use vortex::error::VortexResult;
6-
use vortex::error::vortex_ensure;
76

8-
use crate::duckdb::LogicalTypeRef;
9-
use crate::duckdb::Value;
107
use crate::duckdb::VectorRef;
118
use crate::exporter::ColumnExporter;
129

13-
struct AllInvalidExporter {
14-
len: usize,
15-
null_value: Value,
16-
}
10+
struct AllInvalidExporter;
1711

18-
pub(crate) fn new_exporter(len: usize, logical_type: &LogicalTypeRef) -> Box<dyn ColumnExporter> {
19-
Box::new(AllInvalidExporter {
20-
len,
21-
null_value: Value::null(logical_type),
22-
})
12+
pub(crate) fn new_exporter() -> Box<dyn ColumnExporter> {
13+
Box::new(AllInvalidExporter {})
2314
}
2415

2516
impl ColumnExporter for AllInvalidExporter {
2617
fn export(
2718
&self,
28-
offset: usize,
29-
len: usize,
19+
_offset: usize,
20+
_len: usize,
3021
vector: &mut VectorRef,
3122
_ctx: &mut ExecutionCtx,
3223
) -> VortexResult<()> {
33-
vortex_ensure!(
34-
offset + len <= self.len,
35-
"invalid exporter: offset + len must be less than or equal to len"
36-
);
37-
38-
vector.reference_value(&self.null_value);
24+
vector.set_all_false_validity();
3925
Ok(())
4026
}
4127
}
4228

4329
#[cfg(test)]
4430
mod tests {
4531
use vortex::array::VortexSessionExecute;
46-
use vortex::array::arrays::PrimitiveArray;
4732

4833
use super::*;
4934
use crate::SESSION;
@@ -52,12 +37,10 @@ mod tests {
5237

5338
#[test]
5439
fn all_null_array() {
55-
let arr = PrimitiveArray::from_option_iter::<i32, _>([None, None, None]);
5640
let ltype = LogicalType::int32();
41+
let mut chunk = DataChunk::new([ltype]);
5742

58-
let mut chunk = DataChunk::new([ltype.clone()]);
59-
60-
new_exporter(arr.len(), &ltype)
43+
new_exporter()
6144
.export(
6245
0,
6346
3,

vortex-duckdb/src/exporter/bool.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44
use vortex::array::ExecutionCtx;
55
use vortex::array::arrays::BoolArray;
66
use vortex::array::arrays::bool::BoolArrayExt;
7+
use vortex::array::validity::Validity;
78
use vortex::buffer::BitBuffer;
89
use vortex::error::VortexResult;
910
use vortex::mask::Mask;
1011

11-
use crate::duckdb::LogicalType;
1212
use crate::duckdb::VectorRef;
1313
use crate::exporter::ColumnExporter;
1414
use crate::exporter::all_invalid;
@@ -24,11 +24,12 @@ pub(crate) fn new_exporter(
2424
) -> VortexResult<Box<dyn ColumnExporter>> {
2525
let len = array.len();
2626
let bits = array.to_bit_buffer();
27-
let validity = array.validity()?.to_array(len).execute::<Mask>(ctx)?;
2827

29-
if validity.all_false() {
30-
return Ok(all_invalid::new_exporter(len, &LogicalType::bool()));
28+
let validity = array.validity()?;
29+
if matches!(validity, Validity::AllInvalid) {
30+
return Ok(all_invalid::new_exporter());
3131
}
32+
let validity = validity.to_array(len).execute::<Mask>(ctx)?;
3233

3334
Ok(validity::new_exporter(
3435
validity,

vortex-duckdb/src/exporter/constant.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,14 +65,14 @@ impl ColumnExporter for ConstantExporter {
6565
fn export(
6666
&self,
6767
_offset: usize,
68-
len: usize,
68+
_len: usize,
6969
vector: &mut VectorRef,
7070
_ctx: &mut ExecutionCtx,
7171
) -> VortexResult<()> {
7272
match self.value.as_ref() {
7373
None => {
7474
// TODO(ngates): would be good if DuckDB supported constant null vectors.
75-
unsafe { vector.set_validity(&Mask::AllFalse(len), 0, len) };
75+
vector.set_all_false_validity();
7676
}
7777
Some(value) => {
7878
vector.reference_value(value);

vortex-duckdb/src/exporter/decimal.rs

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ use vortex::array::ExecutionCtx;
88
use vortex::array::arrays::DecimalArray;
99
use vortex::array::arrays::decimal::DecimalDataParts;
1010
use vortex::array::match_each_decimal_value_type;
11+
use vortex::array::validity::Validity;
1112
use vortex::buffer::Buffer;
1213
use vortex::dtype::BigCast;
13-
use vortex::dtype::DType;
1414
use vortex::dtype::DecimalDType;
1515
use vortex::dtype::DecimalType;
1616
use vortex::dtype::NativeDecimalType;
@@ -19,7 +19,6 @@ use vortex::error::VortexResult;
1919
use vortex::error::vortex_bail;
2020
use vortex::mask::Mask;
2121

22-
use crate::duckdb::LogicalType;
2322
use crate::duckdb::VectorBuffer;
2423
use crate::duckdb::VectorRef;
2524
use crate::exporter::ColumnExporter;
@@ -49,13 +48,11 @@ pub(crate) fn new_exporter(
4948
values,
5049
} = array.into_data_parts();
5150
let dest_values_type = precision_to_duckdb_storage_size(&decimal_dtype)?;
52-
let nullability = validity.nullability();
53-
let validity = validity.to_array(len).execute::<Mask>(ctx)?;
5451

55-
if validity.all_false() {
56-
let ltype = LogicalType::try_from(DType::Decimal(decimal_dtype, nullability))?;
57-
return Ok(all_invalid::new_exporter(len, &ltype));
52+
if matches!(validity, Validity::AllInvalid) {
53+
return Ok(all_invalid::new_exporter());
5854
}
55+
let validity = validity.to_array(len).execute::<Mask>(ctx)?;
5956

6057
let exporter = if values_type == dest_values_type {
6158
match_each_decimal_value_type!(values_type, |D| {

vortex-duckdb/src/exporter/dict.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ use vortex::dtype::IntegerPType;
1717
use vortex::error::VortexResult;
1818
use vortex::mask::Mask;
1919

20-
use crate::duckdb::LogicalType;
2120
use crate::duckdb::ReusableDict;
2221
use crate::duckdb::SelectionVector;
2322
use crate::duckdb::VectorRef;
@@ -43,7 +42,6 @@ pub(crate) fn new_exporter_with_flatten(
4342
) -> VortexResult<Box<dyn ColumnExporter>> {
4443
// Grab the cache dictionary values.
4544
let values = array.values();
46-
let values_type: LogicalType = values.dtype().try_into()?;
4745
if let Some(constant) = values.as_opt::<Constant>() {
4846
return constant::new_exporter_with_mask(
4947
ConstantArray::new(constant.scalar().clone(), array.codes().len()),
@@ -57,7 +55,7 @@ pub(crate) fn new_exporter_with_flatten(
5755

5856
match codes_mask {
5957
Mask::AllTrue(_) => {}
60-
Mask::AllFalse(len) => return Ok(all_invalid::new_exporter(len, &values_type)),
58+
Mask::AllFalse(_) => return Ok(all_invalid::new_exporter()),
6159
Mask::Values(_) => {
6260
// duckdb cannot have a dictionary with validity in the codes, so flatten the array and
6361
// apply the validity mask there.

vortex-duckdb/src/exporter/fixed_size_list.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@
1111
use vortex::array::ExecutionCtx;
1212
use vortex::array::arrays::FixedSizeListArray;
1313
use vortex::array::arrays::fixed_size_list::FixedSizeListArrayExt;
14+
use vortex::array::validity::Validity;
1415
use vortex::error::VortexResult;
1516
use vortex::mask::Mask;
1617

1718
use super::ConversionCache;
1819
use super::all_invalid;
1920
use super::new_array_exporter_with_flatten;
2021
use super::validity;
21-
use crate::duckdb::LogicalType;
2222
use crate::duckdb::VectorRef;
2323
use crate::exporter::ColumnExporter;
2424

@@ -42,15 +42,14 @@ pub(crate) fn new_exporter(
4242
let parts = array.into_data_parts();
4343
let elements = parts.elements;
4444
let validity = parts.validity;
45-
let dtype = parts.dtype;
46-
let mask = validity.to_array(len).execute::<Mask>(ctx)?;
47-
let elements_exporter = new_array_exporter_with_flatten(elements, cache, ctx, true)?;
4845

49-
if mask.all_false() {
50-
let ltype = LogicalType::try_from(dtype)?;
51-
return Ok(all_invalid::new_exporter(len, &ltype));
46+
if matches!(validity, Validity::AllInvalid) {
47+
return Ok(all_invalid::new_exporter());
5248
}
5349

50+
let mask = validity.to_array(len).execute::<Mask>(ctx)?;
51+
let elements_exporter = new_array_exporter_with_flatten(elements, cache, ctx, true)?;
52+
5453
Ok(validity::new_exporter(
5554
mask,
5655
Box::new(FixedSizeListExporter {

vortex-duckdb/src/exporter/list.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use vortex::array::arrays::ListArray;
1010
use vortex::array::arrays::PrimitiveArray;
1111
use vortex::array::arrays::list::ListDataParts;
1212
use vortex::array::match_each_integer_ptype;
13+
use vortex::array::validity::Validity;
1314
use vortex::dtype::IntegerPType;
1415
use vortex::error::VortexResult;
1516
use vortex::error::vortex_err;
@@ -49,15 +50,14 @@ pub(crate) fn new_exporter(
4950
elements,
5051
offsets,
5152
validity,
52-
dtype,
53+
dtype: _dtype,
5354
} = array.into_data_parts();
5455
let num_elements = elements.len();
55-
let validity = validity.to_array(array_len).execute::<Mask>(ctx)?;
5656

57-
if validity.all_false() {
58-
let ltype = LogicalType::try_from(dtype)?;
59-
return Ok(all_invalid::new_exporter(array_len, &ltype));
57+
if matches!(validity, Validity::AllInvalid) {
58+
return Ok(all_invalid::new_exporter());
6059
}
60+
let validity = validity.to_array(array_len).execute::<Mask>(ctx)?;
6161

6262
let values_key = elements.addr();
6363
// Check if we have a cached vector and extract it if we do.

vortex-duckdb/src/exporter/list_view.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use vortex::array::arrays::ListViewArray;
1010
use vortex::array::arrays::PrimitiveArray;
1111
use vortex::array::arrays::listview::ListViewDataParts;
1212
use vortex::array::match_each_integer_ptype;
13-
use vortex::dtype::DType;
13+
use vortex::array::validity::Validity;
1414
use vortex::dtype::IntegerPType;
1515
use vortex::error::VortexResult;
1616
use vortex::error::vortex_err;
@@ -56,13 +56,11 @@ pub(crate) fn new_exporter(
5656
} = array.into_data_parts();
5757
// Cache an `elements` vector up front so that future exports can reference it.
5858
let num_elements = elements.len();
59-
let nullability = validity.nullability();
60-
let validity = validity.to_array(len).execute::<Mask>(ctx)?;
6159

62-
if validity.all_false() {
63-
let ltype = LogicalType::try_from(DType::List(elements_dtype, nullability))?;
64-
return Ok(all_invalid::new_exporter(len, &ltype));
60+
if matches!(validity, Validity::AllInvalid) {
61+
return Ok(all_invalid::new_exporter());
6562
}
63+
let validity = validity.to_array(len).execute::<Mask>(ctx)?;
6664

6765
let values_key = elements.addr();
6866
// Check if we have a cached vector and extract it if we do.

vortex-duckdb/src/exporter/mod.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ use vortex::error::VortexResult;
3737
use vortex::error::vortex_bail;
3838

3939
use crate::duckdb::DataChunkRef;
40-
use crate::duckdb::LogicalType;
4140
use crate::duckdb::VectorRef;
4241
use crate::duckdb::duckdb_vector_size;
4342

@@ -166,7 +165,7 @@ fn new_array_exporter_with_flatten(
166165

167166
// Otherwise, we fall back to canonical
168167
match array.execute::<Canonical>(ctx)? {
169-
Canonical::Null(array) => Ok(all_invalid::new_exporter(array.len(), &LogicalType::null())),
168+
Canonical::Null(_) => Ok(all_invalid::new_exporter()),
170169
Canonical::Bool(array) => bool::new_exporter(array, ctx),
171170
Canonical::Primitive(array) => primitive::new_exporter(array, ctx),
172171
Canonical::Decimal(array) => decimal::new_exporter(array, ctx),

vortex-duckdb/src/exporter/primitive.rs

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@ use std::marker::PhantomData;
66
use vortex::array::ExecutionCtx;
77
use vortex::array::arrays::PrimitiveArray;
88
use vortex::array::match_each_native_ptype;
9+
use vortex::array::validity::Validity;
910
use vortex::dtype::NativePType;
1011
use vortex::error::VortexResult;
1112
use vortex::mask::Mask;
1213

13-
use crate::duckdb::LogicalType;
1414
use crate::duckdb::VectorBuffer;
1515
use crate::duckdb::VectorRef;
1616
use crate::exporter::ColumnExporter;
@@ -28,15 +28,11 @@ pub fn new_exporter(
2828
array: PrimitiveArray,
2929
ctx: &mut ExecutionCtx,
3030
) -> VortexResult<Box<dyn ColumnExporter>> {
31-
let validity = array
32-
.validity()?
33-
.to_array(array.len())
34-
.execute::<Mask>(ctx)?;
35-
36-
if validity.all_false() {
37-
let ltype = LogicalType::try_from(array.ptype())?;
38-
return Ok(all_invalid::new_exporter(array.len(), &ltype));
39-
}
31+
let validity = array.validity()?;
32+
if matches!(validity, Validity::AllInvalid) {
33+
return Ok(all_invalid::new_exporter());
34+
};
35+
let validity = validity.to_array(array.len()).execute::<Mask>(ctx)?;
4036

4137
match_each_native_ptype!(array.ptype(), |T| {
4238
let buffer = array.to_buffer::<T>();

0 commit comments

Comments
 (0)