Skip to content

Commit 0a43978

Browse files
committed
duckdb: flatten runend arrays on export if requested
Signed-off-by: Mikhail Kot <mikhail@spiraldb.com>
1 parent 254f91b commit 0a43978

3 files changed

Lines changed: 132 additions & 42 deletions

File tree

vortex-duckdb/src/exporter/list.rs

Lines changed: 94 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,11 @@ mod tests {
150150
use vortex::array::validity::Validity;
151151
use vortex::buffer::Buffer;
152152
use vortex::buffer::buffer;
153+
use vortex::dtype::DType;
154+
use vortex::dtype::PType;
155+
use vortex::encodings::runend::RunEnd;
153156
use vortex::error::VortexExpect;
157+
use vortex::error::VortexResult;
154158

155159
use super::*;
156160
use crate::SESSION;
@@ -160,13 +164,12 @@ mod tests {
160164

161165
#[test]
162166
fn test_export_empty_list() {
163-
let list = unsafe {
164-
ListArray::new_unchecked(
165-
Buffer::<u32>::empty().into_array(),
166-
Buffer::<u32>::empty().into_array(),
167-
Validity::AllValid,
168-
)
169-
}
167+
let list = ListArray::try_new(
168+
Buffer::<u32>::empty().into_array(),
169+
buffer![0u32].into_array(),
170+
Validity::AllValid,
171+
)
172+
.vortex_expect("list creation should succeed")
170173
.into_array();
171174

172175
let list_type = LogicalType::list_type(LogicalType::uint32())
@@ -189,20 +192,91 @@ mod tests {
189192
}
190193

191194
#[test]
192-
fn test_export_non_empty_list_of_strings() {
193-
let list = unsafe {
194-
ListArray::new_unchecked(
195-
<VarBinArray as FromIterator<_>>::from_iter([
196-
Some("abc"),
197-
Some("def"),
198-
None,
199-
Some("ghi"),
200-
])
201-
.into_array(),
202-
buffer![0u8, 1, 2, 3, 4].into_array(),
203-
Validity::from_iter([true, true, false, true]),
195+
fn test_export_u64_list() {
196+
let list = ListArray::try_new(
197+
buffer![1u64, 2, 3, 4, 5].into_array(),
198+
buffer![0u8, 1, 2, 3, 4, 5].into_array(),
199+
Validity::AllValid,
200+
)
201+
.vortex_expect("list creation should succeed")
202+
.into_array();
203+
assert_eq!(
204+
list.dtype(),
205+
&DType::List(
206+
Arc::new(DType::Primitive(PType::U64, false.into())),
207+
true.into()
204208
)
205-
}
209+
);
210+
211+
let list_type = LogicalType::list_type(LogicalType::uint64())
212+
.vortex_expect("LogicalTypeRef creation should succeed for test data");
213+
let mut chunk = DataChunk::new([list_type]);
214+
215+
let mut ctx = SESSION.create_execution_ctx();
216+
new_array_exporter(list, &ConversionCache::default(), &mut ctx)
217+
.unwrap()
218+
.export(0, 5, chunk.get_vector_mut(0), &mut ctx)
219+
.unwrap();
220+
chunk.set_len(5);
221+
222+
assert_eq!(
223+
format!("{}", String::try_from(&*chunk).unwrap()),
224+
r#"Chunk - [1 Columns]
225+
- FLAT UBIGINT[]: 5 = [ [1], [2], [3], [4], [5]]
226+
"#
227+
);
228+
}
229+
230+
// Ensure runend-compressed list is properly flattened
231+
#[test]
232+
fn test_export_list_with_runend_elements() -> VortexResult<()> {
233+
let mut ctx = SESSION.create_execution_ctx();
234+
let elements = RunEnd::encode(buffer![100u32, 100, 200, 200, 200].into_array(), &mut ctx)?;
235+
236+
let list = ListArray::try_new(
237+
elements.into_array(),
238+
buffer![0u32, 2, 5].into_array(),
239+
Validity::AllValid,
240+
)
241+
.vortex_expect("list creation should succeed")
242+
.into_array();
243+
244+
let list_type = LogicalType::list_type(LogicalType::uint32())
245+
.vortex_expect("LogicalTypeRef creation should succeed for test data");
246+
let mut chunk = DataChunk::new([list_type]);
247+
248+
new_array_exporter(list, &ConversionCache::default(), &mut ctx)?.export(
249+
0,
250+
2,
251+
chunk.get_vector_mut(0),
252+
&mut ctx,
253+
)?;
254+
chunk.set_len(2);
255+
256+
assert_eq!(
257+
format!("{}", String::try_from(&*chunk)?),
258+
r#"Chunk - [1 Columns]
259+
- FLAT UINTEGER[]: 2 = [ [100, 100], [200, 200, 200]]
260+
"#
261+
);
262+
263+
Ok(())
264+
}
265+
266+
#[test]
267+
fn test_export_non_empty_list_of_strings() {
268+
let list = ListArray::try_new(
269+
<VarBinArray as FromIterator<_>>::from_iter([
270+
Some("abc"),
271+
Some("def"),
272+
None,
273+
Some("ghi"),
274+
])
275+
.into_array(),
276+
buffer![0u8, 1, 2, 3, 4].into_array(),
277+
Validity::from_iter([true, true, false, true]),
278+
)
279+
.vortex_expect("list creation should succeed")
206280
.into_array();
207281

208282
let list_type = LogicalType::list_type(LogicalType::varchar())

vortex-duckdb/src/exporter/mod.rs

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,32 @@ pub trait ColumnExporter: 'static {
165165
) -> VortexResult<()>;
166166
}
167167

168+
fn canonicalize_array(
169+
array: ArrayRef,
170+
cache: &ConversionCache,
171+
ctx: &mut ExecutionCtx,
172+
) -> VortexResult<Box<dyn ColumnExporter>> {
173+
match array.execute::<Canonical>(ctx)? {
174+
Canonical::Null(_) => Ok(all_invalid::new_exporter()),
175+
Canonical::Bool(array) => bool::new_exporter(array, ctx),
176+
Canonical::Primitive(array) => primitive::new_exporter(array, ctx),
177+
Canonical::Decimal(array) => decimal::new_exporter(array, ctx),
178+
Canonical::VarBinView(array) => varbinview::new_exporter(array, ctx),
179+
Canonical::List(array) => list_view::new_exporter(array, cache, ctx),
180+
Canonical::FixedSizeList(array) => fixed_size_list::new_exporter(array, cache, ctx),
181+
Canonical::Struct(array) => struct_::new_exporter(array, cache, ctx),
182+
Canonical::Extension(ext) => {
183+
if let Ok(temporal_array) = TemporalArray::try_from(ext) {
184+
return temporal::new_exporter(temporal_array, ctx);
185+
}
186+
vortex_bail!("no non-temporal extension exporter")
187+
}
188+
Canonical::Variant(_) => {
189+
vortex_bail!("Variant arrays can't be exported to DuckDB")
190+
}
191+
}
192+
}
193+
168194
fn new_array_exporter(
169195
array: ArrayRef,
170196
cache: &ConversionCache,
@@ -191,7 +217,7 @@ fn new_array_exporter_with_flatten(
191217
};
192218

193219
let array = match array.try_downcast::<RunEnd>() {
194-
Ok(array) => return run_end::new_exporter(array, cache, ctx),
220+
Ok(array) => return run_end::new_exporter_with_flatten(array, cache, ctx, flatten),
195221
Err(array) => array,
196222
};
197223

@@ -205,26 +231,7 @@ fn new_array_exporter_with_flatten(
205231
Err(array) => array,
206232
};
207233

208-
// Otherwise, we fall back to canonical
209-
match array.execute::<Canonical>(ctx)? {
210-
Canonical::Null(_) => Ok(all_invalid::new_exporter()),
211-
Canonical::Bool(array) => bool::new_exporter(array, ctx),
212-
Canonical::Primitive(array) => primitive::new_exporter(array, ctx),
213-
Canonical::Decimal(array) => decimal::new_exporter(array, ctx),
214-
Canonical::VarBinView(array) => varbinview::new_exporter(array, ctx),
215-
Canonical::List(array) => list_view::new_exporter(array, cache, ctx),
216-
Canonical::FixedSizeList(array) => fixed_size_list::new_exporter(array, cache, ctx),
217-
Canonical::Struct(array) => struct_::new_exporter(array, cache, ctx),
218-
Canonical::Extension(ext) => {
219-
if let Ok(temporal_array) = TemporalArray::try_from(ext) {
220-
return temporal::new_exporter(temporal_array, ctx);
221-
}
222-
vortex_bail!("no non-temporal extension exporter")
223-
}
224-
Canonical::Variant(_) => {
225-
vortex_bail!("Variant arrays can't be exported to DuckDB")
226-
}
227-
}
234+
canonicalize_array(array, cache, ctx)
228235
}
229236

230237
/// Copy the sliced bits from source into target, returning whether all copied bits are zero,

vortex-duckdb/src/exporter/run_end.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::marker::PhantomData;
55

66
use vortex::array::ArrayRef;
77
use vortex::array::ExecutionCtx;
8+
use vortex::array::IntoArray;
89
use vortex::array::arrays::PrimitiveArray;
910
use vortex::array::match_each_integer_ptype;
1011
use vortex::array::search_sorted::SearchSorted;
@@ -20,6 +21,7 @@ use crate::duckdb::SelectionVector;
2021
use crate::duckdb::VectorRef;
2122
use crate::exporter::ColumnExporter;
2223
use crate::exporter::cache::ConversionCache;
24+
use crate::exporter::canonicalize_array;
2325
use crate::exporter::new_array_exporter;
2426

2527
/// We export run-end arrays to a DuckDB dictionary vector, using a selection vector to
@@ -32,11 +34,18 @@ struct RunEndExporter<E: IntegerPType> {
3234
run_end_offset: usize,
3335
}
3436

35-
pub(crate) fn new_exporter(
37+
pub(crate) fn new_exporter_with_flatten(
3638
array: RunEndArray,
3739
cache: &ConversionCache,
3840
ctx: &mut ExecutionCtx,
41+
flatten: bool,
3942
) -> VortexResult<Box<dyn ColumnExporter>> {
43+
// Our canonicalization is 3x faster than creating a dictionary vector and
44+
// letting duckdb flatten it for us.
45+
if flatten {
46+
return canonicalize_array(array.into_array(), cache, ctx);
47+
}
48+
4049
let offset = array.offset();
4150
let ends = array.ends().clone();
4251
let values = array.values().clone();

0 commit comments

Comments
 (0)