Skip to content

Commit 17334a5

Browse files
author
Matthew Katz
committed
address comments
Signed-off-by: Matthew Katz <katz@spiraldb.com>
1 parent 362be7e commit 17334a5

5 files changed

Lines changed: 50 additions & 29 deletions

File tree

vortex-array/public-api.lock

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4454,8 +4454,6 @@ pub fn vortex_array::arrays::listview::ListViewArrayExt::compute_referenced_elem
44544454

44554455
pub fn vortex_array::arrays::listview::ListViewArrayExt::elements(&self) -> &vortex_array::ArrayRef
44564456

4457-
pub fn vortex_array::arrays::listview::ListViewArrayExt::estimate_density(&self, &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<f32>
4458-
44594457
pub fn vortex_array::arrays::listview::ListViewArrayExt::list_elements_at(&self, usize) -> vortex_error::VortexResult<vortex_array::ArrayRef>
44604458

44614459
pub fn vortex_array::arrays::listview::ListViewArrayExt::listview_validity(&self) -> vortex_array::validity::Validity
@@ -4470,6 +4468,8 @@ pub fn vortex_array::arrays::listview::ListViewArrayExt::size_at(&self, usize) -
44704468

44714469
pub fn vortex_array::arrays::listview::ListViewArrayExt::sizes(&self) -> &vortex_array::ArrayRef
44724470

4471+
pub fn vortex_array::arrays::listview::ListViewArrayExt::upper_bound_density(&self, &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<f32>
4472+
44734473
pub fn vortex_array::arrays::listview::ListViewArrayExt::verify_is_zero_copy_to_list(&self) -> bool
44744474

44754475
impl<T: vortex_array::TypedArrayRef<vortex_array::arrays::ListView>> vortex_array::arrays::listview::ListViewArrayExt for T
@@ -4480,8 +4480,6 @@ pub fn T::compute_referenced_elements_mask(&self, &mut vortex_array::ExecutionCt
44804480

44814481
pub fn T::elements(&self) -> &vortex_array::ArrayRef
44824482

4483-
pub fn T::estimate_density(&self, &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<f32>
4484-
44854483
pub fn T::list_elements_at(&self, usize) -> vortex_error::VortexResult<vortex_array::ArrayRef>
44864484

44874485
pub fn T::listview_validity(&self) -> vortex_array::validity::Validity
@@ -4496,6 +4494,8 @@ pub fn T::size_at(&self, usize) -> usize
44964494

44974495
pub fn T::sizes(&self) -> &vortex_array::ArrayRef
44984496

4497+
pub fn T::upper_bound_density(&self, &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<f32>
4498+
44994499
pub fn T::verify_is_zero_copy_to_list(&self) -> bool
45004500

45014501
pub fn vortex_array::arrays::listview::list_from_list_view(vortex_array::arrays::ListViewArray) -> vortex_error::VortexResult<vortex_array::arrays::ListArray>

vortex-array/src/arrays/listview/array.rs

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,31 @@ impl Default for ListViewData {
315315
}
316316
}
317317

318+
/// Walks parallel `(offset, size)` slices and marks every referenced position in `buf`.
319+
///
320+
/// **Precondition**
321+
///
322+
/// `offsets` and `sizes` must be the same length (which is always the case in valid `ListViewArray`s).
323+
fn fill_referenced_mask<O: IntegerPType, S: IntegerPType>(
324+
buf: &mut BitBufferMut,
325+
offsets: &[O],
326+
sizes: &[S],
327+
) {
328+
let len = offsets.len();
329+
330+
assert_eq!(
331+
len,
332+
sizes.len(),
333+
"offsets and sizes must be the same length"
334+
);
335+
336+
for i in 0..len {
337+
let start: usize = offsets[i].as_();
338+
let size: usize = sizes[i].as_();
339+
buf.fill_range(start, start + size, true);
340+
}
341+
}
342+
318343
pub trait ListViewArrayExt: TypedArrayRef<ListView> {
319344
fn nullability(&self) -> crate::dtype::Nullability {
320345
match self.as_ref().dtype() {
@@ -402,35 +427,31 @@ pub trait ListViewArrayExt: TypedArrayRef<ListView> {
402427
}
403428

404429
/// Returns a [`Mask`] of length `elements.len()` where each bit is set iff that
405-
/// position in `elements` is referenced by at least one view.
430+
/// position in `elements` is referenced by at least one view. Caller must ensure `elements`
431+
/// is non-empty.
406432
///
407433
/// Walks every `(offset, size)` pair, canonicalizes both `offsets` and `sizes`,
408434
/// and allocates a `BitBuffer` of length `elements.len()`, so it is extremely costly.
409435
///
410436
/// **Preconditions**
411437
///
412-
/// Assumes that `self.elements()` is non-empty.
413-
#[allow(clippy::cognitive_complexity)]
438+
/// `self.elements()` must be non-empty.
414439
fn compute_referenced_elements_mask(&self, ctx: &mut ExecutionCtx) -> VortexResult<Mask> {
415-
debug_assert!(!self.elements().is_empty());
440+
assert!(!self.elements().is_empty());
416441
let len = self.elements().len();
417442

418443
let offsets_primitive = self.offsets().clone().execute::<PrimitiveArray>(ctx)?;
419444
let sizes_primitive = self.sizes().clone().execute::<PrimitiveArray>(ctx)?;
420445

421446
let mut buf = BitBufferMut::new_unset(len);
422-
let offset_len = self.as_ref().len();
423447

424448
match_each_integer_ptype!(offsets_primitive.ptype(), |O| {
425449
match_each_integer_ptype!(sizes_primitive.ptype(), |S| {
426-
let offsets_slice = offsets_primitive.as_slice::<O>();
427-
let sizes_slice = sizes_primitive.as_slice::<S>();
428-
429-
for i in 0..offset_len {
430-
let start: usize = offsets_slice[i].as_();
431-
let size: usize = sizes_slice[i].as_();
432-
buf.fill_range(start, start + size, true);
433-
}
450+
fill_referenced_mask::<O, S>(
451+
&mut buf,
452+
offsets_primitive.as_slice::<O>(),
453+
sizes_primitive.as_slice::<S>(),
454+
);
434455
})
435456
});
436457

@@ -464,7 +485,7 @@ pub trait ListViewArrayExt: TypedArrayRef<ListView> {
464485
/// Exact for non-overlapping views, but overcounts when multiple views share the same elements.
465486
///
466487
/// Returns `Ok(1.0)` when `elements` is empty.
467-
fn estimate_density(&self, ctx: &mut ExecutionCtx) -> VortexResult<f32> {
488+
fn upper_bound_density(&self, ctx: &mut ExecutionCtx) -> VortexResult<f32> {
468489
let n_elts = self.elements().len();
469490
if n_elts == 0 {
470491
return Ok(1.0);
@@ -479,10 +500,10 @@ pub trait ListViewArrayExt: TypedArrayRef<ListView> {
479500
let sizes_sum = sizes
480501
.statistics()
481502
.compute_stat(Stat::Sum, ctx)?
482-
.ok_or_else(|| vortex_err!("Sum stat unavailable for sizes"))?
503+
.vortex_expect("sizes array has integer ptype elements")
483504
.as_primitive()
484505
.as_::<u64>()
485-
.ok_or_else(|| vortex_err!("could not cast sum of sizes to u64"))?;
506+
.vortex_expect("integer ptypes can be upcast to u64");
486507

487508
// if the same elements are referenced more than once the estimate may be
488509
// greater than 1.0, so clamp

vortex-array/src/arrays/listview/tests/density.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ fn full_density_no_overlap() -> VortexResult<()> {
3434
let mut ctx = test_execution_ctx();
3535
let lv = create_basic_listview();
3636
let exact = lv.compute_density(&mut ctx)?;
37-
let est = lv.estimate_density(&mut ctx)?;
37+
let est = lv.upper_bound_density(&mut ctx)?;
3838

3939
assert!((exact - 1.0).abs() < EPS);
4040
assert!((est - 1.0).abs() < EPS);
@@ -46,7 +46,7 @@ fn sparse_no_overlap_matches_exact() -> VortexResult<()> {
4646
let mut ctx = test_execution_ctx();
4747
let lv = create_large_listview();
4848
let exact = lv.compute_density(&mut ctx)?;
49-
let est = lv.estimate_density(&mut ctx)?;
49+
let est = lv.upper_bound_density(&mut ctx)?;
5050

5151
assert!((exact - 0.5).abs() < EPS);
5252
assert!((est - 0.5).abs() < EPS);
@@ -58,7 +58,7 @@ fn all_empty_lists_is_zero_density() -> VortexResult<()> {
5858
let mut ctx = test_execution_ctx();
5959
let lv = create_empty_lists_listview();
6060
let exact = lv.compute_density(&mut ctx)?;
61-
let est = lv.estimate_density(&mut ctx)?;
61+
let est = lv.upper_bound_density(&mut ctx)?;
6262

6363
assert_eq!(exact, 0.0);
6464
assert_eq!(est, 0.0);
@@ -70,7 +70,7 @@ fn overlap_full_coverage_clamps_estimate() -> VortexResult<()> {
7070
let mut ctx = test_execution_ctx();
7171
let lv = create_overlapping_listview();
7272
let exact = lv.compute_density(&mut ctx)?;
73-
let est = lv.estimate_density(&mut ctx)?;
73+
let est = lv.upper_bound_density(&mut ctx)?;
7474

7575
assert!((exact - 1.0).abs() < EPS);
7676
assert!((est - 1.0).abs() < EPS);
@@ -83,7 +83,7 @@ fn overlap_differential_exact_lower_than_estimate() -> VortexResult<()> {
8383
let lv = create_sparse_overlapping_listview();
8484

8585
let exact = lv.compute_density(&mut ctx)?;
86-
let est = lv.estimate_density(&mut ctx)?;
86+
let est = lv.upper_bound_density(&mut ctx)?;
8787

8888
assert!((exact - 0.25).abs() < EPS);
8989
assert!((est - 0.40).abs() < EPS);
@@ -96,7 +96,7 @@ fn empty_elements_returns_one() -> VortexResult<()> {
9696
let lv = create_empty_elements_listview();
9797

9898
let exact = lv.compute_density(&mut ctx)?;
99-
let est = lv.estimate_density(&mut ctx)?;
99+
let est = lv.upper_bound_density(&mut ctx)?;
100100

101101
assert!((exact - 1.0).abs() < EPS);
102102
assert!((est - 1.0).abs() < EPS);
@@ -113,7 +113,7 @@ fn estimate_uses_cached_sum_stat() -> VortexResult<()> {
113113
.statistics()
114114
.set(Stat::Sum, Precision::Exact(ScalarValue::from(5u64)));
115115

116-
let est = lv.estimate_density(&mut ctx)?;
116+
let est = lv.upper_bound_density(&mut ctx)?;
117117
assert!((est - 0.5).abs() < EPS);
118118
Ok(())
119119
}

vortex-array/src/arrow/executor/list_view.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ pub(super) fn to_arrow_list_view<O: OffsetSizeTrait + IntegerPType>(
3434
// If the array is sufficiently sparse, rebuild before handing it to Arrow. Otherwise downstream
3535
// consumers hold an elements buffer containing unreferenced data in memory indefinitely,
3636
// and any compute pass over that buffer wastes work on data nothing references.
37-
let density = array.estimate_density(ctx)?;
37+
let density = array.upper_bound_density(ctx)?;
3838
let array = if density < DEFAULT_REBUILD_DENSITY_THRESHOLD {
3939
array.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?
4040
} else {

vortex-duckdb/src/exporter/list_view.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ pub(crate) fn new_exporter(
5555
// If the array is sufficiently sparse, rebuild. Otherwise the DuckDB vector will
5656
// hold an elements buffer containing unreferenced data in memory indefinitely,
5757
// and any compute pass over that buffer wastes work on data nothing references.
58-
let density = array.estimate_density(ctx)?;
58+
let density = array.upper_bound_density(ctx)?;
5959
let array = if density < DEFAULT_REBUILD_DENSITY_THRESHOLD {
6060
array.rebuild(ListViewRebuildMode::MakeZeroCopyToList)?
6161
} else {

0 commit comments

Comments
 (0)