@@ -7,18 +7,21 @@ use std::sync::Arc;
77
88use num_traits:: AsPrimitive ;
99use smallvec:: smallvec;
10+ use vortex_buffer:: BitBufferMut ;
1011use vortex_error:: VortexExpect ;
1112use vortex_error:: VortexResult ;
1213use vortex_error:: vortex_bail;
1314use vortex_error:: vortex_ensure;
1415use vortex_error:: vortex_err;
16+ use vortex_mask:: Mask ;
1517
1618use crate :: ArrayRef ;
1719use crate :: ArraySlots ;
1820use crate :: LEGACY_SESSION ;
1921#[ expect( deprecated) ]
2022use crate :: ToCanonical as _;
2123use crate :: VortexSessionExecute ;
24+ use crate :: aggregate_fn:: fns:: sum:: sum;
2225use crate :: array:: Array ;
2326use crate :: array:: ArrayParts ;
2427use crate :: array:: TypedArrayRef ;
@@ -30,6 +33,9 @@ use crate::arrays::PrimitiveArray;
3033use crate :: arrays:: bool;
3134use crate :: dtype:: DType ;
3235use crate :: dtype:: IntegerPType ;
36+ use crate :: expr:: stats:: Precision ;
37+ use crate :: expr:: stats:: Stat ;
38+ use crate :: expr:: stats:: StatsProvider ;
3339use crate :: match_each_integer_ptype;
3440use crate :: validity:: Validity ;
3541
@@ -396,6 +402,92 @@ pub trait ListViewArrayExt: TypedArrayRef<ListView> {
396402 let sizes_primitive = self . sizes ( ) . to_primitive ( ) ;
397403 validate_zctl ( self . elements ( ) , offsets_primitive, sizes_primitive) . is_ok ( )
398404 }
405+
406+ /// Returns a [`Mask`] of length `elements.len()` where each bit is set iff that
407+ /// position in `elements` is referenced by at least one view.
408+ ///
409+ /// Walks every `(offset, size)` pair, canonicalizes both `offsets` and `sizes`,
410+ /// and allocates a `BitBuffer` of length `elements.len()`, so it is extremely costly.
411+ ///
412+ /// Returns `None` when `elements` is empty.
413+ fn compute_referenced_elements_mask ( & self ) -> Option < Mask > {
414+ let len = self . elements ( ) . len ( ) ;
415+ if len == 0 {
416+ return None ;
417+ }
418+
419+ let offsets_dtype = self . offsets ( ) . dtype ( ) ;
420+ let sizes_dtype = self . sizes ( ) . dtype ( ) ;
421+
422+ #[ expect( deprecated) ]
423+ let offsets_primitive = self . offsets ( ) . to_primitive ( ) ;
424+ #[ expect( deprecated) ]
425+ let sizes_primitive = self . sizes ( ) . to_primitive ( ) ;
426+
427+ let mut buf = BitBufferMut :: new_unset ( len) ;
428+ let offset_len = self . as_ref ( ) . len ( ) ;
429+
430+ match_each_integer_ptype ! ( offsets_dtype. as_ptype( ) , |O | {
431+ match_each_integer_ptype!( sizes_dtype. as_ptype( ) , |S | {
432+ let offsets_slice = offsets_primitive. as_slice:: <O >( ) ;
433+ let sizes_slice = sizes_primitive. as_slice:: <S >( ) ;
434+
435+ ( 0 ..offset_len) . for_each( |i| {
436+ let start = offsets_slice[ i] as usize ;
437+ let size = sizes_slice[ i] as usize ;
438+ buf. fill_range( start, start + size, true ) ;
439+ } ) ;
440+ } )
441+ } ) ;
442+
443+ Some ( Mask :: from_buffer ( buf. freeze ( ) ) )
444+ }
445+
446+ /// Exact fraction of `elements` referenced by some view, in `[0.0, 1.0]`. Extremely costly.
447+ ///
448+ /// Returns `None` when `elements` is empty.
449+ fn compute_density ( & self ) -> Option < f32 > {
450+ self . compute_referenced_elements_mask ( )
451+ . map ( |mask| match mask {
452+ Mask :: AllTrue ( _) => 1.0 ,
453+ Mask :: AllFalse ( _) => 0.0 ,
454+ Mask :: Values ( values) => values. true_count ( ) as f32 / self . elements ( ) . len ( ) as f32 ,
455+ } )
456+ }
457+
458+ /// Upper-bound estimate of [`compute_density`](Self::compute_density) via
459+ /// `sum(sizes) / elements.len()`, clamped to `[0.0, 1.0]`.
460+ ///
461+ /// Exact for non-overlapping views, but overcounts when multiple views share the same elements.
462+ ///
463+ /// Returns `Ok(None)` when `elements` is empty
464+ fn estimate_density ( & self ) -> VortexResult < Option < f32 > > {
465+ let n_elts = self . elements ( ) . len ( ) ;
466+ if n_elts == 0 {
467+ return Ok ( None ) ;
468+ }
469+
470+ let sizes = self . sizes ( ) ;
471+ if sizes. is_empty ( ) {
472+ return Ok ( Some ( 0.0 ) ) ;
473+ }
474+
475+ // Try to fetch the cached sum stat, otherwise fall back to calculating it on the spot
476+ let sizes_sum = if let Some ( Precision :: Exact ( scalar) ) = sizes. statistics ( ) . get ( Stat :: Sum )
477+ && let Some ( sum) = scalar. as_primitive ( ) . as_ :: < u64 > ( )
478+ {
479+ sum
480+ } else {
481+ sum ( sizes, & mut LEGACY_SESSION . create_execution_ctx ( ) ) ?
482+ . as_primitive ( )
483+ . as_ :: < u64 > ( )
484+ . unwrap ( )
485+ } ;
486+
487+ let estimate = ( sizes_sum as f32 / n_elts as f32 ) . clamp ( 0.0 , 1.0 ) ;
488+
489+ Ok ( Some ( estimate) )
490+ }
399491}
400492impl < T : TypedArrayRef < ListView > > ListViewArrayExt for T { }
401493
0 commit comments