1515// specific language governing permissions and limitations
1616// under the License.
1717
18+ //! Estimating the heap-allocated memory owned by a value.
19+ //!
20+ //! The [`DFHeapSize`] trait reports the number of bytes a value owns on the
21+ //! heap, **excluding** the stack size of the value itself.
22+ //!
//! Implementations must thread a [`DFHeapSizeCtx`] through every nested
//! `heap_size` call. The context records which allocations have already been
//! measured, so that each one is counted only once.
26+ //!
27+ //! # Example
28+ //!
29+ //! ```
30+ //! use datafusion_common::heap_size::{DFHeapSize, DFHeapSizeCtx};
31+ //! use std::sync::Arc;
32+ //!
33+ //! let shared: Arc<String> = Arc::new("hello".to_string());
34+ //! let alias = Arc::clone(&shared);
35+ //!
36+ //! let mut ctx = DFHeapSizeCtx::default();
37+ //! // The shared allocation is counted once even when reached twice.
38+ //! let total = shared.heap_size(&mut ctx) + alias.heap_size(&mut ctx);
39+ //! assert_eq!(total, shared.heap_size(&mut DFHeapSizeCtx::default()));
40+ //! ```
41+
1842use crate :: stats:: Precision ;
1943use crate :: { ColumnStatistics , ScalarValue , Statistics , TableReference } ;
2044use arrow:: array:: {
@@ -32,12 +56,15 @@ use std::collections::HashMap;
3256use std:: fmt:: Debug ;
3357use std:: sync:: Arc ;
3458
35- /// This is a temporary solution until <https://github.com/apache/datafusion/pull/19599> and
36- /// <https://github.com/apache/arrow-rs/pull/9138> are resolved.
37- /// Trait for calculating the size of various containers
59+ /// Trait for computing how many bytes a value has allocated on the heap.
60+ ///
/// Implementations must thread a [`DFHeapSizeCtx`] through every nested
/// `heap_size` call. The context records which allocations have already been
/// measured, so that each one is counted only once.
64+ ///
3865pub trait DFHeapSize {
39- /// Return the size of any bytes allocated on the heap by this object ,
40- /// including heap memory in those structures
66+ /// Return the number of bytes this value has allocated on the heap,
67+ /// including heap memory owned transitively by nested values.
4168 ///
4269 /// Note that the size of the type itself is not included in the result --
4370 /// instead, that size is added by the caller (e.g. container).
@@ -521,6 +548,10 @@ impl DFHeapSize for usize {
521548mod tests {
522549 use super :: * ;
523550
551+ fn size < T : DFHeapSize + ?Sized > ( v : & T ) -> usize {
552+ v. heap_size ( & mut DFHeapSizeCtx :: default ( ) )
553+ }
554+
524555 #[ test]
525556 fn test_heap_size_arc_avoid_double_accounting ( ) {
526557 let a1 = Arc :: new ( vec ! [ 1 , 2 , 3 ] ) ;
@@ -558,4 +589,175 @@ mod tests {
558589
559590 assert_eq ! ( heap_size, heap_size_with_clones) ;
560591 }
592+
/// Two `Arc<dyn DFHeapSize>` handles to the same allocation, measured through
/// one shared context, must add up to the size of a single handle.
#[test]
fn test_arc_dyn() {
    let original: Arc<dyn DFHeapSize> = Arc::new(String::from("hello"));
    // Reference value: a single handle measured against a fresh context.
    let expected = size(&original);

    let alias = Arc::clone(&original);
    let mut shared_ctx = DFHeapSizeCtx::default();
    let first_visit = original.heap_size(&mut shared_ctx);
    let second_visit = alias.heap_size(&mut shared_ctx);
    assert_eq!(expected, first_visit + second_visit);
}
603+
/// Plain numeric and boolean values are expected to report zero heap bytes.
#[test]
fn test_primitives() {
    // Expands to one `assert_eq!(size(&value), 0)` per listed value; a macro is
    // used because the values all have different types.
    macro_rules! assert_no_heap {
        ($($value:expr),+ $(,)?) => {
            $( assert_eq!(size(&$value), 0); )+
        };
    }

    assert_no_heap!(
        true,
        0u8,
        0u16,
        0u32,
        0u64,
        0usize,
        0i8,
        0i16,
        0i32,
        0i64,
        0i128,
        i256::ZERO,
        0f32,
        0f64,
        f16::from_f32(0.0),
    );
}
622+
/// A `String` is expected to report its reserved capacity (not its length),
/// and a string that never allocated is expected to report zero.
#[test]
fn test_string() {
    let mut s = String::with_capacity(32);
    s.push_str("hello");
    // `String::with_capacity` only promises *at least* the requested capacity,
    // so compare against what was actually reserved instead of the literal 32.
    assert!(s.capacity() >= 32);
    assert_eq!(size(&s), s.capacity());

    let empty = String::new();
    assert_eq!(size(&empty), 0);
}
632+
/// An empty `str` reports zero bytes; a non-empty one reports a positive size.
#[test]
fn test_str() {
    assert_eq!(size(""), 0);

    let text: &str = "hello";
    let measured = size(text);
    assert!(measured > 0);
}
639+
/// `Some(string)` reports the capacity of the wrapped string; `None` reports zero.
#[test]
fn test_option() {
    let missing: Option<String> = None;
    assert_eq!(size(&missing), 0);

    // Record the capacity before wrapping; moving the string keeps its buffer.
    let inner = String::from("hi");
    let expected = inner.capacity();
    let present: Option<String> = Some(inner);
    assert_eq!(size(&present), expected);
}
648+
/// An unallocated `Vec` reports zero; populated vectors report a positive size.
#[test]
fn test_vec() {
    let unallocated: Vec<i32> = Vec::new();
    assert_eq!(size(&unallocated), 0);

    let numbers: Vec<i32> = vec![1, 2, 3];
    let numbers_size = size(&numbers);
    assert!(numbers_size > 0);

    let words = vec![String::from("ab"), String::from("cdef")];
    let words_size = size(&words);
    assert!(words_size > 0);
}
660+
/// A `Box` reports a positive size, both for a plain integer payload and for a
/// payload that owns a heap buffer of its own.
#[test]
fn test_box() {
    let boxed_int: Box<i32> = Box::new(42);
    let int_size = size(&boxed_int);
    assert!(int_size > 0);

    let boxed_string: Box<String> = Box::new(String::from("hello"));
    let string_size = size(&boxed_string);
    assert!(string_size > 0);
}
669+
/// A tuple of plain integers reports zero; a tuple of strings reports a
/// positive size.
#[test]
fn test_tuple() {
    let plain_pair = (1i32, 2i64);
    assert_eq!(size(&plain_pair), 0);

    let string_pair = (String::from("hello"), String::from("world"));
    let measured = size(&string_pair);
    assert!(measured > 0);
}
678+
/// A freshly created map reports zero; a map holding one string entry reports
/// a positive size.
#[test]
fn test_hashmap() {
    let untouched: HashMap<i32, i32> = HashMap::new();
    assert_eq!(size(&untouched), 0);

    let mut populated: HashMap<String, String> = HashMap::new();
    populated.insert(String::from("key"), String::from("value"));
    let measured = size(&populated);
    assert!(measured > 0);
}
689+
690+
/// Every `Precision<usize>` variant reports zero heap bytes.
#[test]
fn test_precision() {
    let variants: [Precision<usize>; 3] = [
        Precision::Exact(42),
        Precision::Inexact(99),
        Precision::Absent,
    ];

    for variant in &variants {
        assert_eq!(size(variant), 0);
    }
}
702+
/// Scalars without a string payload report zero; a `Utf8` scalar reports the
/// byte length of the text it was built from.
#[test]
fn test_scalar_values() {
    let heapless = [
        ScalarValue::Null,
        ScalarValue::Int32(Some(42)),
        ScalarValue::Boolean(Some(true)),
        ScalarValue::Float64(None),
        ScalarValue::Utf8(None),
    ];
    for scalar in &heapless {
        assert_eq!(size(scalar), 0);
    }

    let text = "hello";
    let utf8 = ScalarValue::Utf8(Some(String::from(text)));
    assert_eq!(size(&utf8), text.len());
}
716+
/// Field-less `DataType` variants report zero heap bytes.
#[test]
fn test_data_type_primitive() {
    let simple_types = [
        DataType::Int32,
        DataType::Utf8,
        DataType::Boolean,
        DataType::Null,
    ];

    for data_type in &simple_types {
        assert_eq!(size(data_type), 0);
    }
}
724+
/// A `DataType::List` holding an `Arc<Field>` reports a positive size.
#[test]
fn test_data_type_with_field() {
    let item = Arc::new(Field::new("item", DataType::Int32, true));
    let list = DataType::List(item);
    let measured = size(&list);
    assert!(measured > 0);
}
730+
/// Both bare and fully qualified table references report a positive size.
#[test]
fn test_table_reference() {
    let bare = TableReference::bare("users");
    // Per the original note, the total is expected to cover the `Arc<str>`
    // overhead (two usize counts) plus the bytes of "users"; only a non-zero
    // result is asserted here.
    let bare_size = size(&bare);
    assert!(bare_size > 0);

    let qualified = TableReference::full("cat", "schema", "users");
    let qualified_size = size(&qualified);
    assert!(qualified_size > 0);
}
739+
/// Column statistics with string min/max values report the combined byte
/// length of those strings; a `Statistics` wrapping such a column reports a
/// positive size.
#[test]
fn test_column_statistics() {
    // Builds an exact UTF-8 bound from a string literal.
    let exact_utf8 = |text: &str| Precision::Exact(ScalarValue::Utf8(Some(text.into())));

    let mut bounded = ColumnStatistics::new_unknown();
    bounded.max_value = exact_utf8("hello");
    bounded.min_value = exact_utf8("ab");
    let expected = "hello".len() + "ab".len();
    assert_eq!(size(&bounded), expected);

    let mut max_only = ColumnStatistics::new_unknown();
    max_only.max_value = exact_utf8("hello");
    let stats = Statistics {
        num_rows: Precision::Exact(10),
        total_byte_size: Precision::Absent,
        column_statistics: vec![max_only],
    };
    let stats_size = size(&stats);
    assert!(stats_size > 0);
}
756+
/// A `Field` reports at least as many bytes as its name occupies.
#[test]
fn test_field() {
    let name = "temperature";
    let field = Field::new(name, DataType::Float64, true);
    let measured = size(&field);
    assert!(measured >= name.len());
}
762+
561763}
0 commit comments