Skip to content

Commit 20bf843

Browse files
committed
test: add more tests and docs for heap size estimation
1 parent e80076b commit 20bf843

1 file changed

Lines changed: 205 additions & 5 deletions

File tree

datafusion/common/src/heap_size.rs

Lines changed: 205 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,30 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
//! Estimating the heap-allocated memory owned by a value.
19+
//!
20+
//! The [`DFHeapSize`] trait reports the number of bytes a value owns on the
21+
//! heap, **excluding** the stack size of the value itself.
22+
//!
23+
//! Implementations must thread a [`DFHeapSizeCtx`] through every
24+
//! nested call. The context records which allocations have already been measured
25+
//! so they are only counted once.
26+
//!
27+
//! # Examples
28+
//!
29+
//! ```
30+
//! use datafusion_common::heap_size::{DFHeapSize, DFHeapSizeCtx};
31+
//! use std::sync::Arc;
32+
//!
33+
//! let shared: Arc<String> = Arc::new("hello".to_string());
34+
//! let alias = Arc::clone(&shared);
35+
//!
36+
//! let mut ctx = DFHeapSizeCtx::default();
37+
//! // The shared allocation is counted once even when reached twice.
38+
//! let total = shared.heap_size(&mut ctx) + alias.heap_size(&mut ctx);
39+
//! assert_eq!(total, shared.heap_size(&mut DFHeapSizeCtx::default()));
40+
//! ```
41+
1842
use crate::stats::Precision;
1943
use crate::{ColumnStatistics, ScalarValue, Statistics, TableReference};
2044
use arrow::array::{
@@ -32,12 +56,15 @@ use std::collections::HashMap;
3256
use std::fmt::Debug;
3357
use std::sync::Arc;
3458

35-
/// This is a temporary solution until <https://github.com/apache/datafusion/pull/19599> and
36-
/// <https://github.com/apache/arrow-rs/pull/9138> are resolved.
37-
/// Trait for calculating the size of various containers
59+
/// Trait for computing how many bytes a value has allocated on the heap.
60+
///
61+
/// Implementations must thread a [`DFHeapSizeCtx`] through every
62+
/// nested call. The context records which allocations have already been measured
63+
/// so they are only counted once.
64+
///
3865
pub trait DFHeapSize {
39-
/// Return the size of any bytes allocated on the heap by this object,
40-
/// including heap memory in those structures
66+
/// Return the number of bytes this value has allocated on the heap,
67+
/// including heap memory owned transitively by nested values.
4168
///
4269
/// Note that the size of the type itself is not included in the result --
4370
/// instead, that size is added by the caller (e.g. container).
@@ -521,6 +548,10 @@ impl DFHeapSize for usize {
521548
mod tests {
522549
use super::*;
523550

551+
/// Test helper: returns `v.heap_size(..)` evaluated against a brand-new
/// `DFHeapSizeCtx::default()`, so no context state carries over between calls.
///
/// `T: ?Sized` lets callers pass unsized values such as `str` by reference.
fn size<T: DFHeapSize + ?Sized>(v: &T) -> usize {
552+
v.heap_size(&mut DFHeapSizeCtx::default())
553+
}
554+
524555
#[test]
525556
fn test_heap_size_arc_avoid_double_accounting() {
526557
let a1 = Arc::new(vec![1, 2, 3]);
@@ -558,4 +589,173 @@ mod tests {
558589

559590
assert_eq!(heap_size, heap_size_with_clones);
560591
}
592+
593+
// Two `Arc<dyn DFHeapSize>` handles to the same allocation, measured with one
// shared context, must total the same as one handle measured with a fresh
// context.
#[test]
594+
fn test_arc_dyn() {
595+
let a1: Arc<dyn DFHeapSize> = Arc::new(String::from("hello"));
596+
// Baseline: a single handle measured with its own fresh context.
let baseline = size(&a1);
597+
598+
let a2 = Arc::clone(&a1);
599+
// Both handles are measured through the *same* context.
let mut ctx = DFHeapSizeCtx::default();
600+
let with_clones = a1.heap_size(&mut ctx) + a2.heap_size(&mut ctx);
601+
assert_eq!(baseline, with_clones);
602+
}
603+
604+
// Scalar primitives (bool, integers of every width incl. `i256`, and floats
// incl. `f16`) are expected to report zero heap bytes.
#[test]
605+
fn test_primitives() {
606+
assert_eq!(size(&true), 0);
607+
assert_eq!(size(&0u8), 0);
608+
assert_eq!(size(&0u16), 0);
609+
assert_eq!(size(&0u32), 0);
610+
assert_eq!(size(&0u64), 0);
611+
assert_eq!(size(&0usize), 0);
612+
assert_eq!(size(&0i8), 0);
613+
assert_eq!(size(&0i16), 0);
614+
assert_eq!(size(&0i32), 0);
615+
assert_eq!(size(&0i64), 0);
616+
assert_eq!(size(&0i128), 0);
617+
assert_eq!(size(&i256::ZERO), 0);
618+
assert_eq!(size(&0f32), 0);
619+
assert_eq!(size(&0f64), 0);
620+
assert_eq!(size(&f16::from_f32(0.0)), 0);
621+
}
622+
623+
// A `String` is expected to report its allocated capacity rather than its
// length (5 bytes are written into a 32-byte reservation), and an empty
// `String::new()` is expected to report 0.
#[test]
624+
fn test_string() {
625+
let mut s = String::with_capacity(32);
626+
s.push_str("hello");
627+
// NOTE(review): `String::with_capacity` only guarantees *at least* the
// requested capacity; comparing against `s.capacity()` instead of the literal
// 32 would not depend on the allocator returning exactly 32 bytes.
assert_eq!(size(&s), 32);
628+
629+
let empty = String::new();
630+
assert_eq!(size(&empty), 0);
631+
}
632+
633+
// Unsized `str` values: a non-empty string is expected to report a non-zero
// size and the empty string is expected to report 0.
#[test]
634+
fn test_str() {
635+
let s: &str = "hello";
636+
// `size` is instantiated with `T = str` here (allowed by its `?Sized` bound).
assert!(size(s) > 0);
637+
assert_eq!(size(""), 0);
638+
}
639+
640+
// `Option<String>`: `Some` is expected to report exactly the inner string's
// capacity, and `None` is expected to report 0.
#[test]
641+
fn test_option() {
642+
let some: Option<String> = Some(String::from("hi"));
643+
// Compared against `capacity()` so the check does not assume capacity == len.
assert_eq!(size(&some), some.as_ref().unwrap().capacity());
644+
645+
let none: Option<String> = None;
646+
assert_eq!(size(&none), 0);
647+
}
648+
649+
// `Vec`: populated vectors (of plain integers and of heap-owning `String`s)
// are expected to report a non-zero size; `Vec::new()` is expected to report 0.
// Only non-zero-ness is asserted for the populated cases, not an exact size.
#[test]
650+
fn test_vec() {
651+
let v: Vec<i32> = vec![1, 2, 3];
652+
assert!(size(&v) > 0);
653+
654+
let strings = vec![String::from("ab"), String::from("cdef")];
655+
assert!(size(&strings) > 0);
656+
657+
let empty: Vec<i32> = Vec::new();
658+
assert_eq!(size(&empty), 0);
659+
}
660+
661+
// `Box`: a boxed value is expected to report a non-zero size, both for a
// plain `i32` payload and for a `String` payload. Exact sizes are not asserted.
#[test]
662+
fn test_box() {
663+
let b: Box<i32> = Box::new(42);
664+
assert!(size(&b) > 0);
665+
666+
let b: Box<String> = Box::new(String::from("hello"));
667+
assert!(size(&b) > 0);
668+
}
669+
670+
// Tuples: a tuple of primitives is expected to report 0, while a tuple of
// `String`s is expected to report a non-zero size.
#[test]
671+
fn test_tuple() {
672+
let zero = (1i32, 2i64);
673+
assert_eq!(size(&zero), 0);
674+
675+
let t = (String::from("hello"), String::from("world"));
676+
assert!(size(&t) > 0);
677+
}
678+
679+
// `HashMap`: a freshly constructed, never-inserted-into map is expected to
// report 0; a map holding one `String -> String` entry is expected to report a
// non-zero size (exact value not asserted).
#[test]
680+
fn test_hashmap() {
681+
let m: HashMap<i32, i32> = HashMap::new();
682+
assert_eq!(size(&m), 0);
683+
684+
let mut m: HashMap<String, String> = HashMap::new();
685+
m.insert("key".into(), "value".into());
686+
687+
assert!(size(&m) > 0);
688+
}
689+
690+
// `Precision<usize>`: all three variants (`Exact`, `Inexact`, `Absent`) are
// expected to report 0 when the payload is a plain `usize`.
#[test]
691+
fn test_precision() {
692+
let exact: Precision<usize> = Precision::Exact(42);
693+
assert_eq!(size(&exact), 0);
694+
695+
let inexact: Precision<usize> = Precision::Inexact(99);
696+
assert_eq!(size(&inexact), 0);
697+
698+
let absent: Precision<usize> = Precision::Absent;
699+
assert_eq!(size(&absent), 0);
700+
}
701+
702+
// `ScalarValue`: `Null` and primitive-payload variants (present or `None`) are
// expected to report 0; `Utf8(Some(..))` is expected to report the string's
// bytes and `Utf8(None)` is expected to report 0.
#[test]
703+
fn test_scalar_values() {
704+
assert_eq!(size(&ScalarValue::Null), 0);
705+
assert_eq!(size(&ScalarValue::Int32(Some(42))), 0);
706+
assert_eq!(size(&ScalarValue::Boolean(Some(true))), 0);
707+
assert_eq!(size(&ScalarValue::Float64(None)), 0);
708+
709+
let sv = ScalarValue::Utf8(Some(String::from("hello")));
710+
// NOTE(review): this compares against `len()`, whereas `test_option` compares
// against `capacity()`; presumably relies on `String::from` allocating exactly
// `len` bytes -- confirm against the `String` impl.
assert_eq!(size(&sv), "hello".len());
711+
712+
let sv = ScalarValue::Utf8(None);
713+
assert_eq!(size(&sv), 0);
714+
}
715+
716+
// Field-less `DataType` variants (`Int32`, `Utf8`, `Boolean`, `Null`) are
// expected to report 0.
#[test]
717+
fn test_data_type_primitives() {
718+
assert_eq!(size(&DataType::Int32), 0);
719+
assert_eq!(size(&DataType::Utf8), 0);
720+
assert_eq!(size(&DataType::Boolean), 0);
721+
assert_eq!(size(&DataType::Null), 0);
722+
}
723+
724+
// A `DataType::List` wrapping an `Arc<Field>` is expected to report a non-zero
// size (exact value not asserted).
#[test]
725+
fn test_data_type_with_field() {
726+
let list = DataType::List(Arc::new(Field::new("item", DataType::Int32, true)));
727+
assert!(size(&list) > 0);
728+
}
729+
730+
// `TableReference`: both the bare (table only) and full
// (catalog, schema, table) forms are expected to report a non-zero size.
#[test]
731+
fn test_table_references() {
732+
let tr = TableReference::bare("users");
733+
// Presumably the Arc<str> overhead (two usize counts) plus the bytes of
// "users"; only non-zero-ness is asserted here, not that exact total.
assert!(size(&tr) > 0);
735+
let tr = TableReference::full("cat", "schema", "users");
736+
assert!(size(&tr) > 0);
737+
}
738+
739+
// `ColumnStatistics` / `Statistics`: a column whose min/max are `Utf8` scalars
// is expected to report exactly the combined string bytes, and a `Statistics`
// holding such a column in its `column_statistics` vec is expected to report a
// non-zero size.
#[test]
740+
fn test_column_statistics() {
741+
let mut col = ColumnStatistics::new_unknown();
742+
col.max_value = Precision::Exact(ScalarValue::Utf8(Some("hello".into())));
743+
col.min_value = Precision::Exact(ScalarValue::Utf8(Some("ab".into())));
744+
// NOTE(review): the exact expectation assumes every other field left by
// `new_unknown()` contributes 0 and that each string reports `len()` bytes --
// confirm against the impls.
assert_eq!(size(&col), "hello".len() + "ab".len());
745+
746+
let mut col = ColumnStatistics::new_unknown();
747+
col.max_value = Precision::Exact(ScalarValue::Utf8(Some("hello".into())));
748+
let stats = Statistics {
749+
num_rows: Precision::Exact(10),
750+
total_byte_size: Precision::Absent,
751+
column_statistics: vec![col],
752+
};
753+
assert!(size(&stats) > 0);
754+
}
755+
756+
// A `Field` with a non-empty name is expected to report a non-zero size
// (exact value not asserted).
#[test]
757+
fn test_field() {
758+
let field = Field::new("temperature", DataType::Float64, true);
759+
assert!(size(&field) > 0);
760+
}
561761
}

0 commit comments

Comments
 (0)