Skip to content

Commit 41b875d

Browse files
committed
Add heap_size to statistics
This adds a `heap_size` method returning the amount of memory a `Statistics` struct allocates on the heap.
1 parent 9a9ff8d commit 41b875d

File tree

2 files changed

+59
-2
lines changed

2 files changed

+59
-2
lines changed

datafusion/common/src/stats.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,13 @@ impl Statistics {
321321
}
322322
}
323323

324+
/// Returns an estimate, in bytes, of the memory attributed to this `Statistics`.
///
/// The value is the allocated capacity of `column_statistics` multiplied by
/// `size_of::<ColumnStatistics>()`, plus two `Precision<usize>` values that
/// account for `num_rows` and `total_byte_size`.
///
/// NOTE(review): despite the name, the two `Precision<usize>` fields are
/// included in the total, and nothing here measures memory that individual
/// `ColumnStatistics` entries may own indirectly. Confirm this is the
/// intended accounting.
325+
pub fn heap_size(&self) -> usize {
326+
// Vec buffer (sized by capacity, not length) + num_rows + total_byte_size
327+
self.column_statistics.capacity() * size_of::<ColumnStatistics>()
328+
+ size_of::<Precision<usize>>() * 2
329+
}
330+
324331
/// Calculates `total_byte_size` based on the schema and `num_rows`.
325332
/// If any of the columns has non-primitive width, `total_byte_size` is set to inexact.
326333
pub fn calculate_total_byte_size(&mut self, schema: &Schema) {
@@ -1757,4 +1764,54 @@ mod tests {
17571764
// total_byte_size should fall back to scaling: 8000 * 0.1 = 800
17581765
assert_eq!(result.total_byte_size, Precision::Inexact(800));
17591766
}
1767+
1768+
// Checks `Statistics::heap_size` for zero, one and two column statistics.
//
// NOTE(review): the expected values below are hard-coded byte counts. They
// presumably assume a 64-bit target and the current in-memory layout of
// `ColumnStatistics` / `Precision<usize>`; confirm they hold on every
// supported platform.
#[test]
1769+
fn test_statistics_heap_size() {
1770+
let stats = Statistics {
1771+
num_rows: Precision::Exact(100),
1772+
total_byte_size: Precision::Exact(100),
1773+
column_statistics: vec![],
1774+
};
1775+
1776+
// No columns: only the `2 * size_of::<Precision<usize>>()` term should
// remain (presumably an empty `vec![]` reports a capacity of zero).
assert_eq!(stats.heap_size(), 32);
1777+
1778+
let stats = Statistics {
1779+
num_rows: Precision::Exact(100),
1780+
total_byte_size: Precision::Exact(100),
1781+
column_statistics: vec![ColumnStatistics {
1782+
null_count: Precision::Absent,
1783+
max_value: Precision::Absent,
1784+
min_value: Precision::Absent,
1785+
sum_value: Precision::Absent,
1786+
distinct_count: Precision::Absent,
1787+
byte_size: Precision::Exact(100),
1788+
}],
1789+
};
1790+
1791+
// One column: 320 - 32 = 288 bytes attributed to the single entry.
assert_eq!(stats.heap_size(), 320);
1792+
1793+
let stats = Statistics {
1794+
num_rows: Precision::Exact(100),
1795+
total_byte_size: Precision::Exact(100),
1796+
column_statistics: vec![
1797+
ColumnStatistics {
1798+
null_count: Precision::Absent,
1799+
max_value: Precision::Absent,
1800+
min_value: Precision::Absent,
1801+
sum_value: Precision::Absent,
1802+
distinct_count: Precision::Absent,
1803+
byte_size: Precision::Exact(100),
1804+
},
1805+
ColumnStatistics {
1806+
null_count: Precision::Exact(10),
1807+
max_value: Precision::Absent,
1808+
min_value: Precision::Absent,
1809+
sum_value: Precision::Absent,
1810+
distinct_count: Precision::Absent,
1811+
byte_size: Precision::Exact(100),
1812+
},
1813+
],
1814+
};
1815+
// Two columns: 32 + 2 * 288 = 608, regardless of which fields are set.
assert_eq!(stats.heap_size(), 608);
1816+
}
17601817
}

datafusion/execution/src/cache/cache_unit.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ impl FileStatisticsCache for DefaultFileStatisticsCache {
5555
num_rows: stats.num_rows,
5656
num_columns: stats.column_statistics.len(),
5757
table_size_bytes: stats.total_byte_size,
58-
statistics_size_bytes: 0, // TODO: set to the real size in the future
58+
statistics_size_bytes: stats.heap_size(),
5959
},
6060
);
6161
}
@@ -196,7 +196,7 @@ mod tests {
196196
num_rows: Precision::Absent,
197197
num_columns: 1,
198198
table_size_bytes: Precision::Absent,
199-
statistics_size_bytes: 0,
199+
statistics_size_bytes: 320,
200200
}
201201
)])
202202
);

0 commit comments

Comments
 (0)