Skip to content

Commit 090157c

Browse files
committed
Improve naming and doc comments
1 parent 97efdb9 commit 090157c

File tree

2 files changed

+33
-12
lines changed

2 files changed

+33
-12
lines changed

datafusion/core/tests/statistics/mod.rs

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,30 @@ struct StatsVsMetricsDisplayOptions {
3131
display_output_bytes: bool,
3232
}
3333

34-
struct Node {
34+
/// Represents a node in a plan for which some statistics were estimated, and some metrics were
35+
/// collected at runtime.
36+
struct StatsCheckerNode {
37+
/// The name of the original [ExecutionPlan].
3538
name: String,
36-
stats: Statistics,
39+
/// The stats attached to the original [ExecutionPlan].
40+
stats: Arc<Statistics>,
41+
/// How many rows actually flowed through the [ExecutionPlan] at runtime.
3742
output_rows: Option<usize>,
43+
/// How many bytes actually flowed through the [ExecutionPlan] at runtime.
3844
output_bytes: Option<usize>,
39-
children: Vec<Node>,
45+
/// The children of the [ExecutionPlan], represented as other [StatsCheckerNode].
46+
children: Vec<StatsCheckerNode>,
47+
/// Visualization options for this node.
4048
opts: StatsVsMetricsDisplayOptions,
4149
}
4250

43-
impl Debug for Node {
51+
impl Debug for StatsCheckerNode {
4452
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
45-
fn fmt(f: &mut Formatter<'_>, node: &Node, depth: usize) -> std::fmt::Result {
53+
fn fmt(
54+
f: &mut Formatter<'_>,
55+
node: &StatsCheckerNode,
56+
depth: usize,
57+
) -> std::fmt::Result {
4658
for _ in 0..depth {
4759
write!(f, " ")?;
4860
}
@@ -85,17 +97,22 @@ impl Debug for Node {
8597
}
8698
}
8799

88-
impl Node {
100+
impl StatsCheckerNode {
101+
/// Given an already executed [ExecutionPlan], builds a [StatsCheckerNode] taking into account:
102+
/// - its planning time statistics
103+
/// - its runtime metrics
104+
///
105+
/// The plan passed in this constructor should have been fully executed.
89106
fn from_plan(
90107
plan: &Arc<dyn ExecutionPlan>,
91108
opts: StatsVsMetricsDisplayOptions,
92109
) -> Result<Self> {
93110
let mut children = vec![];
94111
for child in plan.children() {
95-
children.push(Node::from_plan(child, opts)?);
112+
children.push(StatsCheckerNode::from_plan(child, opts)?);
96113
}
97114

98-
let mut node = Node {
115+
let mut node = StatsCheckerNode {
99116
name: plan.name().to_string(),
100117
stats: plan.partition_statistics(None)?,
101118
output_rows: None,
@@ -113,8 +130,10 @@ impl Node {
113130
Ok(node)
114131
}
115132

133+
/// An accuracy score for the number of rows that was estimated through [Statistics] vs what
134+
/// was actually collected at runtime.
116135
fn avg_row_accuracy(&self) -> usize {
117-
fn collect_accuracy(node: &Node) -> Vec<usize> {
136+
fn collect_accuracy(node: &StatsCheckerNode) -> Vec<usize> {
118137
let mut results = vec![];
119138
for child in &node.children {
120139
results.extend(collect_accuracy(child));
@@ -130,8 +149,10 @@ impl Node {
130149
accuracy.iter().sum::<usize>() / accuracy.len()
131150
}
132151

152+
/// An accuracy score for the number of bytes that was estimated through [Statistics] vs what
153+
/// was actually collected at runtime.
133154
fn avg_byte_accuracy(&self) -> usize {
134-
fn collect_accuracy(node: &Node) -> Vec<usize> {
155+
fn collect_accuracy(node: &StatsCheckerNode) -> Vec<usize> {
135156
let mut results = vec![];
136157
for child in &node.children {
137158
results.extend(collect_accuracy(child));

datafusion/core/tests/statistics/tpcds.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ use std::fs;
2727
#[cfg(test)]
2828
mod tests {
2929
use super::*;
30-
use crate::statistics::Node;
30+
use crate::statistics::StatsCheckerNode;
3131

3232
#[tokio::test]
3333
async fn tpcds_1() -> Result<()> {
@@ -837,7 +837,7 @@ mod tests {
837837
let df = df.unwrap();
838838
let plan = df.create_physical_plan().await?;
839839
collect(plan.clone(), ctx.task_ctx()).await?;
840-
let node = Node::from_plan(
840+
let node = StatsCheckerNode::from_plan(
841841
&plan,
842842
StatsVsMetricsDisplayOptions {
843843
display_output_bytes: true,

0 commit comments

Comments
 (0)