Add FilterExec chain benchmark for partition-preserving double-walk cost

asolimando · asolimando · commit bf43bc71b2f4 · 2026-05-19T20:23:42.000+02:00
diff --git a/datafusion/physical-plan/benches/compute_statistics.rs b/datafusion/physical-plan/benches/compute_statistics.rs
@@ -32,14 +32,18 @@ use std::sync::Arc;
 
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
+use datafusion_common::ScalarValue;
 use datafusion_common::tree_node::TreeNodeRecursion;
 use datafusion_common::{Result, Statistics};
 use datafusion_execution::TaskContext;
 use datafusion_physical_expr::EquivalenceProperties;
+use datafusion_physical_expr::PhysicalExpr;
+use datafusion_physical_expr::expressions::Literal;
 use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec;
 use datafusion_physical_plan::execution_plan::{
     Boundedness, EmissionType, ExecutionPlan, PlanProperties,
 };
+use datafusion_physical_plan::filter::FilterExec;
 use datafusion_physical_plan::joins::CrossJoinExec;
 use datafusion_physical_plan::statistics_context::{StatisticsArgs, compute_statistics};
 use datafusion_physical_plan::{
@@ -102,9 +106,7 @@ impl ExecutionPlan for BenchLeaf {
 
     fn apply_expressions(
         &self,
-        _f: &mut dyn FnMut(
-            &dyn datafusion_physical_expr::PhysicalExpr,
-        ) -> Result<TreeNodeRecursion>,
+        _f: &mut dyn FnMut(&dyn PhysicalExpr) -> Result<TreeNodeRecursion>,
     ) -> Result<TreeNodeRecursion> {
         Ok(TreeNodeRecursion::Continue)
     }
@@ -145,6 +147,23 @@ fn build_cross_join_tree(depth: usize, next_col: &mut usize) -> Arc<dyn Executio
     Arc::new(CrossJoinExec::new(left, right))
 }
 
+/// Builds a chain of `depth` `FilterExec` nodes stacked on a single `BenchLeaf`.
+///
+/// Every filter shares one always-true literal predicate. The chain demonstrates the
+/// double-walk cost: the framework first walks the whole tree computing `None` stats,
+/// then each filter requests per-partition stats via `compute_child_statistics`.
+fn build_filter_chain(depth: usize) -> Arc<dyn ExecutionPlan> {
+    let leaf: Arc<dyn ExecutionPlan> = Arc::new(BenchLeaf::new("a"));
+    let always_true: Arc<dyn PhysicalExpr> =
+        Arc::new(Literal::new(ScalarValue::Boolean(Some(true))));
+    // Each step wraps the current root in a new filter, which becomes the next root.
+    (0..depth).fold(leaf, |child, _| {
+        let filter = FilterExec::try_new(Arc::clone(&always_true), child)
+            .expect("FilterExec::try_new failed");
+        Arc::new(filter) as Arc<dyn ExecutionPlan>
+    })
+}
+
 /// Recursive walk without a shared cross-node cache, simulating pre-cache behavior.
 /// Each operator's internal `compute_child_statistics` call triggers a fresh
 /// subtree walk, resulting in O(n^2) total node visits for a chain of depth n.
@@ -211,6 +230,40 @@ fn bench_compute_statistics(c: &mut Criterion) {
         );
     }
     group.finish();
+
+    // --- Filter chain (partition-preserving linear plan) ---
+    // When called with Some(0), the framework first walks the entire tree
+    // computing None stats, then each filter requests Some(0) on demand.
+    // Both walks are cached, so the total cost is ~2n node visits, versus n for None.
+    let mut group = c.benchmark_group("compute_statistics_filter_chain");
+    for depth in [10, 20, 50] {
+        let plan = build_filter_chain(depth);
+        group.bench_with_input(
+            BenchmarkId::new("cached_partition", depth),
+            &plan,
+            |b, plan| {
+                b.iter(|| compute_statistics(plan.as_ref(), Some(0)).unwrap());
+            },
+        );
+        group.bench_with_input(
+            BenchmarkId::new("cached_overall", depth),
+            &plan,
+            |b, plan| {
+                b.iter(|| compute_statistics(plan.as_ref(), None).unwrap());
+            },
+        );
+        group.bench_with_input(
+            BenchmarkId::new("no_shared_cache", depth),
+            &plan,
+            |b, plan| {
+                b.iter(|| {
+                    compute_statistics_without_shared_cache(plan.as_ref(), Some(0))
+                        .unwrap()
+                });
+            },
+        );
+    }
+    group.finish();
 }
 
 criterion_group!(benches, bench_compute_statistics);