Skip to content

Commit 84c05a6

Browse files
adriangb and claude committed
feat: thread statistics requests into ScanArgs
Add a `statistics_requests` field to `ScanArgs` (with `with_statistics_requests` / `statistics_requests` accessors) and have the physical planner thread `TableScan::statistics_requests` into it. This completes the request-side path: a custom optimizer rule annotates `TableScan`, and the request reaches a custom `TableProvider` in `scan_with_args`. DataFusion's own providers ignore the field; the default `ScanArgs` value is an empty slice.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent ebf6f8f commit 84c05a6

2 files changed

Lines changed: 26 additions & 1 deletion

File tree

datafusion/catalog/src/table.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ use async_trait::async_trait;
2626
use datafusion_common::{Constraints, Statistics, not_impl_err};
2727
use datafusion_common::{Result, internal_err};
2828
use datafusion_expr::Expr;
29+
use datafusion_expr::statistics::StatisticsRequest;
2930

3031
use datafusion_expr::dml::InsertOp;
3132
use datafusion_expr::{
@@ -406,6 +407,7 @@ pub struct ScanArgs<'a> {
406407
filters: Option<&'a [Expr]>,
407408
projection: Option<&'a [usize]>,
408409
limit: Option<usize>,
410+
statistics_requests: &'a [StatisticsRequest],
409411
}
410412

411413
impl<'a> ScanArgs<'a> {
@@ -467,6 +469,27 @@ impl<'a> ScanArgs<'a> {
467469
pub fn limit(&self) -> Option<usize> {
468470
self.limit
469471
}
472+
473+
/// Set the statistics the caller would like the provider to answer for
474+
/// this scan, if it can do so cheaply.
475+
///
476+
/// Providers read these via [`Self::statistics_requests()`]; anything a
477+
/// provider cannot answer cheaply it simply ignores. DataFusion's own
478+
/// `TableProvider`s ignore this field — it exists so a request can be
479+
/// threaded from a custom optimizer rule (which annotates
480+
/// `TableScan::statistics_requests`) through to a custom provider.
481+
pub fn with_statistics_requests(
482+
mut self,
483+
statistics_requests: &'a [StatisticsRequest],
484+
) -> Self {
485+
self.statistics_requests = statistics_requests;
486+
self
487+
}
488+
489+
/// Get the statistics requests for the scan. Empty if none were set.
490+
pub fn statistics_requests(&self) -> &'a [StatisticsRequest] {
491+
self.statistics_requests
492+
}
470493
}
471494

472495
/// Result of a table scan operation from [`TableProvider::scan_with_args`].

datafusion/core/src/physical_planner.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,7 @@ impl DefaultPhysicalPlanner {
645645
filters,
646646
fetch,
647647
projected_schema,
648+
statistics_requests,
648649
..
649650
} = scan;
650651

@@ -657,7 +658,8 @@ impl DefaultPhysicalPlanner {
657658
let opts = ScanArgs::default()
658659
.with_projection(projection.as_deref())
659660
.with_filters(Some(&filters_vec))
660-
.with_limit(*fetch);
661+
.with_limit(*fetch)
662+
.with_statistics_requests(statistics_requests);
661663
let res = source.scan_with_args(session_state, opts).await?;
662664
Arc::clone(res.plan())
663665
} else {

0 commit comments

Comments
 (0)