@@ -26,6 +26,7 @@ use async_trait::async_trait;
2626use datafusion_common:: { Constraints , Statistics , not_impl_err} ;
2727use datafusion_common:: { Result , internal_err} ;
2828use datafusion_expr:: Expr ;
29+ use datafusion_expr:: statistics:: StatisticsRequest ;
2930
3031use datafusion_expr:: dml:: InsertOp ;
3132use datafusion_expr:: {
@@ -406,6 +407,7 @@ pub struct ScanArgs<'a> {
406407 filters : Option < & ' a [ Expr ] > ,
407408 projection : Option < & ' a [ usize ] > ,
408409 limit : Option < usize > ,
410+ statistics_requests : & ' a [ StatisticsRequest ] ,
409411}
410412
411413impl < ' a > ScanArgs < ' a > {
@@ -467,6 +469,32 @@ impl<'a> ScanArgs<'a> {
467469 pub fn limit ( & self ) -> Option < usize > {
468470 self . limit
469471 }
472+
473+ /// Specifies the statistics the caller may use when optimizing the query.
474+ ///
475+ /// This is intended to allow the `TableProvider` to cheaply provide
476+ /// statistics that may help, such as those it has in an in-memory catalog
477+ /// or from some other metadata source.
478+ ///
479+ /// `TableProvider`s read these via [`Self::statistics_requests()`]; anything
480+ /// a `TableProvider` cannot answer cheaply it simply ignores. DataFusion's
481+ /// own `TableProvider`s ignore this field — it exists so a request can be
482+ /// threaded from a custom optimizer rule (which annotates
483+ /// `TableScan::statistics_requests`) through to a custom `TableProvider`.
484+ pub fn with_statistics_requests (
485+ mut self ,
486+ statistics_requests : & ' a [ StatisticsRequest ] ,
487+ ) -> Self {
488+ self . statistics_requests = statistics_requests;
489+ self
490+ }
491+
492+ /// Get the statistics requests for the scan. Empty if none were set.
493+ ///
494+ /// See [`Self::with_statistics_requests`] for more details
495+ pub fn statistics_requests ( & self ) -> & ' a [ StatisticsRequest ] {
496+ self . statistics_requests
497+ }
470498}
471499
472500/// Result of a table scan operation from [`TableProvider::scan_with_args`].
0 commit comments