77//! to get a blanket [`TableFunction`] implementation covering init, scan, progress, filter
88//! pushdown, cardinality, and partitioning.
99
10+ use std:: cmp:: max;
1011use std:: fmt:: Debug ;
1112use std:: ops:: Range ;
1213use std:: sync:: Arc ;
@@ -76,6 +77,7 @@ use crate::duckdb::DuckdbStringMapRef;
7677use crate :: duckdb:: ExpressionRef ;
7778use crate :: duckdb:: LogicalType ;
7879use crate :: duckdb:: PartitionData ;
80+ use crate :: duckdb:: TableFilterClass ;
7981use crate :: duckdb:: TableFilterSetRef ;
8082use crate :: duckdb:: TableFunction ;
8183use crate :: duckdb:: TableInitInput ;
@@ -120,6 +122,7 @@ pub struct DataSourceBindData {
120122 data_source : Arc < MultiLayoutDataSource > ,
121123 filter_exprs : Vec < Expression > ,
122124 column_fields : Vec < DuckdbField > ,
125+ has_non_optional_filter : bool ,
123126}
124127
125128impl Clone for DataSourceBindData {
@@ -129,6 +132,7 @@ impl Clone for DataSourceBindData {
129132 // filter_exprs are consumed once in `init_global`.
130133 filter_exprs : vec ! [ ] ,
131134 column_fields : self . column_fields . clone ( ) ,
135+ has_non_optional_filter : self . has_non_optional_filter ,
132136 }
133137 }
134138}
@@ -254,6 +258,20 @@ impl ColumnStatisticsAggregate {
254258 }
255259}
256260
261+ // DuckDB requires post-filter cardinality estimates; otherwise the join
262+ // planner may flip join sides, which is a huge regression for some
263+ // queries, e.g. 1000x for TPC-DS query 85.
264+ //
265+ // See duckdb/src/optimizer/join_order/relation_statistics_helper.cpp
/// Fraction of rows assumed to survive filtering when a non-optional filter is
/// present and no better estimate is available.
// NOTE(review): presumably mirrors the default selectivity used by DuckDB's own
// relation statistics helper — confirm the value stays in sync.
const DEFAULT_SELECTIVITY: f64 = 0.2;

/// Returns the row-count estimate to report after filter pushdown.
///
/// * `cardinality` - the unfiltered row count.
/// * `has_non_optional_filter` - whether at least one non-optional filter applies.
///
/// Without a non-optional filter the input is returned unchanged. Otherwise the
/// count is scaled by [`DEFAULT_SELECTIVITY`], truncated towards zero, and
/// clamped so the result is never below 1.
fn postfilter_cardinality(cardinality: u64, has_non_optional_filter: bool) -> u64 {
    if !has_non_optional_filter {
        return cardinality;
    }

    // The float-to-integer `as` cast truncates the fractional part.
    let scaled = (cardinality as f64 * DEFAULT_SELECTIVITY) as u64;
    max(1, scaled)
}
274+
257275impl < T : DataSourceTableFunction > TableFunction for T {
258276 type BindData = DataSourceBindData ;
259277 type GlobalState = DataSourceGlobal ;
@@ -277,6 +295,7 @@ impl<T: DataSourceTableFunction> TableFunction for T {
277295 data_source : Arc :: new ( data_source) ,
278296 filter_exprs : vec ! [ ] ,
279297 column_fields,
298+ has_non_optional_filter : false ,
280299 } )
281300 }
282301
@@ -299,13 +318,15 @@ impl<T: DataSourceTableFunction> TableFunction for T {
299318 row_range,
300319 file_selection,
301320 file_range,
321+ has_non_optional_filter,
302322 } = extract_table_filter_expr (
303323 init_input. table_filter_set ( ) ,
304324 column_ids,
305325 & bind_data. column_fields ,
306326 & bind_data. filter_exprs ,
307327 bind_data. data_source . dtype ( ) ,
308328 ) ?;
329+ bind_data. has_non_optional_filter = has_non_optional_filter;
309330
310331 let filter_expr_str = filter
311332 . as_ref ( )
@@ -504,17 +525,9 @@ impl<T: DataSourceTableFunction> TableFunction for T {
504525 let Some ( expr) = try_from_bound_expression ( expr) ? else {
505526 return Ok ( false ) ;
506527 } ;
507- bind_data. filter_exprs . push ( expr) ;
508528
509- // NOTE(ngates): Vortex does indeed run exact filters, so in theory we should return `true`
510- // here to tell DuckDB we've handled the filter. However, DuckDB applies some crude
511- // cardinality estimation heuristics (e.g. an equality filter => 20% selectivity) that
512- // means by returning false, DuckDB runs an additional filter (a little bit of overhead)
513- // but tends to end up with a better query plan.
514- // If we plumb row count estimation into the layout tree, perhaps we could use zone maps
515- // etc. to return estimates. But this function is probably called too late anyway. Maybe
516- // we need our own cardinality heuristics.
517- Ok ( false )
529+ bind_data. filter_exprs . push ( expr) ;
530+ Ok ( true )
518531 }
519532
520533 /// Get column-wise statistics. Available only if we're reading a single
@@ -542,8 +555,10 @@ impl<T: DataSourceTableFunction> TableFunction for T {
542555
543556 fn cardinality ( bind_data : & Self :: BindData ) -> Cardinality {
544557 match bind_data. data_source . row_count ( ) {
545- Some ( Precision :: Exact ( v) ) => Cardinality :: Maximum ( v) ,
546- Some ( Precision :: Inexact ( v) ) => Cardinality :: Estimate ( v) ,
558+ Some ( Precision :: Exact ( v) | Precision :: Inexact ( v) ) => {
559+ // Post-filter estimate is always a heuristic.
560+ Cardinality :: Estimate ( postfilter_cardinality ( v, bind_data. has_non_optional_filter ) )
561+ }
547562 None => Cardinality :: Unknown ,
548563 }
549564 }
@@ -687,6 +702,7 @@ struct FilterWithVirtualColumns {
687702 row_range : Option < Range < u64 > > ,
688703 file_selection : Selection ,
689704 file_range : Option < Range < u64 > > ,
705+ has_non_optional_filter : bool ,
690706}
691707
692708/// Creates a table filter expression, row selection, and row range from the table filter set,
@@ -698,6 +714,8 @@ fn extract_table_filter_expr(
698714 additional_filters : & [ Expression ] ,
699715 dtype : & DType ,
700716) -> VortexResult < FilterWithVirtualColumns > {
717+ let mut has_non_optional_filter = false ;
718+
701719 let mut table_filter_exprs: HashSet < Expression > = if let Some ( filter) = table_filter_set {
702720 filter
703721 . into_iter ( )
@@ -706,6 +724,8 @@ fn extract_table_filter_expr(
706724 !is_virtual_column ( column_ids[ idx_u] )
707725 } )
708726 . map ( |( idx, ex) | {
727+ has_non_optional_filter |= !matches ! ( ex. as_class( ) , TableFilterClass :: Optional ( _) ) ;
728+
709729 let idx_u: usize = idx. as_ ( ) ;
710730 let col_idx: usize = column_ids[ idx_u] . as_ ( ) ;
711731 let name = & column_fields. get ( col_idx) . vortex_expect ( "exists" ) . name ;
@@ -741,6 +761,7 @@ fn extract_table_filter_expr(
741761 row_range,
742762 file_selection,
743763 file_range,
764+ has_non_optional_filter,
744765 } ;
745766 Ok ( out)
746767}
0 commit comments