77//! to get a blanket [`TableFunction`] implementation covering init, scan, progress, filter
88//! pushdown, cardinality, and partitioning.
99
10+ use std:: cmp:: max;
1011use std:: fmt:: Debug ;
1112use std:: ops:: Range ;
1213use std:: sync:: Arc ;
@@ -74,6 +75,7 @@ use crate::duckdb::DuckdbStringMapRef;
7475use crate :: duckdb:: ExpressionRef ;
7576use crate :: duckdb:: LogicalType ;
7677use crate :: duckdb:: PartitionData ;
78+ use crate :: duckdb:: TableFilterClass ;
7779use crate :: duckdb:: TableFilterSetRef ;
7880use crate :: duckdb:: TableFunction ;
7981use crate :: duckdb:: TableInitInput ;
@@ -118,6 +120,7 @@ pub struct DataSourceBindData {
118120 data_source : Arc < MultiLayoutDataSource > ,
119121 filter_exprs : Vec < Expression > ,
120122 column_fields : Vec < DuckdbField > ,
123+ has_non_optional_filter : bool ,
121124}
122125
123126impl Clone for DataSourceBindData {
@@ -127,6 +130,7 @@ impl Clone for DataSourceBindData {
127130 // filter_exprs are consumed once in `init_global`.
128131 filter_exprs : vec ! [ ] ,
129132 column_fields : self . column_fields . clone ( ) ,
133+ has_non_optional_filter : self . has_non_optional_filter ,
130134 }
131135 }
132136}
@@ -252,6 +256,23 @@ impl ColumnStatisticsAggregate {
252256 }
253257}
254258
// DuckDB needs post-filter cardinality estimates from the scan; without them
// the join planner may flip join sides, which is a huge regression for some
// queries (e.g. ~1000x for TPC-DS q85).
//
// See duckdb/src/optimizer/join_order/relation_statistics_helper.cpp
const DEFAULT_SELECTIVITY: f64 = 0.2;

/// Returns a heuristic estimate of the row count left after filtering.
///
/// * `initial_cardinality` - row count of the source before any filter.
/// * `has_non_optional_filter` - whether at least one non-optional filter
///   applies to the scan.
///
/// Without such a filter the input is returned unchanged. Otherwise the input
/// is scaled by [`DEFAULT_SELECTIVITY`] and kept within
/// `1..=initial_cardinality`; an empty source therefore stays at 0 rather
/// than being reported as 1 row.
// The casts are lossy on purpose: `u64 -> f64` may round very large counts,
// and the float -> integer `as` cast truncates toward zero and saturates.
#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation, clippy::cast_sign_loss)]
fn postfilter_cardinality(initial_cardinality: u64, has_non_optional_filter: bool) -> u64 {
    if !has_non_optional_filter {
        return initial_cardinality;
    }

    let scaled = (initial_cardinality as f64 * DEFAULT_SELECTIVITY) as u64;

    // Report at least one row for a non-empty source, but never more rows
    // than the source actually holds.
    max(1, scaled).min(initial_cardinality)
}

275+
255276impl < T : DataSourceTableFunction > TableFunction for T {
256277 type BindData = DataSourceBindData ;
257278 type GlobalState = DataSourceGlobal ;
@@ -275,6 +296,7 @@ impl<T: DataSourceTableFunction> TableFunction for T {
275296 data_source : Arc :: new ( data_source) ,
276297 filter_exprs : vec ! [ ] ,
277298 column_fields,
299+ has_non_optional_filter : false ,
278300 } )
279301 }
280302
@@ -297,13 +319,15 @@ impl<T: DataSourceTableFunction> TableFunction for T {
297319 row_range,
298320 file_selection,
299321 file_range,
322+ has_non_optional_filter,
300323 } = extract_table_filter_expr (
301324 init_input. table_filter_set ( ) ,
302325 column_ids,
303326 & bind_data. column_fields ,
304327 & bind_data. filter_exprs ,
305328 bind_data. data_source . dtype ( ) ,
306329 ) ?;
330+ bind_data. has_non_optional_filter |= has_non_optional_filter;
307331
308332 debug ! (
309333 %projection,
@@ -502,22 +526,37 @@ impl<T: DataSourceTableFunction> TableFunction for T {
502526 expr : & ExpressionRef ,
503527 ) -> VortexResult < bool > {
504528 debug ! ( %expr, "pushing down expression" ) ;
529+
505530 let Some ( expr) = try_from_bound_expression ( expr) ? else {
506531 debug ! ( %expr, "failed to push down expression" ) ;
507532 return Ok ( false ) ;
508533 } ;
509- debug ! ( %expr, "pushed down expression" ) ;
510- bind_data. filter_exprs . push ( expr) ;
511534
512- // NOTE(ngates): Vortex does indeed run exact filters, so in theory we should return `true`
513- // here to tell DuckDB we've handled the filter. However, DuckDB applies some crude
514- // cardinality estimation heuristics (e.g. an equality filter => 20% selectivity) that
515- // means by returning false, DuckDB runs an additional filter (a little bit of overhead)
516- // but tends to end up with a better query plan.
517- // If we plumb row count estimation into the layout tree, perhaps we could use zone maps
518- // etc. to return estimates. But this function is probably called too late anyway. Maybe
519- // we need our own cardinality heuristics.
520- Ok ( false )
535+ // Duckdb calls pushdown_complex_filter during planning phase.
536+ // If all filters are pushed down, duckdb enables a LEFT_DELIM_JOIN ->
537+ // COMPARISON_JOIN (HASH_JOIN) optimization:
538+ // duckdb/src/optimizer/deliminator.cpp: Deliminator::HasSelection,
539+ // Deliminator::Optimize.
540+ //
541+ // This leads to a massive regression on tpch sf=10 q17 and other
542+ // benchmarks.
543+ //
544+ // This bug is reported to Duckdb
545+ // https://github.com/duckdb/duckdb/issues/22669
546+ //
547+ // As a workaround, report the first complex filter as not pushed down.
548+ // pushdown_complex_filter is called during the planning phase, before
549+ // any table filters are pushed (that happens in init_global), so the
550+ // first filter seen here really is the first filter overall.
551+ let report_pushed = bind_data. has_non_optional_filter ;
552+
553+ // Only table filters may be optional, any complex filter is
554+ // non-optional by definition.
555+ bind_data. has_non_optional_filter = true ;
556+
557+ debug ! ( %expr, report_pushed, "pushed down expression" ) ;
558+ bind_data. filter_exprs . push ( expr) ;
559+ Ok ( report_pushed)
521560 }
522561
523562 /// Get column-wise statistics. Available only if we're reading a single
@@ -545,8 +584,10 @@ impl<T: DataSourceTableFunction> TableFunction for T {
545584
546585 fn cardinality ( bind_data : & Self :: BindData ) -> Cardinality {
547586 match bind_data. data_source . row_count ( ) {
548- Some ( Precision :: Exact ( v) ) => Cardinality :: Maximum ( v) ,
549- Some ( Precision :: Inexact ( v) ) => Cardinality :: Estimate ( v) ,
587+ Some ( Precision :: Exact ( v) | Precision :: Inexact ( v) ) => {
588+ // Post-filter estimate is always a heuristic.
589+ Cardinality :: Estimate ( postfilter_cardinality ( v, bind_data. has_non_optional_filter ) )
590+ }
550591 None => Cardinality :: Unknown ,
551592 }
552593 }
@@ -565,8 +606,8 @@ impl<T: DataSourceTableFunction> TableFunction for T {
/// Fills `map` with a textual description of this scan.
///
/// Always records the "Function" entry. When complex filter expressions are
/// held in the bind data, a "Filters" entry is added as well, with the
/// expressions rendered via `Display` and joined by the separator below.
// NOTE(review): presumably this feeds DuckDB's EXPLAIN output — confirm
// against the `TableFunction` trait documentation.
565606 fn to_string ( bind_data : & Self :: BindData , map : & mut DuckdbStringMapRef ) {
566607 map. push ( "Function" , "Vortex Scan" ) ;
567608 if !bind_data. filter_exprs . is_empty ( ) {
568- let mut filters = bind_data. filter_exprs . iter ( ) . map ( |f| format ! ( "{}" , f ) ) ;
569- map. push ( "Filters" , & filters. join ( " / \\ \n " ) ) ;
609+ let mut filters = bind_data. filter_exprs . iter ( ) . map ( |f| format ! ( "{f}" ) ) ;
610+ map. push ( "Filters" , & filters. join ( "\n " ) ) ;
570611 }
571612 }
572613}
@@ -687,6 +728,7 @@ struct FilterWithVirtualColumns {
687728 row_range : Option < Range < u64 > > ,
688729 file_selection : Selection ,
689730 file_range : Option < Range < u64 > > ,
731+ has_non_optional_filter : bool ,
690732}
691733
692734/// Creates a table filter expression, row selection, and row range from the table filter set,
@@ -698,6 +740,8 @@ fn extract_table_filter_expr(
698740 additional_filters : & [ Expression ] ,
699741 dtype : & DType ,
700742) -> VortexResult < FilterWithVirtualColumns > {
743+ let mut has_non_optional_filter = false ;
744+
701745 let mut table_filter_exprs: HashSet < Expression > = if let Some ( filter) = table_filter_set {
702746 filter
703747 . into_iter ( )
@@ -706,6 +750,8 @@ fn extract_table_filter_expr(
706750 !is_virtual_column ( column_ids[ idx_u] )
707751 } )
708752 . map ( |( idx, ex) | {
753+ has_non_optional_filter |= !matches ! ( ex. as_class( ) , TableFilterClass :: Optional ( _) ) ;
754+
709755 let idx_u: usize = idx. as_ ( ) ;
710756 let col_idx: usize = column_ids[ idx_u] . as_ ( ) ;
711757 let name = & column_fields. get ( col_idx) . vortex_expect ( "exists" ) . name ;
@@ -741,6 +787,7 @@ fn extract_table_filter_expr(
741787 row_range,
742788 file_selection,
743789 file_range,
790+ has_non_optional_filter,
744791 } ;
745792 Ok ( out)
746793}
0 commit comments