Skip to content

Commit 9439602

Browse files
committed
fix
Signed-off-by: Mikhail Kot <to@myrrc.dev>
1 parent c59e16f commit 9439602

4 files changed

Lines changed: 82 additions & 21 deletions

File tree

vortex-duckdb/cpp/include/duckdb_vx/table_function.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ void duckdb_vx_string_map_insert(duckdb_vx_string_map map, const char *key, cons
4242

4343
// Input data passed into the init_global and init_local callbacks.
4444
typedef struct {
45-
const void *bind_data;
45+
void *bind_data;
4646

4747
/**
4848
* Projected columns that are requested to be read. These are not

vortex-duckdb/cpp/table_function.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,14 +112,27 @@ unique_ptr<BaseStatistics> numeric_stats(duckdb_column_statistics &stats, Logica
112112

113113
unique_ptr<BaseStatistics> string_stats(duckdb_column_statistics &stats, LogicalType type) {
114114
BaseStatistics out = StringStats::CreateUnknown(type);
115-
if (stats.min) {
115+
if (stats.min && stats.max) {
116+
const std::string &min = StringValue::Get(UnwrapValue(stats.min));
117+
StringStats::SetMin(out, min);
118+
119+
const std::string &max = StringValue::Get(UnwrapValue(stats.max));
120+
StringStats::SetMax(out, max);
121+
122+
if (min == max) {
123+
out.SetDistinctCount(1);
124+
}
125+
126+
duckdb_destroy_value(&stats.min);
127+
duckdb_destroy_value(&stats.max);
128+
} else if (stats.min) {
116129
StringStats::SetMin(out, StringValue::Get(UnwrapValue(stats.min)));
117130
duckdb_destroy_value(&stats.min);
118-
}
119-
if (stats.max) {
131+
} else if (stats.max) {
120132
StringStats::SetMax(out, StringValue::Get(UnwrapValue(stats.max)));
121133
duckdb_destroy_value(&stats.max);
122134
}
135+
123136
if (stats.max_string_length >> 63) {
124137
StringStats::SetMaxStringLength(out, uint32_t(stats.max_string_length));
125138
}

vortex-duckdb/src/datasource.rs

Lines changed: 62 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
//! to get a blanket [`TableFunction`] implementation covering init, scan, progress, filter
88
//! pushdown, cardinality, and partitioning.
99
10+
use std::cmp::max;
1011
use std::fmt::Debug;
1112
use std::ops::Range;
1213
use std::sync::Arc;
@@ -74,6 +75,7 @@ use crate::duckdb::DuckdbStringMapRef;
7475
use crate::duckdb::ExpressionRef;
7576
use crate::duckdb::LogicalType;
7677
use crate::duckdb::PartitionData;
78+
use crate::duckdb::TableFilterClass;
7779
use crate::duckdb::TableFilterSetRef;
7880
use crate::duckdb::TableFunction;
7981
use crate::duckdb::TableInitInput;
@@ -118,6 +120,7 @@ pub struct DataSourceBindData {
118120
data_source: Arc<MultiLayoutDataSource>,
119121
filter_exprs: Vec<Expression>,
120122
column_fields: Vec<DuckdbField>,
123+
has_non_optional_filter: bool,
121124
}
122125

123126
impl Clone for DataSourceBindData {
@@ -127,6 +130,7 @@ impl Clone for DataSourceBindData {
127130
// filter_exprs are consumed once in `init_global`.
128131
filter_exprs: vec![],
129132
column_fields: self.column_fields.clone(),
133+
has_non_optional_filter: self.has_non_optional_filter,
130134
}
131135
}
132136
}
@@ -252,6 +256,23 @@ impl ColumnStatisticsAggregate {
252256
}
253257
}
254258

259+
// Duckdb requires post-filter cardinality estimates, otherwise join
260+
// planner may flip join sides which is a huge regression for some
261+
// queries i.e. 1000x for tpcds 85.
262+
//
263+
// See duckdb/src/optimizer/join_order/relation_statistics_helper.cpp
264+
const DEFAULT_SELECTIVITY: f64 = 0.2;
265+
fn postfilter_cardinality(initial_cardinality: u64, has_non_optional_filter: bool) -> u64 {
266+
if has_non_optional_filter {
267+
let post_cardinality = initial_cardinality as f64 * DEFAULT_SELECTIVITY;
268+
// Clamp intentionally
269+
let post_cardinality: u64 = post_cardinality.as_();
270+
max(1, post_cardinality)
271+
} else {
272+
initial_cardinality
273+
}
274+
}
275+
255276
impl<T: DataSourceTableFunction> TableFunction for T {
256277
type BindData = DataSourceBindData;
257278
type GlobalState = DataSourceGlobal;
@@ -275,6 +296,7 @@ impl<T: DataSourceTableFunction> TableFunction for T {
275296
data_source: Arc::new(data_source),
276297
filter_exprs: vec![],
277298
column_fields,
299+
has_non_optional_filter: false,
278300
})
279301
}
280302

@@ -297,13 +319,15 @@ impl<T: DataSourceTableFunction> TableFunction for T {
297319
row_range,
298320
file_selection,
299321
file_range,
322+
has_non_optional_filter,
300323
} = extract_table_filter_expr(
301324
init_input.table_filter_set(),
302325
column_ids,
303326
&bind_data.column_fields,
304327
&bind_data.filter_exprs,
305328
bind_data.data_source.dtype(),
306329
)?;
330+
bind_data.has_non_optional_filter |= has_non_optional_filter;
307331

308332
debug!(
309333
%projection,
@@ -502,22 +526,37 @@ impl<T: DataSourceTableFunction> TableFunction for T {
502526
expr: &ExpressionRef,
503527
) -> VortexResult<bool> {
504528
debug!(%expr, "pushing down expression");
529+
505530
let Some(expr) = try_from_bound_expression(expr)? else {
506531
debug!(%expr, "failed to push down expression");
507532
return Ok(false);
508533
};
509-
debug!(%expr, "pushed down expression");
510-
bind_data.filter_exprs.push(expr);
511534

512-
// NOTE(ngates): Vortex does indeed run exact filters, so in theory we should return `true`
513-
// here to tell DuckDB we've handled the filter. However, DuckDB applies some crude
514-
// cardinality estimation heuristics (e.g. an equality filter => 20% selectivity) that
515-
// means by returning false, DuckDB runs an additional filter (a little bit of overhead)
516-
// but tends to end up with a better query plan.
517-
// If we plumb row count estimation into the layout tree, perhaps we could use zone maps
518-
// etc. to return estimates. But this function is probably called too late anyway. Maybe
519-
// we need our own cardinality heuristics.
520-
Ok(false)
535+
// Duckdb calls pushdown_complex_filter during planning phase.
536+
// If all filters are pushed down, duckdb enables a LEFT_DELIM_JOIN ->
537+
// COMPARISON_JOIN (HASH_JOIN) optimization:
538+
// duckdb/src/optimizer/deliminator.cpp: Deliminator::HasSelection,
539+
// Deliminator::Optimize.
540+
//
541+
// This leads to a massive regression on tpch sf=10 q17 and other
542+
// benchmarks.
543+
//
544+
// This bug is reported to Duckdb
545+
// https://github.com/duckdb/duckdb/issues/22669
546+
//
547+
// As a hack, report first filter as not pushed.
548+
// As pushdown_complex_filter is called during planning phase,
549+
// no table filters are pushed yet (this will happen in init_global),
550+
// so our first filter is a first filter indeed.
551+
let report_pushed = bind_data.has_non_optional_filter;
552+
553+
// Only table filters may be optional, any complex filter is
554+
// non-optional by definition.
555+
bind_data.has_non_optional_filter = true;
556+
557+
debug!(%expr, report_pushed, "pushed down expression");
558+
bind_data.filter_exprs.push(expr);
559+
Ok(report_pushed)
521560
}
522561

523562
/// Get column-wise statistics. Available only if we're reading a single
@@ -545,8 +584,10 @@ impl<T: DataSourceTableFunction> TableFunction for T {
545584

546585
fn cardinality(bind_data: &Self::BindData) -> Cardinality {
547586
match bind_data.data_source.row_count() {
548-
Some(Precision::Exact(v)) => Cardinality::Maximum(v),
549-
Some(Precision::Inexact(v)) => Cardinality::Estimate(v),
587+
Some(Precision::Exact(v) | Precision::Inexact(v)) => {
588+
// Post-filter estimate is always a heuristic.
589+
Cardinality::Estimate(postfilter_cardinality(v, bind_data.has_non_optional_filter))
590+
}
550591
None => Cardinality::Unknown,
551592
}
552593
}
@@ -565,8 +606,8 @@ impl<T: DataSourceTableFunction> TableFunction for T {
565606
fn to_string(bind_data: &Self::BindData, map: &mut DuckdbStringMapRef) {
566607
map.push("Function", "Vortex Scan");
567608
if !bind_data.filter_exprs.is_empty() {
568-
let mut filters = bind_data.filter_exprs.iter().map(|f| format!("{}", f));
569-
map.push("Filters", &filters.join(" /\\\n"));
609+
let mut filters = bind_data.filter_exprs.iter().map(|f| format!("{f}"));
610+
map.push("Filters", &filters.join("\n"));
570611
}
571612
}
572613
}
@@ -687,6 +728,7 @@ struct FilterWithVirtualColumns {
687728
row_range: Option<Range<u64>>,
688729
file_selection: Selection,
689730
file_range: Option<Range<u64>>,
731+
has_non_optional_filter: bool,
690732
}
691733

692734
/// Creates a table filter expression, row selection, and row range from the table filter set,
@@ -698,6 +740,8 @@ fn extract_table_filter_expr(
698740
additional_filters: &[Expression],
699741
dtype: &DType,
700742
) -> VortexResult<FilterWithVirtualColumns> {
743+
let mut has_non_optional_filter = false;
744+
701745
let mut table_filter_exprs: HashSet<Expression> = if let Some(filter) = table_filter_set {
702746
filter
703747
.into_iter()
@@ -706,6 +750,8 @@ fn extract_table_filter_expr(
706750
!is_virtual_column(column_ids[idx_u])
707751
})
708752
.map(|(idx, ex)| {
753+
has_non_optional_filter |= !matches!(ex.as_class(), TableFilterClass::Optional(_));
754+
709755
let idx_u: usize = idx.as_();
710756
let col_idx: usize = column_ids[idx_u].as_();
711757
let name = &column_fields.get(col_idx).vortex_expect("exists").name;
@@ -741,6 +787,7 @@ fn extract_table_filter_expr(
741787
row_range,
742788
file_selection,
743789
file_range,
790+
has_non_optional_filter,
744791
};
745792
Ok(out)
746793
}

vortex-duckdb/src/duckdb/table_function/init.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,10 @@ impl<'a, T: TableFunction> TableInitInput<'a, T> {
7474
}
7575
}
7676

77+
#[allow(clippy::mut_from_ref)]
7778
/// Returns the bind data for the table function.
78-
pub fn bind_data(&self) -> &T::BindData {
79-
unsafe { &*self.input.bind_data.cast::<T::BindData>() }
79+
pub fn bind_data(&self) -> &mut T::BindData {
80+
unsafe { &mut *self.input.bind_data.cast::<T::BindData>() }
8081
}
8182

8283
pub fn column_ids(&self) -> &[u64] {

0 commit comments

Comments
 (0)