Skip to content

Commit aeae579

Browse files
authored
Report DuckDB max cardinality for exact scans (#8582)
## Rationale for this change Reports exact cardinality to DuckDB when we know it. Signed-off-by: "Nicholas Gates" <nick@nickgates.com>
1 parent 9d3aafb commit aeae579

4 files changed

Lines changed: 23 additions & 6 deletions

File tree

vortex-duckdb/cpp/include/table_function.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ typedef struct {
6262
typedef struct {
6363
idx_t estimated_cardinality;
6464
bool has_estimated_cardinality;
65+
idx_t max_cardinality;
66+
bool has_max_cardinality;
6567
} duckdb_vx_node_statistics;
6668

6769
typedef struct {

vortex-duckdb/cpp/table_function.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,8 @@ unique_ptr<NodeStatistics> c_cardinality(ClientContext &, const FunctionData *bi
297297
auto out = make_uniq<NodeStatistics>();
298298
out->has_estimated_cardinality = stats.has_estimated_cardinality;
299299
out->estimated_cardinality = stats.estimated_cardinality;
300-
out->has_max_cardinality = false;
300+
out->has_max_cardinality = stats.has_max_cardinality;
301+
out->max_cardinality = stats.max_cardinality;
301302

302303
return out;
303304
}

vortex-duckdb/src/ffi.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,12 @@ pub unsafe extern "C-unwind" fn duckdb_table_function_cardinality(
172172

173173
match cardinality(bind_data) {
174174
Cardinality::Unknown => {}
175+
Cardinality::Exact(c) => {
176+
node_stats.has_estimated_cardinality = true;
177+
node_stats.estimated_cardinality = c as _;
178+
node_stats.has_max_cardinality = true;
179+
node_stats.max_cardinality = c as _;
180+
}
175181
Cardinality::Estimate(c) => {
176182
node_stats.has_estimated_cardinality = true;
177183
node_stats.estimated_cardinality = c as _;

vortex-duckdb/src/table_function.rs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ pub struct PartitionData {
138138
pub enum Cardinality {
139139
/// Unknown number of rows
140140
Unknown,
141+
/// The exact number of rows.
142+
Exact(u64),
141143
/// An estimate of the number of rows.
142144
Estimate(u64),
143145
}
@@ -478,12 +480,18 @@ pub fn statistics(bind_data: &TableFunctionBind, column_index: usize) -> Option<
478480
/// here.
479481
const DEFAULT_SELECTIVITY: f64 = 0.2;
480482
pub fn cardinality(bind_data: &TableFunctionBind) -> Cardinality {
483+
let has_non_optional_filter = bind_data.has_non_optional_filter.load(Ordering::Relaxed);
481484
match bind_data.data_source.row_count() {
482-
Precision::Exact(v) | Precision::Inexact(v) => {
483-
if !bind_data.has_non_optional_filter.load(Ordering::Relaxed) {
484-
// Although we may have an exact upper bound here, reporting
485-
// it as exact has a negative performance impact on tpcds as
486-
// it's not a real post-filter calculation.
485+
Precision::Exact(v) => {
486+
if !has_non_optional_filter {
487+
return Cardinality::Exact(v);
488+
}
489+
let post_cardinality = v as f64 * DEFAULT_SELECTIVITY;
490+
let post_cardinality: u64 = post_cardinality.as_();
491+
Cardinality::Estimate(max(1, post_cardinality))
492+
}
493+
Precision::Inexact(v) => {
494+
if !has_non_optional_filter {
487495
return Cardinality::Estimate(v);
488496
}
489497
let post_cardinality = v as f64 * DEFAULT_SELECTIVITY;

0 commit comments

Comments
 (0)