Skip to content

Commit 7dace0d

Browse files
authored
Expression pushdown for duckdb (#7727)
- Support expression pushdown similarly to how it's done for filter pushdown. - Support pushing down constants, comparisons, IN/NOT IN, struct_extract, contains, prefix, suffix, and LIKE/NOT LIKE. Signed-off-by: Mikhail Kot <to@myrrc.dev>
1 parent 96ffd3f commit 7dace0d

9 files changed

Lines changed: 272 additions & 86 deletions

File tree

vortex-bench/src/runner.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,10 +184,11 @@ impl SqlBenchmarkRunner {
184184
if let Some(expected_counts) = &self.expected_row_counts
185185
&& query_idx < expected_counts.len()
186186
{
187+
let expected = expected_counts[query_idx];
187188
assert_eq!(
188189
row_count,
189-
expected_counts[query_idx],
190-
"Row count mismatch for query {query_idx} - {engine}:{format}",
190+
expected,
191+
"Row count mismatch for query {query_idx} - {engine}:{format}, expected {expected}, got {row_count}",
191192
engine = self.engine,
192193
);
193194
}

vortex-duckdb/cpp/table_function.cpp

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ DUCKDB_INCLUDES_BEGIN
1515
#include "duckdb/main/capi/capi_internal.hpp"
1616
#include "duckdb/main/connection.hpp"
1717
#include "duckdb/parser/parsed_data/create_table_function_info.hpp"
18+
#include "duckdb/planner/expression/bound_operator_expression.hpp"
19+
#include "duckdb/planner/expression/bound_comparison_expression.hpp"
20+
#include "duckdb/planner/expression/bound_between_expression.hpp"
21+
#include "duckdb/planner/expression/bound_conjunction_expression.hpp"
22+
#include "duckdb/planner/expression/bound_function_expression.hpp"
1823
DUCKDB_INCLUDES_END
1924

2025
using namespace duckdb;
@@ -263,11 +268,22 @@ void function(ClientContext &, TableFunctionInput &input, DataChunk &output) {
263268
}
264269
}
265270

266-
void c_pushdown_complex_filter(ClientContext &,
267-
LogicalGet &,
268-
FunctionData *bind_data,
269-
vector<unique_ptr<Expression>> &filters) {
270-
auto &bind = bind_data->Cast<CTableBindData>();
271+
using FilterVec = vector<unique_ptr<Expression>>;
272+
273+
/*
274+
* Table filter pushdown is used for two tasks in duckdb:
275+
*
276+
* 1. Prune files based on filename or hive partitioning, see Parquet
277+
* filter pushdown. We don't use this because we do our own file-level pruning in
278+
* FileStatsLayoutReader, and we don't support hive partitioning yet.
279+
*
280+
* 2. Avoid reading unused file data. Filter expressions are pushed to Vortex,
281+
* converted to Vortex expressions and used during the scan.
282+
* DuckDB pushes a subset of expressions, i.e. equality operators, and also
283+
* expressions for which pushdown_expression returns true.
284+
*/
285+
void pushdown_complex_filter(const FunctionData &bind_data, FilterVec &filters) {
286+
const auto &bind = bind_data.Cast<CTableBindData>();
271287
void *const ffi_bind = bind.ffi_data->DataPtr();
272288
duckdb_vx_error error_out = nullptr;
273289

@@ -278,8 +294,6 @@ void c_pushdown_complex_filter(ClientContext &,
278294
if (error_out) {
279295
throw BinderException(IntoErrString(error_out));
280296
}
281-
282-
// If the pushdown complex filter returns true, we can remove the filter from the list.
283297
iter = pushed ? filters.erase(iter) : std::next(iter);
284298
}
285299
}
@@ -381,6 +395,9 @@ InsertionOrderPreservingMap<string> c_to_string(TableFunctionToStringInput &inpu
381395
return result;
382396
}
383397

398+
// pushdown_expression does not receive FunctionData, so we can't place it in the vtab
399+
extern "C" bool duckdb_vx_pushdown_expression(duckdb_vx_expr expr);
400+
384401
extern "C" duckdb_state duckdb_vx_tfunc_register(duckdb_database ffi_db, const duckdb_vx_tfunc_vtab_t *vtab) {
385402
D_ASSERT(ffi_db);
386403
D_ASSERT(vtab);
@@ -394,7 +411,12 @@ extern "C" duckdb_state duckdb_vx_tfunc_register(duckdb_database ffi_db, const d
394411
tf.filter_prune = true;
395412
tf.sampling_pushdown = false;
396413

397-
tf.pushdown_complex_filter = c_pushdown_complex_filter;
414+
tf.pushdown_expression = [](auto &, const auto &, Expression &expression) {
415+
return duckdb_vx_pushdown_expression(reinterpret_cast<duckdb_vx_expr>(&expression));
416+
};
417+
tf.pushdown_complex_filter = [](auto &, auto &, FunctionData *bind_data, FilterVec &filters) {
418+
pushdown_complex_filter(*bind_data, filters);
419+
};
398420
tf.cardinality = c_cardinality;
399421
tf.get_partition_info = get_partition_info;
400422
tf.get_partition_data = get_partition_data;

vortex-duckdb/include/vortex.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ const char *vortex_version_rust(void);
3838
*/
3939
const char *vortex_extension_version_rust(void);
4040

41+
bool duckdb_vx_pushdown_expression(duckdb_vx_expr expr);
42+
4143
#ifdef __cplusplus
4244
}
4345
#endif

0 commit comments

Comments
 (0)