Skip to content

Commit 7c58710

Browse files
zhuqi-lucas and claude committed
feat: add sort pushdown benchmark and SLT tests
Add benchmark and integration tests for sort pushdown optimization as a precursor to the core optimization PR (#21182).

Benchmark: new `sort-pushdown` subcommand with 4 queries testing sort elimination (ASC full scan, ASC LIMIT, wide full, wide LIMIT).

SLT tests (5 new groups):
- Test A: Non-overlapping files + WITH ORDER → Sort eliminated
- Test B: Overlapping files → SortExec retained
- Test C: LIMIT queries (ASC sort elimination + DESC reverse scan)
- Test D: target_partitions=2 → SPM + per-partition sort elimination
- Test E: Inferred ordering from Parquet metadata (no WITH ORDER)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 06b4791 commit 7c58710

File tree

9 files changed

+797
-1
lines changed

9 files changed

+797
-1
lines changed

benchmarks/bench.sh

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,10 @@ clickbench_partitioned: ClickBench queries against partitioned (100 files) parqu
106106
clickbench_pushdown: ClickBench queries against partitioned (100 files) parquet w/ filter_pushdown enabled
107107
clickbench_extended: ClickBench \"inspired\" queries against a single parquet (DataFusion specific)
108108
109+
# Sort Pushdown Benchmarks
110+
sort_pushdown: Sort pushdown baseline (no WITH ORDER) on TPC-H data (SF=1)
111+
sort_pushdown_sorted: Sort pushdown with WITH ORDER — tests sort elimination on non-overlapping files
112+
109113
# Sorted Data Benchmarks (ORDER BY Optimization)
110114
clickbench_sorted: ClickBench queries on pre-sorted data using prefer_existing_sort (tests sort elimination optimization)
111115
@@ -309,6 +313,10 @@ main() {
309313
# same data as for tpch
310314
data_tpch "1" "parquet"
311315
;;
316+
sort_pushdown|sort_pushdown_sorted)
317+
# same data as for tpch
318+
data_tpch "1" "parquet"
319+
;;
312320
sort_tpch)
313321
# same data as for tpch
314322
data_tpch "1" "parquet"
@@ -509,6 +517,12 @@ main() {
509517
external_aggr)
510518
run_external_aggr
511519
;;
520+
sort_pushdown)
521+
run_sort_pushdown
522+
;;
523+
sort_pushdown_sorted)
524+
run_sort_pushdown_sorted
525+
;;
512526
sort_tpch)
513527
run_sort_tpch "1"
514528
;;
@@ -1070,6 +1084,22 @@ run_external_aggr() {
10701084
debug_run $CARGO_COMMAND --bin external_aggr -- benchmark --partitions 4 --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}" ${QUERY_ARG}
10711085
}
10721086

1087+
# Runs the sort pushdown benchmark (without WITH ORDER)
1088+
run_sort_pushdown() {
1089+
TPCH_DIR="${DATA_DIR}/tpch_sf1"
1090+
RESULTS_FILE="${RESULTS_DIR}/sort_pushdown.json"
1091+
echo "Running sort pushdown benchmark (no WITH ORDER)..."
1092+
debug_run $CARGO_COMMAND --bin dfbench -- sort-pushdown --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG}
1093+
}
1094+
1095+
# Runs the sort pushdown benchmark with WITH ORDER (enables sort elimination)
1096+
run_sort_pushdown_sorted() {
1097+
TPCH_DIR="${DATA_DIR}/tpch_sf1"
1098+
RESULTS_FILE="${RESULTS_DIR}/sort_pushdown_sorted.json"
1099+
echo "Running sort pushdown benchmark (with WITH ORDER)..."
1100+
debug_run $CARGO_COMMAND --bin dfbench -- sort-pushdown --sorted --iterations 5 --path "${TPCH_DIR}" -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG}
1101+
}
1102+
10731103
# Runs the sort integration benchmark
10741104
run_sort_tpch() {
10751105
SCALE_FACTOR=$1
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
-- Sort elimination: ORDER BY sort key ASC (full scan)
2+
-- With --sorted: SortExec removed, sequential scan in file order
3+
-- Without --sorted: full SortExec required
4+
SELECT l_orderkey, l_partkey, l_suppkey
5+
FROM lineitem
6+
ORDER BY l_orderkey
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- Sort elimination + limit pushdown
2+
-- With --sorted: SortExec removed + limit pushed to DataSourceExec
3+
-- Without --sorted: TopK sort over all data
4+
SELECT l_orderkey, l_partkey, l_suppkey
5+
FROM lineitem
6+
ORDER BY l_orderkey
7+
LIMIT 100
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
-- Sort elimination: wide projection (all columns)
2+
-- Tests sort elimination benefit with larger row payload
3+
SELECT *
4+
FROM lineitem
5+
ORDER BY l_orderkey
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
-- Sort elimination + limit: wide projection
2+
SELECT *
3+
FROM lineitem
4+
ORDER BY l_orderkey
5+
LIMIT 100

benchmarks/src/bin/dfbench.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc;
3434
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
3535

3636
use datafusion_benchmarks::{
37-
cancellation, clickbench, h2o, hj, imdb, nlj, smj, sort_tpch, tpcds, tpch,
37+
cancellation, clickbench, h2o, hj, imdb, nlj, smj, sort_pushdown, sort_tpch, tpcds,
38+
tpch,
3839
};
3940

4041
#[derive(Debug, Parser)]
@@ -53,6 +54,7 @@ enum Options {
5354
Imdb(imdb::RunOpt),
5455
Nlj(nlj::RunOpt),
5556
Smj(smj::RunOpt),
57+
SortPushdown(sort_pushdown::RunOpt),
5658
SortTpch(sort_tpch::RunOpt),
5759
Tpch(tpch::RunOpt),
5860
Tpcds(tpcds::RunOpt),
@@ -72,6 +74,7 @@ pub async fn main() -> Result<()> {
7274
Options::Imdb(opt) => Box::pin(opt.run()).await,
7375
Options::Nlj(opt) => opt.run().await,
7476
Options::Smj(opt) => opt.run().await,
77+
Options::SortPushdown(opt) => opt.run().await,
7578
Options::SortTpch(opt) => opt.run().await,
7679
Options::Tpch(opt) => Box::pin(opt.run()).await,
7780
Options::Tpcds(opt) => Box::pin(opt.run()).await,

benchmarks/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ pub mod hj;
2323
pub mod imdb;
2424
pub mod nlj;
2525
pub mod smj;
26+
pub mod sort_pushdown;
2627
pub mod sort_tpch;
2728
pub mod tpcds;
2829
pub mod tpch;

0 commit comments

Comments
 (0)