Skip to content

Commit afc0784

Browse files
authored
feat: add sort_pushdown_inexact benchmark for RG reorder (#21674)
## Which issue does this PR close? Closes #21582 ## Rationale for this change The existing sort_pushdown benchmarks only cover the Exact path (sort elimination). The Inexact path — where TopK is preserved and row group reorder by statistics helps threshold convergence — had no benchmark to measure its impact. ## What changes are included in this PR? - New benchmark: `sort_pushdown_inexact` with 4 DESC LIMIT queries (narrow/wide rows, small/large LIMIT) - Data generation: single large parquet file with shuffled row groups (simulates append-heavy workloads) - Enables `pushdown_filters` in sort_pushdown benchmarks so TopK's dynamic filter is pushed to the parquet reader for late materialization ## How to run ```bash ./bench.sh data sort_pushdown_inexact ./bench.sh run sort_pushdown_inexact ``` Or on GKE: `@alamb benchmark sort_pushdown_inexact` ## Are these changes tested? Benchmark code only — validated locally. ## Are there any user-facing changes? No. New benchmark only.
1 parent 03b390d commit afc0784

File tree

15 files changed

+216
-0
lines changed

15 files changed

+216
-0
lines changed

benchmarks/bench.sh

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ clickbench_extended: ClickBench \"inspired\" queries against a single parquet
109109
# Sort Pushdown Benchmarks
110110
sort_pushdown: Sort pushdown baseline (no WITH ORDER) on TPC-H data (SF=1)
111111
sort_pushdown_sorted: Sort pushdown with WITH ORDER — tests sort elimination on non-overlapping files
112+
sort_pushdown_inexact: Sort pushdown Inexact path (--sorted DESC) — tests reverse scan + RG reorder
113+
sort_pushdown_inexact_unsorted: Sort pushdown Inexact path (no WITH ORDER) — tests Unsupported path + RG reorder
114+
sort_pushdown_inexact_overlap: Sort pushdown Inexact path — partially overlapping RGs (streaming data scenario)
112115
113116
# Sorted Data Benchmarks (ORDER BY Optimization)
114117
clickbench_sorted: ClickBench queries on pre-sorted data using prefer_existing_sort (tests sort elimination optimization)
@@ -316,6 +319,9 @@ main() {
316319
sort_pushdown|sort_pushdown_sorted)
317320
data_sort_pushdown
318321
;;
322+
sort_pushdown_inexact|sort_pushdown_inexact_unsorted|sort_pushdown_inexact_overlap)
323+
data_sort_pushdown_inexact
324+
;;
319325
sort_tpch)
320326
# same data as for tpch
321327
data_tpch "1" "parquet"
@@ -522,6 +528,15 @@ main() {
522528
sort_pushdown_sorted)
523529
run_sort_pushdown_sorted
524530
;;
531+
sort_pushdown_inexact)
532+
run_sort_pushdown_inexact
533+
;;
534+
sort_pushdown_inexact_unsorted)
535+
run_sort_pushdown_inexact_unsorted
536+
;;
537+
sort_pushdown_inexact_overlap)
538+
run_sort_pushdown_inexact_overlap
539+
;;
525540
sort_tpch)
526541
run_sort_tpch "1"
527542
;;
@@ -1137,6 +1152,121 @@ run_sort_pushdown_sorted() {
11371152
debug_run $CARGO_COMMAND --bin dfbench -- sort-pushdown --sorted --iterations 5 --path "${SORT_PUSHDOWN_DIR}" --queries-path "${SCRIPT_DIR}/queries/sort_pushdown" -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG}
11381153
}
11391154

1155+
# Generates data for sort pushdown Inexact benchmark.
1156+
#
1157+
# Produces a single large lineitem parquet file where row groups have
1158+
# NON-OVERLAPPING but OUT-OF-ORDER l_orderkey ranges (each RG internally
1159+
# sorted, RGs shuffled). This simulates append-heavy workloads where data
1160+
# is written in batches at different times.
1161+
data_sort_pushdown_inexact() {
1162+
INEXACT_DIR="${DATA_DIR}/sort_pushdown_inexact/lineitem"
1163+
if [ -d "${INEXACT_DIR}" ] && [ "$(ls -A ${INEXACT_DIR}/*.parquet 2>/dev/null)" ]; then
1164+
echo "Sort pushdown Inexact data already exists at ${INEXACT_DIR}"
1165+
return
1166+
fi
1167+
1168+
echo "Generating sort pushdown Inexact benchmark data (single file, shuffled RGs)..."
1169+
1170+
# Re-use the sort_pushdown data as the source (generate if missing)
1171+
data_sort_pushdown
1172+
1173+
mkdir -p "${INEXACT_DIR}"
1174+
SRC_DIR="${DATA_DIR}/sort_pushdown/lineitem"
1175+
1176+
# Use datafusion-cli to bucket rows into 64 groups by a deterministic
1177+
# scrambler, then sort within each bucket by orderkey. This produces
1178+
# ~64 RG-sized segments where each has a tight orderkey range but the
1179+
# segments appear in scrambled (non-sorted) order in the file.
1180+
(cd "${SCRIPT_DIR}/.." && cargo run --release -p datafusion-cli -- -c "
1181+
CREATE EXTERNAL TABLE src
1182+
STORED AS PARQUET
1183+
LOCATION '${SRC_DIR}';
1184+
1185+
COPY (
1186+
SELECT * FROM src
1187+
ORDER BY
1188+
(l_orderkey * 1664525 + 1013904223) % 64,
1189+
l_orderkey
1190+
)
1191+
TO '${INEXACT_DIR}/shuffled.parquet'
1192+
STORED AS PARQUET
1193+
OPTIONS ('format.max_row_group_size' '100000');
1194+
")
1195+
1196+
echo "Sort pushdown Inexact shuffled data generated at ${INEXACT_DIR}"
1197+
ls -la "${INEXACT_DIR}"
1198+
1199+
# Also generate a file with partially overlapping row groups.
1200+
# Simulates streaming data with network delays: each chunk is mostly
1201+
# in order but has a small overlap with the next chunk (±5% of the
1202+
# chunk range). This is the pattern described by @adriangb — data
1203+
# arriving with timestamps that are generally increasing but with
1204+
# network-induced jitter causing small overlaps between row groups.
1205+
OVERLAP_DIR="${DATA_DIR}/sort_pushdown_inexact_overlap/lineitem"
1206+
if [ -d "${OVERLAP_DIR}" ] && [ "$(ls -A ${OVERLAP_DIR}/*.parquet 2>/dev/null)" ]; then
1207+
echo "Sort pushdown Inexact overlap data already exists at ${OVERLAP_DIR}"
1208+
return
1209+
fi
1210+
1211+
echo "Generating sort pushdown Inexact overlap data (partially overlapping RGs)..."
1212+
mkdir -p "${OVERLAP_DIR}"
1213+
1214+
(cd "${SCRIPT_DIR}/.." && cargo run --release -p datafusion-cli -- -c "
1215+
CREATE EXTERNAL TABLE src
1216+
STORED AS PARQUET
1217+
LOCATION '${SRC_DIR}';
1218+
1219+
-- Add jitter to l_orderkey: shift each row by a random-ish offset
1220+
-- proportional to its position. This creates overlap between adjacent
1221+
-- row groups while preserving the general ascending trend.
1222+
-- Formula: l_orderkey + (l_orderkey * 7 % 5000) - 2500
1223+
-- This adds ±2500 jitter, creating ~5K overlap between adjacent 100K-row RGs.
1224+
COPY (
1225+
SELECT * FROM src
1226+
ORDER BY l_orderkey + (l_orderkey * 7 % 5000) - 2500
1227+
)
1228+
TO '${OVERLAP_DIR}/overlapping.parquet'
1229+
STORED AS PARQUET
1230+
OPTIONS ('format.max_row_group_size' '100000');
1231+
")
1232+
1233+
echo "Sort pushdown Inexact overlap data generated at ${OVERLAP_DIR}"
1234+
ls -la "${OVERLAP_DIR}"
1235+
}
1236+
1237+
# Runs the sort pushdown Inexact benchmark (tests RG reorder by statistics).
1238+
# Enables pushdown_filters so TopK's dynamic filter is pushed to the parquet
1239+
# reader for late materialization (only needed for Inexact path).
1240+
run_sort_pushdown_inexact() {
1241+
INEXACT_DIR="${DATA_DIR}/sort_pushdown_inexact"
1242+
RESULTS_FILE="${RESULTS_DIR}/sort_pushdown_inexact.json"
1243+
echo "Running sort pushdown Inexact benchmark (--sorted, DESC, reverse scan path)..."
1244+
DATAFUSION_EXECUTION_PARQUET_PUSHDOWN_FILTERS=true \
1245+
debug_run $CARGO_COMMAND --bin dfbench -- sort-pushdown --sorted --iterations 5 --path "${INEXACT_DIR}" --queries-path "${SCRIPT_DIR}/queries/sort_pushdown_inexact" -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG}
1246+
}
1247+
1248+
# Runs the sort pushdown Inexact benchmark WITHOUT declared ordering.
1249+
# Tests the Unsupported path in try_pushdown_sort where RG reorder by
1250+
# statistics can still help TopK queries without any file ordering guarantee.
1251+
run_sort_pushdown_inexact_unsorted() {
1252+
INEXACT_DIR="${DATA_DIR}/sort_pushdown_inexact"
1253+
RESULTS_FILE="${RESULTS_DIR}/sort_pushdown_inexact_unsorted.json"
1254+
echo "Running sort pushdown Inexact benchmark (no WITH ORDER, Unsupported path)..."
1255+
DATAFUSION_EXECUTION_PARQUET_PUSHDOWN_FILTERS=true \
1256+
debug_run $CARGO_COMMAND --bin dfbench -- sort-pushdown --iterations 5 --path "${INEXACT_DIR}" --queries-path "${SCRIPT_DIR}/queries/sort_pushdown_inexact_unsorted" -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG}
1257+
}
1258+
1259+
# Runs the sort pushdown benchmark with partially overlapping RGs.
1260+
# Simulates streaming data with network jitter — RGs are mostly in order
1261+
# but have small overlaps (±2500 orderkey jitter between adjacent RGs).
1262+
run_sort_pushdown_inexact_overlap() {
1263+
OVERLAP_DIR="${DATA_DIR}/sort_pushdown_inexact_overlap"
1264+
RESULTS_FILE="${RESULTS_DIR}/sort_pushdown_inexact_overlap.json"
1265+
echo "Running sort pushdown Inexact benchmark (overlapping RGs, streaming data pattern)..."
1266+
DATAFUSION_EXECUTION_PARQUET_PUSHDOWN_FILTERS=true \
1267+
debug_run $CARGO_COMMAND --bin dfbench -- sort-pushdown --sorted --iterations 5 --path "${OVERLAP_DIR}" --queries-path "${SCRIPT_DIR}/queries/sort_pushdown_inexact_overlap" -o "${RESULTS_FILE}" ${QUERY_ARG} ${LATENCY_ARG}
1268+
}
1269+
11401270
# Runs the sort integration benchmark
11411271
run_sort_tpch() {
11421272
SCALE_FACTOR=$1
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
-- Inexact path: TopK + DESC LIMIT on ASC-declared file.
2+
-- With RG reorder, the first RG read contains the highest max value,
3+
-- so TopK's threshold tightens quickly and subsequent RGs get filtered
4+
-- efficiently via dynamic filter pushdown.
5+
SELECT l_orderkey, l_partkey, l_suppkey
6+
FROM lineitem
7+
ORDER BY l_orderkey DESC
8+
LIMIT 100
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- Inexact path: TopK + DESC LIMIT with larger fetch (1000).
2+
-- Larger LIMIT means more row_replacements; RG reorder reduces the
3+
-- total replacement count by tightening the threshold faster.
4+
SELECT l_orderkey, l_partkey, l_suppkey
5+
FROM lineitem
6+
ORDER BY l_orderkey DESC
7+
LIMIT 1000
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
-- Inexact path: wide projection (all columns) + DESC LIMIT.
2+
-- Shows the row-level filter benefit: with a tight threshold from the
3+
-- first RG, subsequent RGs skip decoding non-sort columns for filtered
4+
-- rows — bigger wins for wide tables.
5+
SELECT *
6+
FROM lineitem
7+
ORDER BY l_orderkey DESC
8+
LIMIT 100
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- Inexact path: wide projection + DESC LIMIT with larger fetch.
2+
-- Combines wide-row row-level filter benefit with larger LIMIT to
3+
-- demonstrate cumulative gains from RG reorder.
4+
SELECT *
5+
FROM lineitem
6+
ORDER BY l_orderkey DESC
7+
LIMIT 1000
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- Overlapping RGs: TopK + DESC LIMIT on file with partially overlapping
2+
-- row groups (simulates streaming data with network jitter).
3+
-- RG reorder places highest-max RG first for fastest threshold convergence.
4+
SELECT l_orderkey, l_partkey, l_suppkey
5+
FROM lineitem
6+
ORDER BY l_orderkey DESC
7+
LIMIT 100
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
-- Overlapping RGs: DESC LIMIT with larger fetch.
2+
SELECT l_orderkey, l_partkey, l_suppkey
3+
FROM lineitem
4+
ORDER BY l_orderkey DESC
5+
LIMIT 1000
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
-- Overlapping RGs: wide projection + DESC LIMIT.
2+
-- Row-level filter benefit: tight threshold skips decoding non-sort columns.
3+
SELECT *
4+
FROM lineitem
5+
ORDER BY l_orderkey DESC
6+
LIMIT 100
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
-- Overlapping RGs: wide projection + DESC LIMIT larger fetch.
2+
SELECT *
3+
FROM lineitem
4+
ORDER BY l_orderkey DESC
5+
LIMIT 1000
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
-- Unsupported path: TopK + ASC LIMIT on file without declared ordering.
2+
-- Tests RG reorder benefit when no WITH ORDER is declared — the
3+
-- Unsupported path in try_pushdown_sort triggers RG reorder.
4+
SELECT l_orderkey, l_partkey, l_suppkey
5+
FROM lineitem
6+
ORDER BY l_orderkey
7+
LIMIT 100

0 commit comments

Comments
 (0)