Skip to content

Commit 37d7d05

Browse files
committed
Support row group limit pruning
1 parent 4d1c409 commit 37d7d05

1 file changed

Lines changed: 20 additions & 18 deletions

File tree

datafusion/core/tests/parquet/row_group_pruning.rs

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -174,10 +174,11 @@ impl RowGroupPruningTest {
174174
self,
175175
schema: Arc<Schema>,
176176
batches: Vec<RecordBatch>,
177+
max_row_per_group: usize,
177178
) {
178179
let output = ContextWithParquet::with_custom_data(
179180
self.scenario,
180-
RowGroup(2),
181+
RowGroup(max_row_per_group),
181182
schema,
182183
batches,
183184
)
@@ -1721,28 +1722,29 @@ fn make_i32_batch(
17211722
#[tokio::test]
17221723
async fn test_limit_pruning() -> datafusion_common::error::Result<()> {
17231724
// Scenario: Simple integer column, multiple row groups
1724-
// Query: SELECT c1 FROM t WHERE c1 > 0 LIMIT 2
1725+
// Query: SELECT c1 FROM t WHERE c1 >= 0 LIMIT 2
17251726
// We expect 2 rows in total.
17261727

1727-
// Row Group 0: c1 = [1, 2] -> Fully matched, 2 rows
1728-
// Row Group 1: c1 = [3, 4] -> Fully matched, 2 rows
1729-
// Row Group 2: c1 = [5, 6] -> Fully matched, 2 rows
1730-
// Row Group 3: c1 = [-1, 0] -> Pruned by statistics, 0 rows
1728+
// Row Group 0: c1 = [0, -2] -> Partially matched, 1 row
1729+
// Row Group 1: c1 = [0, 0] -> Fully matched, 2 rows
1730+
// Row Group 2: c1 = [0, 0] -> Fully matched, 2 rows
1731+
// Row Group 3: c1 = [0, 0] -> Fully matched, 2 rows
1732+
// Row Group 4: c1 = [-1, -2] -> Not matched
17311733

1732-
// If limit = 2, and RG0 is fully matched and has 2 rows, we should
1733-
// only scan RG0 and prune other row groups (RG1, RG2, RG3)
1734-
// RG3 is pruned by statistics. RG1 and RG2 are pruned by limit.
1735-
// So 3 row groups are effectively pruned due to limit pruning.
1734+
// If limit = 2, and RG1 is fully matched and has 2 rows, we should
1735+
// only scan RG1 and prune other row groups
1736+
// RG4 is pruned by statistics. RG2 and RG3 are pruned by limit.
1737+
// So 2 row groups are effectively pruned due to limit pruning.
17361738

17371739
let schema = Arc::new(Schema::new(vec![Field::new("c1", DataType::Int32, false)]));
1738-
let query = "explain verbose SELECT c1 FROM t WHERE c1 > 0 LIMIT 2";
1740+
let query = "SELECT c1 FROM t WHERE c1 >= 0 LIMIT 2";
17391741

17401742
let batches = vec![
17411743
make_i32_batch("c1", vec![0, -2])?,
1742-
make_i32_batch("c1", vec![0, 0])?, // RG0: Fully matched, 2 rows
1743-
make_i32_batch("c1", vec![0, 0])?, // RG1: Fully matched, 2 rows
1744-
make_i32_batch("c1", vec![0, 0])?, // RG2: Fully matched, 2 rows
1745-
make_i32_batch("c1", vec![-1, 0])?, // RG3: Pruned by statistics, 0 rows
1744+
make_i32_batch("c1", vec![0, 0])?,
1745+
make_i32_batch("c1", vec![0, 0])?,
1746+
make_i32_batch("c1", vec![0, 0])?,
1747+
make_i32_batch("c1", vec![-1, -2])?,
17461748
];
17471749

17481750
RowGroupPruningTest::new()
@@ -1751,9 +1753,9 @@ async fn test_limit_pruning() -> datafusion_common::error::Result<()> {
17511753
.with_expected_errors(Some(0))
17521754
.with_expected_rows(2)
17531755
.with_pruned_files(Some(0))
1754-
.with_matched_by_stats(Some(5)) // RG0, RG1, RG2 are matched by stats (c1 > 0)
1755-
.with_pruned_by_stats(Some(0)) // RG3 is pruned by stats (c1 = [-1, 0] does not satisfy c1 > 0)
1756-
.with_limit_pruned_row_groups(Some(4)) // RG1, RG2 are pruned by limit. (RG3 is already pruned by stats)
1756+
.with_matched_by_stats(Some(4))
1757+
.with_pruned_by_stats(Some(1))
1758+
.with_limit_pruned_row_groups(Some(2))
17571759
.test_row_group_prune_with_custom_data(schema, batches, 2)
17581760
.await;
17591761

0 commit comments

Comments
 (0)