Skip to content

Commit 5089cf4

Browse files
adriangbclaude
andcommitted
refactor(parquet-datasource): extract ExpectedPruning into a shared test_util module
`ExpectedPruning` is the only test helper shared between the row-group statistics tests and the bloom-filter tests. Move it into a new `#[cfg(test)] mod test_util` so the bloom-filter tests can be relocated into `bloom_filter.rs` without it. Its `assert` method previously reached into the private `access_plan` field of `RowGroupAccessPlanFilter`; add a test-only `pub(crate)` `access_plan()` accessor so the helper works from a sibling module without widening the field's visibility. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 1337959 commit 5089cf4

3 files changed

Lines changed: 83 additions & 48 deletions

File tree

datafusion/datasource-parquet/src/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ mod row_group_filter;
3838
mod sort;
3939
pub mod source;
4040
mod supported_predicates;
41+
#[cfg(test)]
42+
mod test_util;
4143
mod writer;
4244

4345
pub use access_plan::{ParquetAccessPlan, RowGroupAccess};

datafusion/datasource-parquet/src/row_group_filter.rs

Lines changed: 10 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,15 @@ impl RowGroupAccessPlanFilter {
7575
self.access_plan
7676
}
7777

78+
/// Returns a reference to the inner access plan.
79+
///
80+
/// Test-only accessor used by the shared assertion helpers in
81+
/// [`crate::test_util`].
82+
#[cfg(test)]
83+
pub(crate) fn access_plan(&self) -> &ParquetAccessPlan {
84+
&self.access_plan
85+
}
86+
7887
/// Returns the is_fully_matched vector.
7988
pub fn is_fully_matched(&self) -> &Vec<bool> {
8089
self.access_plan.fully_matched()
@@ -519,6 +528,7 @@ mod tests {
519528

520529
use super::*;
521530
use crate::reader::ParquetFileReader;
531+
use crate::test_util::ExpectedPruning;
522532

523533
use arrow::datatypes::DataType::Decimal128;
524534
use arrow::datatypes::{DataType, Field};
@@ -1525,54 +1535,6 @@ mod tests {
15251535
.await
15261536
}
15271537

1528-
// What row groups are expected to be left after pruning
1529-
#[derive(Debug)]
1530-
enum ExpectedPruning {
1531-
All,
1532-
/// Only the specified row groups are expected to REMAIN (not what is pruned)
1533-
Some(Vec<usize>),
1534-
None,
1535-
}
1536-
1537-
impl ExpectedPruning {
1538-
/// asserts that the pruned row group match this expectation
1539-
fn assert(&self, row_groups: &RowGroupAccessPlanFilter) {
1540-
let num_row_groups = row_groups.access_plan.len();
1541-
assert!(num_row_groups > 0);
1542-
let num_pruned = (0..num_row_groups)
1543-
.filter_map(|i| {
1544-
if row_groups.access_plan.should_scan(i) {
1545-
None
1546-
} else {
1547-
Some(1)
1548-
}
1549-
})
1550-
.sum::<usize>();
1551-
1552-
match self {
1553-
Self::All => {
1554-
assert_eq!(
1555-
num_row_groups, num_pruned,
1556-
"Expected all row groups to be pruned, but got {row_groups:?}"
1557-
);
1558-
}
1559-
ExpectedPruning::None => {
1560-
assert_eq!(
1561-
num_pruned, 0,
1562-
"Expected no row groups to be pruned, but got {row_groups:?}"
1563-
);
1564-
}
1565-
ExpectedPruning::Some(expected) => {
1566-
let actual = row_groups.access_plan.row_group_indexes();
1567-
assert_eq!(
1568-
expected, &actual,
1569-
"Unexpected row groups pruned. Expected {expected:?}, got {actual:?}"
1570-
);
1571-
}
1572-
}
1573-
}
1574-
}
1575-
15761538
fn assert_pruned(row_groups: RowGroupAccessPlanFilter, expected: ExpectedPruning) {
15771539
expected.assert(&row_groups);
15781540
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Test helpers shared across the Parquet datasource crate.
19+
20+
use crate::row_group_filter::RowGroupAccessPlanFilter;
21+
22+
/// What row groups are expected to be left after pruning.
23+
#[derive(Debug)]
24+
pub(crate) enum ExpectedPruning {
25+
/// All row groups are expected to be pruned.
26+
All,
27+
/// Only the specified row groups are expected to REMAIN (not what is pruned).
28+
Some(Vec<usize>),
29+
/// No row groups are expected to be pruned.
30+
None,
31+
}
32+
33+
impl ExpectedPruning {
34+
/// Asserts that the pruned row groups match this expectation.
35+
pub(crate) fn assert(&self, row_groups: &RowGroupAccessPlanFilter) {
36+
let access_plan = row_groups.access_plan();
37+
let num_row_groups = access_plan.len();
38+
assert!(num_row_groups > 0);
39+
let num_pruned = (0..num_row_groups)
40+
.filter_map(|i| {
41+
if access_plan.should_scan(i) {
42+
None
43+
} else {
44+
Some(1)
45+
}
46+
})
47+
.sum::<usize>();
48+
49+
match self {
50+
Self::All => {
51+
assert_eq!(
52+
num_row_groups, num_pruned,
53+
"Expected all row groups to be pruned, but got {row_groups:?}"
54+
);
55+
}
56+
ExpectedPruning::None => {
57+
assert_eq!(
58+
num_pruned, 0,
59+
"Expected no row groups to be pruned, but got {row_groups:?}"
60+
);
61+
}
62+
ExpectedPruning::Some(expected) => {
63+
let actual = access_plan.row_group_indexes();
64+
assert_eq!(
65+
expected, &actual,
66+
"Unexpected row groups pruned. Expected {expected:?}, got {actual:?}"
67+
);
68+
}
69+
}
70+
}
71+
}

0 commit comments

Comments
 (0)