Skip to content

Commit 725d8bf

Browse files
authored
Revert "Row group limit pruning"
1 parent 19af083 commit 725d8bf

9 files changed

Lines changed: 18 additions & 881 deletions

File tree

datafusion/core/tests/parquet/mod.rs

Lines changed: 5 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -150,11 +150,6 @@ impl TestOutput {
150150
self.metric_value("row_groups_matched_statistics")
151151
}
152152

153-
/// The number of row_groups fully matched by statistics
154-
fn row_groups_fully_matched_statistics(&self) -> Option<usize> {
155-
self.metric_value("row_groups_fully_matched_statistics")
156-
}
157-
158153
/// The number of row_groups pruned by statistics
159154
fn row_groups_pruned_statistics(&self) -> Option<usize> {
160155
self.metric_value("row_groups_pruned_statistics")
@@ -183,11 +178,6 @@ impl TestOutput {
183178
self.metric_value("page_index_rows_pruned")
184179
}
185180

186-
/// The number of row groups pruned by limit pruning
187-
fn limit_pruned_row_groups(&self) -> Option<usize> {
188-
self.metric_value("limit_pruned_row_groups")
189-
}
190-
191181
fn description(&self) -> String {
192182
format!(
193183
"Input:\n{}\nQuery:\n{}\nOutput:\n{}\nMetrics:\n{}",
@@ -201,41 +191,20 @@ impl TestOutput {
201191
/// and the appropriate scenario
202192
impl ContextWithParquet {
203193
async fn new(scenario: Scenario, unit: Unit) -> Self {
204-
Self::with_config(scenario, unit, SessionConfig::new(), None, None).await
205-
}
206-
207-
/// Set custom schema and batches for the test
208-
pub async fn with_custom_data(
209-
scenario: Scenario,
210-
unit: Unit,
211-
schema: Arc<Schema>,
212-
batches: Vec<RecordBatch>,
213-
) -> Self {
214-
Self::with_config(
215-
scenario,
216-
unit,
217-
SessionConfig::new(),
218-
Some(schema),
219-
Some(batches),
220-
)
221-
.await
194+
Self::with_config(scenario, unit, SessionConfig::new()).await
222195
}
223196

224197
async fn with_config(
225198
scenario: Scenario,
226199
unit: Unit,
227200
mut config: SessionConfig,
228-
custom_schema: Option<Arc<Schema>>,
229-
custom_batches: Option<Vec<RecordBatch>>,
230201
) -> Self {
231202
// Use a single partition for deterministic results no matter how many CPUs the host has
232203
config = config.with_target_partitions(1);
233204
let file = match unit {
234205
Unit::RowGroup(row_per_group) => {
235206
config = config.with_parquet_bloom_filter_pruning(true);
236-
config.options_mut().execution.parquet.pushdown_filters = true;
237-
make_test_file_rg(scenario, row_per_group, custom_schema, custom_batches)
238-
.await
207+
make_test_file_rg(scenario, row_per_group).await
239208
}
240209
Unit::Page(row_per_page) => {
241210
config = config.with_parquet_page_index_pruning(true);
@@ -1061,12 +1030,7 @@ fn create_data_batch(scenario: Scenario) -> Vec<RecordBatch> {
10611030
}
10621031

10631032
/// Create a test parquet file with various data types
1064-
async fn make_test_file_rg(
1065-
scenario: Scenario,
1066-
row_per_group: usize,
1067-
custom_schema: Option<Arc<Schema>>,
1068-
custom_batches: Option<Vec<RecordBatch>>,
1069-
) -> NamedTempFile {
1033+
async fn make_test_file_rg(scenario: Scenario, row_per_group: usize) -> NamedTempFile {
10701034
let mut output_file = tempfile::Builder::new()
10711035
.prefix("parquet_pruning")
10721036
.suffix(".parquet")
@@ -1079,14 +1043,8 @@ async fn make_test_file_rg(
10791043
.set_statistics_enabled(EnabledStatistics::Page)
10801044
.build();
10811045

1082-
let (batches, schema) =
1083-
if let (Some(schema), Some(batches)) = (custom_schema, custom_batches) {
1084-
(batches, schema)
1085-
} else {
1086-
let batches = create_data_batch(scenario);
1087-
let schema = batches[0].schema();
1088-
(batches, schema)
1089-
};
1046+
let batches = create_data_batch(scenario);
1047+
let schema = batches[0].schema();
10901048

10911049
let mut writer = ArrowWriter::try_new(&mut output_file, schema, Some(props)).unwrap();
10921050

0 commit comments

Comments (0)