Skip to content

Commit e5905e2

Browse files
committed
Conditionally build page pruning predicates
Page pruning predicates in the Parquet opener are constructed regardless of whether enable_page_index is set. Under high query load, this uses significant CPU time although these predicates are created and discarded quickly. This commit reorders the predicate creation flow to only construct page pruning predicates if enable_page_index is enabled. Regular predicates are created always as before.
1 parent 91c2e04 commit e5905e2

File tree

2 files changed

+19
-32
lines changed

2 files changed

+19
-32
lines changed

datafusion/datasource-parquet/src/opener.rs

Lines changed: 15 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -772,12 +772,22 @@ impl MetadataLoadedParquetOpen {
772772
prepared.physical_file_schema = Arc::clone(&physical_file_schema);
773773

774774
// Build predicates for this specific file
775-
let (pruning_predicate, page_pruning_predicate) = build_pruning_predicates(
775+
let pruning_predicate = build_pruning_predicates(
776776
prepared.predicate.as_ref(),
777777
&physical_file_schema,
778778
&prepared.predicate_creation_errors,
779779
);
780780

781+
// Only build page pruning predicate if page index is enabled
782+
let page_pruning_predicate = if prepared.enable_page_index {
783+
prepared.predicate.as_ref().and_then(|predicate| {
784+
let p = build_page_pruning_predicate(predicate, &physical_file_schema);
785+
(p.filter_number() > 0).then_some(p)
786+
})
787+
} else {
788+
None
789+
};
790+
781791
Ok(FiltersPreparedParquetOpen {
782792
loaded: MetadataLoadedParquetOpen {
783793
prepared,
@@ -797,10 +807,7 @@ impl FiltersPreparedParquetOpen {
797807
// metadata load above may not have read the page index structures yet.
798808
// If we need them for reading and they aren't yet loaded, we need to
799809
// load them now.
800-
if should_enable_page_index(
801-
self.loaded.prepared.enable_page_index,
802-
&self.page_pruning_predicate,
803-
) {
810+
if self.page_pruning_predicate.is_some() {
804811
self.loaded.reader_metadata = load_page_index(
805812
self.loaded.reader_metadata,
806813
&mut self.loaded.prepared.async_file_reader,
@@ -1513,20 +1520,11 @@ pub(crate) fn build_pruning_predicates(
15131520
predicate: Option<&Arc<dyn PhysicalExpr>>,
15141521
file_schema: &SchemaRef,
15151522
predicate_creation_errors: &Count,
1516-
) -> (
1517-
Option<Arc<PruningPredicate>>,
1518-
Option<Arc<PagePruningAccessPlanFilter>>,
1519-
) {
1523+
) -> Option<Arc<PruningPredicate>> {
15201524
let Some(predicate) = predicate.as_ref() else {
1521-
return (None, None);
1525+
return None;
15221526
};
1523-
let pruning_predicate = build_pruning_predicate(
1524-
Arc::clone(predicate),
1525-
file_schema,
1526-
predicate_creation_errors,
1527-
);
1528-
let page_pruning_predicate = build_page_pruning_predicate(predicate, file_schema);
1529-
(pruning_predicate, Some(page_pruning_predicate))
1527+
build_pruning_predicate(Arc::clone(predicate), file_schema, predicate_creation_errors)
15301528
}
15311529

15321530
/// Returns a `ArrowReaderMetadata` with the page index loaded, loading
@@ -1560,17 +1558,6 @@ async fn load_page_index<T: AsyncFileReader>(
15601558
}
15611559
}
15621560

1563-
fn should_enable_page_index(
1564-
enable_page_index: bool,
1565-
page_pruning_predicate: &Option<Arc<PagePruningAccessPlanFilter>>,
1566-
) -> bool {
1567-
enable_page_index
1568-
&& page_pruning_predicate.is_some()
1569-
&& page_pruning_predicate
1570-
.as_ref()
1571-
.map(|p| p.filter_number() > 0)
1572-
.unwrap_or(false)
1573-
}
15741561

15751562
#[cfg(test)]
15761563
mod test {

datafusion/datasource-parquet/src/source.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -627,17 +627,17 @@ impl FileSource for ParquetSource {
627627
write!(f, ", reverse_row_groups=true")?;
628628
}
629629

630-
// Try to build a the pruning predicates.
630+
// Try to build the pruning predicates.
631631
// These are only generated here because it's useful to have *some*
632632
// idea of what pushdown is happening when viewing plans.
633-
// However it is important to note that these predicates are *not*
633+
// However, it is important to note that these predicates are *not*
634634
// necessarily the predicates that are actually evaluated:
635635
// the actual predicates are built in reference to the physical schema of
636636
// each file, which we do not have at this point and hence cannot use.
637-
// Instead we use the logical schema of the file (the table schema without partition columns).
637+
// Instead, we use the logical schema of the file (the table schema without partition columns).
638638
if let Some(predicate) = &self.predicate {
639639
let predicate_creation_errors = Count::new();
640-
if let (Some(pruning_predicate), _) = build_pruning_predicates(
640+
if let Some(pruning_predicate) = build_pruning_predicates(
641641
Some(predicate),
642642
self.table_schema.table_schema(),
643643
&predicate_creation_errors,

0 commit comments

Comments
 (0)