|
18 | 18 | */ |
19 | 19 | package org.apache.parquet.hadoop; |
20 | 20 |
|
| 21 | +import static org.apache.parquet.hadoop.ParquetFileReaderMetrics.PagesIncluded; |
| 22 | +import static org.apache.parquet.hadoop.ParquetFileReaderMetrics.PagesSkipped; |
| 23 | + |
21 | 24 | import it.unimi.dsi.fastutil.ints.IntArrayList; |
22 | 25 | import it.unimi.dsi.fastutil.ints.IntList; |
23 | 26 | import java.util.ArrayList; |
@@ -129,14 +132,30 @@ public String toString() { |
129 | 132 | /* |
130 | 133 | * Returns the filtered offset index containing only the pages which are overlapping with rowRanges. |
 * Each page i is tested using the row span from getFirstRowIndex(i) to getLastRowIndex(i, totalRowCount);
 * pages whose span does not overlap rowRanges are left out of the returned index.
 * The revised signature adds an options parameter: when it is non-null and supplies a metrics callback,
 * the number of pages kept and the number of pages skipped are reported through that callback.
 * A null options (or a null callback) only disables the reporting; the filtering itself is unchanged.
131 | 134 | */ |
132 | | - static OffsetIndex filterOffsetIndex(OffsetIndex offsetIndex, RowRanges rowRanges, long totalRowCount) { |
| 135 | + static OffsetIndex filterOffsetIndex( |
| 136 | + OffsetIndex offsetIndex, |
| 137 | + RowRanges rowRanges, |
| 138 | + long totalRowCount, |
| 139 | + org.apache.parquet.ParquetReadOptions options) { |
    // indexMap collects the positions (within offsetIndex) of the pages that are kept.
133 | 140 | IntList indexMap = new IntArrayList(); |
    // Tallies used only for the optional metrics report further down.
| 141 | + int pagesIncluded = 0; |
| 142 | + int pagesSkipped = 0; |
134 | 143 | for (int i = 0, n = offsetIndex.getPageCount(); i < n; ++i) { |
135 | 144 | long from = offsetIndex.getFirstRowIndex(i); |
136 | 145 | if (rowRanges.isOverlapping(from, offsetIndex.getLastRowIndex(i, totalRowCount))) { |
137 | 146 | indexMap.add(i); |
| 147 | + pagesIncluded++; |
| 148 | + } else { |
| 149 | + pagesSkipped++; |
138 | 150 | } |
139 | 151 | } |
| 152 | + |
    // Reporting is best-effort: it is skipped when no options or no callback were supplied.
    // NOTE(review): each call to this method pushes its own counts via setValueInt; whether the
    // callback accumulates or overwrites values from earlier calls is not visible here -- confirm.
| 153 | + if (options != null && options.getMetricsCallback() != null) { |
| 154 | + final ParquetMetricsCallback metricsCallback = options.getMetricsCallback(); |
| 155 | + metricsCallback.setValueInt(PagesIncluded.name(), pagesIncluded); |
| 156 | + metricsCallback.setValueInt(PagesSkipped.name(), pagesSkipped); |
| 157 | + } |
| 158 | + |
    // The result wraps the original index together with the positions of the retained pages.
140 | 159 | return new FilteredOffsetIndex(offsetIndex, indexMap.toIntArray()); |
141 | 160 | } |
142 | 161 |
|
|
0 commit comments