Skip to content

Commit 6297e2f

Browse files
committed
bench: add pathBasedReader A/B comparison benchmark to IcebergReaderBenchmark
1 parent a740462 commit 6297e2f

2 files changed

Lines changed: 45 additions & 0 deletions

File tree

benchmarks/pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,12 @@
240240
<version>${project.parent.version}</version>
241241
<scope>test</scope>
242242
</dependency>
243+
<dependency>
244+
<groupId>org.apache.druid.extensions</groupId>
245+
<artifactId>druid-parquet-extensions</artifactId>
246+
<version>${project.parent.version}</version>
247+
<scope>test</scope>
248+
</dependency>
243249
<dependency>
244250
<groupId>org.apache.iceberg</groupId>
245251
<artifactId>iceberg-arrow</artifactId>

benchmarks/src/test/java/org/apache/druid/benchmark/IcebergReaderBenchmark.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,15 @@
2828
import org.apache.druid.data.input.impl.DoubleDimensionSchema;
2929
import org.apache.druid.data.input.impl.LongDimensionSchema;
3030
import org.apache.druid.data.input.impl.StringDimensionSchema;
31+
import org.apache.druid.data.input.impl.LocalInputSourceFactory;
3132
import org.apache.druid.data.input.impl.TimestampSpec;
33+
import org.apache.druid.data.input.InputSourceReader;
34+
import org.apache.druid.data.input.parquet.ParquetInputFormat;
3235
import org.apache.druid.iceberg.input.IcebergArrowInputSourceReader;
36+
import org.apache.druid.iceberg.input.IcebergInputSource;
3337
import org.apache.druid.iceberg.input.LocalCatalog;
3438
import org.apache.druid.java.util.common.parsers.CloseableIterator;
39+
import org.apache.hadoop.conf.Configuration;
3540
import org.apache.iceberg.DataFile;
3641
import org.apache.iceberg.PartitionSpec;
3742
import org.apache.iceberg.Schema;
@@ -199,6 +204,40 @@ public void arrowReaderLargeBatch(final Blackhole bh) throws IOException
199204
}
200205
}
201206

207+
/**
208+
* Existing path-based reader (current production behaviour when useArrowReader=false):
209+
* IcebergCatalog extracts data-file paths from the snapshot, then a LocalInputSource +
210+
* ParquetInputFormat re-opens and re-parses each Parquet file generically.
211+
* No delete-file awareness, no scan-level column projection, no schema evolution.
212+
*/
213+
@Benchmark
214+
public void pathBasedReader(final Blackhole bh) throws IOException
215+
{
216+
final IcebergInputSource source = new IcebergInputSource(
217+
TABLE,
218+
NAMESPACE,
219+
null,
220+
catalog,
221+
new LocalInputSourceFactory(),
222+
null,
223+
null,
224+
false,
225+
0
226+
);
227+
final ParquetInputFormat parquetFormat = new ParquetInputFormat(null, null, new Configuration());
228+
final InputSourceReader reader = source.reader(inputRowSchema, parquetFormat, warehouseDir);
229+
int count = 0;
230+
try (CloseableIterator<InputRow> it = reader.read(NoopStats.INSTANCE)) {
231+
while (it.hasNext()) {
232+
bh.consume(it.next());
233+
count++;
234+
}
235+
}
236+
if (count != numRows) {
237+
throw new RuntimeException("Expected " + numRows + " rows but got " + count);
238+
}
239+
}
240+
202241
// --- helpers ---
203242

204243
private static Schema buildSchema(final int numColumns)

0 commit comments

Comments
 (0)