Add microbenchmark for unified query APIs

dai-chen · dai-chen · commit 0762a1abb027 · 2026-01-14T14:17:18.000-08:00
Signed-off-by: Chen Dai <daichen@amazon.com>
diff --git a/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java b/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -10,7 +10,18 @@ The microbenchmark suite is also handy for ad-hoc microbenchmarks but please rem
 
 ## Getting Started
 
-Just run `./gradlew :benchmarks:jmh` from the project root directory or run specific benchmark via your IDE. It will build all microbenchmarks, execute them and print the result.
+Run all benchmarks from the project root directory:
+
+```bash
+./gradlew :benchmarks:jmh
+```
+
+Run specific benchmarks by passing a regular expression via the `-Pjmh.includes` parameter:
+
+```bash
+./gradlew :benchmarks:jmh -Pjmh.includes='UnifiedQueryBenchmark'
+./gradlew :benchmarks:jmh -Pjmh.includes='UnifiedQueryBenchmark.plan.*'
+```
 
 ## Adding Microbenchmarks
 
diff --git a/benchmarks/build.gradle b/benchmarks/build.gradle
@@ -15,6 +15,8 @@ repositories {
 dependencies {
     implementation project(':core')
     implementation project(':opensearch')
+    implementation project(':api')
+    jmhImplementation testFixtures(project(':api'))
 
     // Dependencies required by JMH micro benchmark
     api group: 'org.openjdk.jmh', name: 'jmh-core', version: '1.36'
@@ -30,4 +32,9 @@ spotless {
     }
 }
 
+// Forwards the optional -Pjmh.includes=<pattern> project property to JMH as its include filter
+jmh {
+    includes = !project.hasProperty('jmh.includes') ? [] : [project.property('jmh.includes')]
+}
+
 compileJava.options.compilerArgs.addAll(["-processor", "org.openjdk.jmh.generators.BenchmarkProcessor"])
diff --git a/benchmarks/src/jmh/java/org/opensearch/sql/api/UnifiedQueryBenchmark.java b/benchmarks/src/jmh/java/org/opensearch/sql/api/UnifiedQueryBenchmark.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.api;
+
+import java.sql.PreparedStatement;
+import java.util.concurrent.TimeUnit;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.sql.dialect.SparkSqlDialect;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+import org.opensearch.sql.api.compiler.UnifiedQueryCompiler;
+import org.opensearch.sql.api.transpiler.UnifiedQueryTranspiler;
+
+/**
+ * JMH microbenchmark measuring the overhead of unified query API components when processing PPL
+ * queries. This provides baseline metrics for integration with the opensearch-spark repository.
+ *
+ * <p>Benchmarks cover:
+ *
+ * <ul>
+ *   <li>{@link UnifiedQueryPlanner}: PPL parsing and Calcite logical plan generation
+ *   <li>{@link UnifiedQueryTranspiler}: Logical plan to SQL string conversion
+ *   <li>{@link UnifiedQueryCompiler}: Logical plan to executable statement compilation
+ * </ul>
+ *
+ * <p>Query patterns tested (one per {@code pplQuery} value): simple source scan, filter with
+ * WHERE clause, aggregation with GROUP BY, sort with ORDER BY, and combined operations (filter +
+ * aggregation + sort). Results are reported as average time per invocation in microseconds.
+ */
+@Warmup(iterations = 2, time = 1)
+@Measurement(iterations = 5, time = 1)
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@State(Scope.Thread)
+@Fork(value = 1)
+public class UnifiedQueryBenchmark extends UnifiedQueryTestBase {
+
+  /** Common PPL query patterns for benchmarking. */
+  @Param({
+    "source = catalog.employees",
+    "source = catalog.employees | where age > 30",
+    "source = catalog.employees | stats count() by department",
+    "source = catalog.employees | sort - age",
+    "source = catalog.employees | where age > 25 | stats avg(age) by department | sort - department"
+  })
+  private String pplQuery;
+
+  private UnifiedQueryTranspiler transpiler;
+  private UnifiedQueryCompiler compiler;
+
+  /** Runs the base-class setup, then builds the transpiler and compiler, once per trial. */
+  @Setup(Level.Trial)
+  public void setUpBenchmark() {
+    super.setUp();
+    transpiler = UnifiedQueryTranspiler.builder().dialect(SparkSqlDialect.DEFAULT).build();
+    compiler = new UnifiedQueryCompiler(context);
+  }
+
+  /** Runs the base-class teardown once per trial. */
+  @TearDown(Level.Trial)
+  public void tearDownBenchmark() throws Exception {
+    super.tearDown();
+  }
+
+  /** Benchmarks PPL parsing and Calcite logical plan generation. */
+  @Benchmark
+  public RelNode planPplQuery() {
+    return planner.plan(pplQuery);
+  }
+
+  /** Benchmarks the full transpilation pipeline: PPL → logical plan → SQL string. */
+  @Benchmark
+  public String transpilePplToSparkSql() {
+    return transpiler.toSql(planner.plan(pplQuery));
+  }
+
+  /**
+   * Benchmarks the compilation pipeline: PPL → logical plan → executable statement. Closes the
+   * statement (inside the measured time) before returning it, so that open statements do not
+   * accumulate over a trial; JMH still consumes the returned reference.
+   */
+  @Benchmark
+  public PreparedStatement compilePplQuery() throws Exception {
+    try (PreparedStatement statement = compiler.compile(planner.plan(pplQuery))) {
+      return statement;
+    }
+  }
+}