Add unified query transpiler API (opensearch-project#4871)

dai-chen · web-flow · commit d4daa34d8313 · 2025-12-08T12:02:10.000-08:00
* Add basic transpiler impl

Signed-off-by: Chen Dai <daichen@amazon.com>

* Add builder

Signed-off-by: Chen Dai <daichen@amazon.com>

* Use lombok builder

Signed-off-by: Chen Dai <daichen@amazon.com>

* Modify unified query planner UT to extend new test base class

Signed-off-by: Chen Dai <daichen@amazon.com>

* Update doc with API design caveat

Signed-off-by: Chen Dai <daichen@amazon.com>

* Move opensearch spark sql dialect out of test folder

Signed-off-by: Chen Dai <daichen@amazon.com>

* Update doc and test assertion message

Signed-off-by: Chen Dai <daichen@amazon.com>

* Fix line separator and license header

Signed-off-by: Chen Dai <daichen@amazon.com>

---------

Signed-off-by: Chen Dai <daichen@amazon.com>
diff --git a/api/README.md b/api/README.md
@@ -4,10 +4,21 @@ This module provides a high-level integration layer for the Calcite-based query
 
 ## Overview
 
-The `UnifiedQueryPlanner` serves as the primary entry point for external consumers. It accepts PPL (Piped Processing Language) queries and returns Calcite `RelNode` logical plans as intermediate representation.
+This module provides two primary components:
+
+- **`UnifiedQueryPlanner`**: Accepts PPL (Piped Processing Language) queries and returns Calcite `RelNode` logical plans as intermediate representation.
+- **`UnifiedQueryTranspiler`**: Converts Calcite logical plans (`RelNode`) into SQL strings for various target databases using different SQL dialects.
+
+Together, these components enable a complete workflow: parse PPL queries into logical plans, then transpile those plans into target database SQL.
+
+### Experimental API Design
+
+**This API is currently experimental.** The design intentionally exposes Calcite abstractions (`Schema` for catalogs, `RelNode` as IR, `SqlDialect` for dialects) rather than creating custom wrapper interfaces. This avoids over-engineering in the short term by relying directly on Calcite's flexible interfaces. If a more abstract API becomes necessary in the future, the new abstraction layer may introduce breaking changes.
 
 ## Usage
 
+### UnifiedQueryPlanner
+
 Use the declarative, fluent builder API to initialize the `UnifiedQueryPlanner`.
 
 ```java
@@ -21,6 +32,49 @@ UnifiedQueryPlanner planner = UnifiedQueryPlanner.builder()
 RelNode plan = planner.plan("source = opensearch.test");
 ```
 
+### UnifiedQueryTranspiler
+
+Use `UnifiedQueryTranspiler` to convert Calcite logical plans into SQL strings for target databases. The target dialect is selected by passing a Calcite `SqlDialect` instance to the builder.
+
+```java
+UnifiedQueryTranspiler transpiler = UnifiedQueryTranspiler.builder()
+    .dialect(SparkSqlDialect.DEFAULT)
+    .build();
+
+String sql = transpiler.toSql(plan);
+```
+
+### Complete Workflow Example
+
+Combining both components to transpile PPL queries into target database SQL:
+
+```java
+// Step 1: Initialize planner
+UnifiedQueryPlanner planner = UnifiedQueryPlanner.builder()
+    .language(QueryType.PPL)
+    .catalog("catalog", schema)
+    .defaultNamespace("catalog")
+    .build();
+
+// Step 2: Parse PPL query into logical plan
+RelNode plan = planner.plan("source = employees | where age > 30");
+
+// Step 3: Initialize transpiler with target dialect
+UnifiedQueryTranspiler transpiler = UnifiedQueryTranspiler.builder()
+    .dialect(SparkSqlDialect.DEFAULT)
+    .build();
+
+// Step 4: Transpile to target SQL
+String sparkSql = transpiler.toSql(plan);
+// Result (line breaks omitted): SELECT * FROM `catalog`.`employees` WHERE `age` > 30
+```
+
+Supported SQL dialects include:
+- `SparkSqlDialect.DEFAULT` - Apache Spark SQL
+- `PostgresqlSqlDialect.DEFAULT` - PostgreSQL
+- `MysqlSqlDialect.DEFAULT` - MySQL
+- And other Calcite-supported dialects
+
 ## Development & Testing
 
 A set of unit tests is provided to validate planner behavior.
diff --git a/api/build.gradle b/api/build.gradle
@@ -5,6 +5,7 @@
 
 plugins {
     id 'java-library'
+    id "io.freefair.lombok"
     id 'jacoco'
     id 'com.diffplug.spotless'
 }
@@ -25,6 +26,10 @@ spotless {
             exclude '**/build/**', '**/build-*/**', 'src/main/gen/**'
         }
         importOrder()
+        licenseHeader("/*\n" +
+                " * Copyright OpenSearch Contributors\n" +
+                " * SPDX-License-Identifier: Apache-2.0\n" +
+                " */\n\n")
         removeUnusedImports()
         trimTrailingWhitespace()
         endWithNewline()
diff --git a/api/src/main/java/org/opensearch/sql/api/EmptyDataSourceService.java b/api/src/main/java/org/opensearch/sql/api/EmptyDataSourceService.java
@@ -1,3 +1,8 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
 package org.opensearch.sql.api;
 
 import java.util.Map;
diff --git a/api/src/main/java/org/opensearch/sql/api/transpiler/UnifiedQueryTranspiler.java b/api/src/main/java/org/opensearch/sql/api/transpiler/UnifiedQueryTranspiler.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.api.transpiler;
+
+import lombok.Builder;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.rel2sql.RelToSqlConverter;
+import org.apache.calcite.sql.SqlDialect;
+import org.apache.calcite.sql.SqlNode;
+
+/**
+ * Transpiles Calcite logical plans ({@link RelNode}) into SQL strings for a target database,
+ * using Calcite's {@link RelToSqlConverter} and the {@link SqlDialect} supplied to the builder.
+ */
+@Builder
+public class UnifiedQueryTranspiler {
+
+  /** Target SQL dialect, used both to convert the plan and to render the SQL text. */
+  private final SqlDialect dialect;
+
+  /**
+   * Converts a Calcite logical plan to a SQL string using the configured target dialect.
+   *
+   * @param plan the logical plan to convert (must not be null)
+   * @return the generated SQL string
+   * @throws IllegalStateException if conversion fails for any reason; the cause is preserved
+   */
+  public String toSql(RelNode plan) {
+    try {
+      RelToSqlConverter converter = new RelToSqlConverter(dialect);
+      SqlNode sqlNode = converter.visitRoot(plan).asStatement();
+      return sqlNode.toSqlString(dialect).getSql();
+    } catch (Exception e) {
+      throw new IllegalStateException("Failed to transpile logical plan to SQL", e);
+    }
+  }
+}
diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerTest.java
@@ -8,41 +8,15 @@
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertThrows;
 
-import java.util.List;
 import java.util.Map;
 import org.apache.calcite.rel.RelNode;
-import org.apache.calcite.rel.type.RelDataType;
-import org.apache.calcite.rel.type.RelDataTypeFactory;
 import org.apache.calcite.schema.Schema;
-import org.apache.calcite.schema.Table;
 import org.apache.calcite.schema.impl.AbstractSchema;
-import org.apache.calcite.schema.impl.AbstractTable;
-import org.apache.calcite.sql.type.SqlTypeName;
 import org.junit.Test;
 import org.opensearch.sql.common.antlr.SyntaxCheckException;
 import org.opensearch.sql.executor.QueryType;
 
-public class UnifiedQueryPlannerTest {
-
-  /** Test schema consists of a test table with id and name columns */
-  private final AbstractSchema testSchema =
-      new AbstractSchema() {
-        @Override
-        protected Map<String, Table> getTableMap() {
-          return Map.of(
-              "index",
-              new AbstractTable() {
-                @Override
-                public RelDataType getRowType(RelDataTypeFactory typeFactory) {
-                  return typeFactory.createStructType(
-                      List.of(
-                          typeFactory.createSqlType(SqlTypeName.INTEGER),
-                          typeFactory.createSqlType(SqlTypeName.VARCHAR)),
-                      List.of("id", "name"));
-                }
-              });
-        }
-      };
+public class UnifiedQueryPlannerTest extends UnifiedQueryTestBase {
 
   /** Test catalog consists of test schema above */
   private final AbstractSchema testDeepSchema =
@@ -61,7 +35,7 @@ public void testPPLQueryPlanning() {
             .catalog("opensearch", testSchema)
             .build();
 
-    RelNode plan = planner.plan("source = opensearch.index | eval f = abs(id)");
+    RelNode plan = planner.plan("source = opensearch.employees | eval f = abs(id)");
     assertNotNull("Plan should be created", plan);
   }
 
@@ -74,8 +48,8 @@ public void testPPLQueryPlanningWithDefaultNamespace() {
             .defaultNamespace("opensearch")
             .build();
 
-    assertNotNull("Plan should be created", planner.plan("source = opensearch.index"));
-    assertNotNull("Plan should be created", planner.plan("source = index"));
+    assertNotNull("Plan should be created", planner.plan("source = opensearch.employees"));
+    assertNotNull("Plan should be created", planner.plan("source = employees"));
   }
 
   @Test
@@ -87,12 +61,12 @@ public void testPPLQueryPlanningWithDefaultNamespaceMultiLevel() {
             .defaultNamespace("catalog.opensearch")
             .build();
 
-    assertNotNull("Plan should be created", planner.plan("source = catalog.opensearch.index"));
-    assertNotNull("Plan should be created", planner.plan("source = index"));
+    assertNotNull("Plan should be created", planner.plan("source = catalog.opensearch.employees"));
+    assertNotNull("Plan should be created", planner.plan("source = employees"));
 
     // This is valid in SparkSQL, but Calcite requires "catalog" as the default root schema to
     // resolve it
-    assertThrows(IllegalStateException.class, () -> planner.plan("source = opensearch.index"));
+    assertThrows(IllegalStateException.class, () -> planner.plan("source = opensearch.employees"));
   }
 
   @Test
@@ -105,7 +79,8 @@ public void testPPLQueryPlanningWithMultipleCatalogs() {
             .build();
 
     RelNode plan =
-        planner.plan("source = catalog1.index | lookup catalog2.index id | eval f = abs(id)");
+        planner.plan(
+            "source = catalog1.employees | lookup catalog2.employees id | eval f = abs(id)");
     assertNotNull("Plan should be created with multiple catalogs", plan);
   }
 
@@ -119,7 +94,8 @@ public void testPPLQueryPlanningWithMultipleCatalogsAndDefaultNamespace() {
             .defaultNamespace("catalog2")
             .build();
 
-    RelNode plan = planner.plan("source = catalog1.index | lookup index id | eval f = abs(id)");
+    RelNode plan =
+        planner.plan("source = catalog1.employees | lookup employees id | eval f = abs(id)");
     assertNotNull("Plan should be created with multiple catalogs", plan);
   }
 
@@ -132,7 +108,7 @@ public void testPPLQueryPlanningWithMetadataCaching() {
             .cacheMetadata(true)
             .build();
 
-    RelNode plan = planner.plan("source = opensearch.index");
+    RelNode plan = planner.plan("source = opensearch.employees");
     assertNotNull("Plan should be created", plan);
   }
 
@@ -166,7 +142,7 @@ public void testUnsupportedStatementType() {
             .catalog("opensearch", testSchema)
             .build();
 
-    planner.plan("explain source = index"); // explain statement
+    planner.plan("explain source = employees"); // explain statement
   }
 
   @Test(expected = SyntaxCheckException.class)
@@ -177,6 +153,6 @@ public void testPlanPropagatingSyntaxCheckException() {
             .catalog("opensearch", testSchema)
             .build();
 
-    planner.plan("source = index | eval"); // Trigger syntax error from parser
+    planner.plan("source = employees | eval"); // Trigger syntax error from parser
   }
 }
diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedQueryTestBase.java b/api/src/test/java/org/opensearch/sql/api/UnifiedQueryTestBase.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.api;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.schema.Table;
+import org.apache.calcite.schema.impl.AbstractSchema;
+import org.apache.calcite.schema.impl.AbstractTable;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.junit.Before;
+import org.opensearch.sql.executor.QueryType;
+
+/** Base class for unified query tests; rebuilds the test schema and planner before each test. */
+public abstract class UnifiedQueryTestBase {
+
+  /** Test schema exposing a single {@code employees} table */
+  protected AbstractSchema testSchema;
+
+  /** PPL planner with the test schema as catalog {@code catalog}, also the default namespace */
+  protected UnifiedQueryPlanner planner;
+
+  @Before
+  public void setUp() {
+    testSchema =
+        new AbstractSchema() {
+          @Override
+          protected Map<String, Table> getTableMap() {
+            return Map.of("employees", createEmployeesTable());
+          }
+        };
+
+    planner =
+        UnifiedQueryPlanner.builder()
+            .language(QueryType.PPL)
+            .catalog("catalog", testSchema)
+            .defaultNamespace("catalog")
+            .build();
+  }
+
+  protected Table createEmployeesTable() {
+    return new AbstractTable() {
+      @Override
+      public RelDataType getRowType(RelDataTypeFactory typeFactory) {
+        return typeFactory.createStructType(
+            List.of(
+                typeFactory.createSqlType(SqlTypeName.INTEGER), // id
+                typeFactory.createSqlType(SqlTypeName.VARCHAR), // name
+                typeFactory.createSqlType(SqlTypeName.INTEGER), // age
+                typeFactory.createSqlType(SqlTypeName.VARCHAR)), // department
+            List.of("id", "name", "age", "department"));
+      }
+    };
+  }
+}
diff --git a/api/src/test/java/org/opensearch/sql/api/transpiler/UnifiedQueryTranspilerTest.java b/api/src/test/java/org/opensearch/sql/api/transpiler/UnifiedQueryTranspilerTest.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.api.transpiler;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.sql.dialect.SparkSqlDialect;
+import org.junit.Before;
+import org.junit.Test;
+import org.opensearch.sql.api.UnifiedQueryTestBase;
+import org.opensearch.sql.ppl.calcite.OpenSearchSparkSqlDialect;
+
+public class UnifiedQueryTranspilerTest extends UnifiedQueryTestBase {
+
+  private UnifiedQueryTranspiler transpiler;
+
+  @Before
+  public void setUp() {
+    super.setUp(); // initializes testSchema and planner in the base class
+    transpiler = UnifiedQueryTranspiler.builder().dialect(SparkSqlDialect.DEFAULT).build();
+  }
+
+  @Test
+  public void testToSql() {
+    String pplQuery = "source = employees";
+    RelNode plan = planner.plan(pplQuery);
+
+    String actualSql = transpiler.toSql(plan);
+    String expectedSql = normalize("SELECT *\nFROM `catalog`.`employees`");
+    assertEquals(
+        "Transpiled SQL using SparkSqlDialect should match expected SQL", expectedSql, actualSql);
+  }
+
+  @Test
+  public void testToSqlWithCustomDialect() {
+    String pplQuery = "source = employees | where name = 123"; // VARCHAR column vs. number
+    RelNode plan = planner.plan(pplQuery);
+
+    UnifiedQueryTranspiler customTranspiler =
+        UnifiedQueryTranspiler.builder().dialect(OpenSearchSparkSqlDialect.DEFAULT).build();
+    String actualSql = customTranspiler.toSql(plan);
+    String expectedSql =
+        normalize(
+            "SELECT *\nFROM `catalog`.`employees`\nWHERE TRY_CAST(`name` AS DOUBLE) = 1.230E2");
+    assertEquals(
+        "Transpiled query using OpenSearchSparkSqlDialect should translate SAFE_CAST to TRY_CAST",
+        expectedSql,
+        actualSql);
+  }
+
+  /** Converts {@code \n} in expected SQL to the platform line separator before comparison. */
+  private String normalize(String sql) {
+    return sql.replace("\n", System.lineSeparator());
+  }
+}
diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/calcite/OpenSearchSparkSqlDialect.java b/ppl/src/main/java/org/opensearch/sql/ppl/calcite/OpenSearchSparkSqlDialect.java