Skip to content

Commit fbbeb86

Browse files
committed
feat(api): Define unified SQL language spec with composable extensions (#5346)
Introduce UnifiedSqlSpec that configures Calcite's parser/validator seams to accept OpenSearch SQL syntax, with a composable SqlExtension interface for domain-specific functions and AST rewriters. - Lex.BIG_QUERY: hyphenated identifiers, backtick quoting - SqlBabelParserImpl: de-reserves MATCH and most keywords - SqlConformanceEnum.BABEL: lenient GROUP BY, LIMIT, optional FROM - SearchExtension: relevance functions + NamedArgRewriter Signed-off-by: Chen Dai <daichen@amazon.com>
1 parent 3f740fb commit fbbeb86

8 files changed

Lines changed: 339 additions & 30 deletions

File tree

api/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ plugins {
1313

1414
dependencies {
1515
api project(':ppl')
16+
api group: 'org.apache.calcite', name: 'calcite-babel', version: '1.41.0'
1617

1718
testImplementation testFixtures(project(':api'))
1819
testImplementation group: 'junit', name: 'junit', version: '4.13.2'

api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,19 @@
1717
import java.util.concurrent.Callable;
1818
import lombok.AllArgsConstructor;
1919
import lombok.Getter;
20-
import org.apache.calcite.avatica.util.Casing;
2120
import org.apache.calcite.jdbc.CalciteSchema;
2221
import org.apache.calcite.plan.RelTraitDef;
2322
import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider;
2423
import org.apache.calcite.schema.Schema;
2524
import org.apache.calcite.schema.SchemaPlus;
26-
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
27-
import org.apache.calcite.sql.parser.SqlParser;
28-
import org.apache.calcite.sql.util.SqlOperatorTables;
2925
import org.apache.calcite.tools.FrameworkConfig;
3026
import org.apache.calcite.tools.Frameworks;
3127
import org.apache.calcite.tools.Programs;
3228
import org.opensearch.sql.api.parser.CalciteSqlQueryParser;
3329
import org.opensearch.sql.api.parser.PPLQueryParser;
3430
import org.opensearch.sql.api.parser.UnifiedQueryParser;
35-
import org.opensearch.sql.api.spec.UnifiedFunctionSpec;
31+
import org.opensearch.sql.api.spec.LanguageSpec;
32+
import org.opensearch.sql.api.spec.UnifiedSqlSpec;
3633
import org.opensearch.sql.calcite.CalcitePlanContext;
3734
import org.opensearch.sql.calcite.SysLimit;
3835
import org.opensearch.sql.common.setting.Settings;
@@ -60,6 +57,9 @@ public class UnifiedQueryContext implements AutoCloseable {
6057
/** Query parser created eagerly from this context's configuration. */
6158
private final UnifiedQueryParser<?> parser;
6259

60+
/** Language spec for SQL, or null for PPL (TODO: converge PPL onto LanguageSpec). */
61+
private final LanguageSpec langSpec;
62+
6363
/**
6464
* Returns the profiling result. Call after query execution to retrieve collected metrics. Returns
6565
* empty if profiling was not enabled.
@@ -208,12 +208,14 @@ public Builder setting(String name, Object value) {
208208
public UnifiedQueryContext build() {
209209
Objects.requireNonNull(queryType, "Must specify language before build");
210210

211+
LanguageSpec langSpec = (queryType == QueryType.SQL) ? UnifiedSqlSpec.extended() : null;
211212
Settings settings = buildSettings();
212213
CalcitePlanContext planContext =
213214
CalcitePlanContext.create(
214-
buildFrameworkConfig(), SysLimit.fromSettings(settings), queryType);
215+
buildFrameworkConfig(langSpec), SysLimit.fromSettings(settings), queryType);
215216
QueryProfiling.activate(profiling);
216-
return new UnifiedQueryContext(planContext, settings, createParser(planContext, settings));
217+
return new UnifiedQueryContext(
218+
planContext, settings, createParser(planContext, settings), langSpec);
217219
}
218220

219221
private UnifiedQueryParser<?> createParser(CalcitePlanContext planContext, Settings settings) {
@@ -239,25 +241,24 @@ public List<?> getSettings() {
239241
}
240242

241243
@SuppressWarnings({"rawtypes"})
242-
private FrameworkConfig buildFrameworkConfig() {
244+
private FrameworkConfig buildFrameworkConfig(LanguageSpec langSpec) {
243245
SchemaPlus rootSchema = CalciteSchema.createRootSchema(true, cacheMetadata).plus();
244246
catalogs.forEach(rootSchema::add);
245247

246248
SchemaPlus defaultSchema = findSchemaByPath(rootSchema, defaultNamespace);
247-
return Frameworks.newConfigBuilder()
248-
.parserConfig(buildParserConfig())
249-
.operatorTable(
250-
SqlOperatorTables.chain(
251-
SqlStdOperatorTable.instance(), UnifiedFunctionSpec.RELEVANCE.operatorTable()))
252-
.defaultSchema(defaultSchema)
253-
.traitDefs((List<RelTraitDef>) null)
254-
.programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE))
255-
.build();
256-
}
249+
Frameworks.ConfigBuilder builder =
250+
Frameworks.newConfigBuilder()
251+
.defaultSchema(defaultSchema)
252+
.traitDefs((List<RelTraitDef>) null)
253+
.programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE));
257254

258-
private SqlParser.Config buildParserConfig() {
259-
// Preserve identifier case for lowercase OpenSearch index names
260-
return SqlParser.Config.DEFAULT.withUnquotedCasing(Casing.UNCHANGED);
255+
if (langSpec != null) {
256+
builder
257+
.parserConfig(langSpec.parserConfig())
258+
.sqlValidatorConfig(langSpec.validatorConfig())
259+
.operatorTable(langSpec.operatorTable());
260+
}
261+
return builder.build();
261262
}
262263

263264
private SchemaPlus findSchemaByPath(SchemaPlus rootSchema, String defaultPath) {

api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
import org.apache.calcite.rel.logical.LogicalSort;
1717
import org.apache.calcite.sql.SqlKind;
1818
import org.apache.calcite.sql.SqlNode;
19+
import org.apache.calcite.sql.util.SqlVisitor;
1920
import org.apache.calcite.tools.Frameworks;
2021
import org.apache.calcite.tools.Planner;
21-
import org.opensearch.sql.api.parser.NamedArgRewriter;
2222
import org.opensearch.sql.api.parser.UnifiedQueryParser;
2323
import org.opensearch.sql.ast.tree.UnresolvedPlan;
2424
import org.opensearch.sql.calcite.CalciteRelNodeVisitor;
@@ -87,7 +87,12 @@ public RelNode plan(String query) throws Exception {
8787
"Only query statements are supported. Got: " + parsed.getKind());
8888
}
8989

90-
SqlNode rewritten = parsed.accept(NamedArgRewriter.INSTANCE);
90+
// TODO: move post-parse rewriting into CalciteSqlQueryParser
91+
SqlNode rewritten = parsed;
92+
for (SqlVisitor<SqlNode> visitor : context.getLangSpec().postParseRules()) {
93+
rewritten = rewritten.accept(visitor);
94+
}
95+
9196
SqlNode validated = planner.validate(rewritten);
9297
RelRoot relRoot = planner.rel(validated);
9398
return relRoot.project();
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.spec;
7+
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
import org.apache.calcite.sql.SqlNode;
11+
import org.apache.calcite.sql.SqlOperatorTable;
12+
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
13+
import org.apache.calcite.sql.parser.SqlParser;
14+
import org.apache.calcite.sql.util.SqlOperatorTables;
15+
import org.apache.calcite.sql.util.SqlVisitor;
16+
import org.apache.calcite.sql.validate.SqlValidator;
17+
18+
/**
19+
* Language specification defining the dialect the engine accepts. Provides parser configuration,
20+
* validator configuration, and composable {@link LanguageExtension}s that contribute operators and
21+
* post-parse rewrite rules.
22+
*
23+
* <p>Implementations define a complete language surface — for example, {@link UnifiedSqlSpec}
24+
* provides ANSI and extended SQL modes. A future PPL spec would implement this same interface once
25+
* PPL converges on the Calcite pipeline.
26+
*/
27+
public interface LanguageSpec {
28+
29+
/**
30+
* A composable language extension that contributes operators and post-parse rewrite rules. All
31+
* methods have defaults so extensions only override what they need.
32+
*/
33+
interface LanguageExtension {
34+
35+
/**
36+
* Operators (functions, aggregates) this extension adds. Chained with the standard operator
37+
* table during validation.
38+
*/
39+
default SqlOperatorTable operators() {
40+
return SqlOperatorTables.of();
41+
}
42+
43+
/**
44+
* AST rewrite rules applied after parsing and before validation. Each visitor transforms the
45+
* parse tree (e.g., rewriting named arguments into MAP literals).
46+
*/
47+
default List<SqlVisitor<SqlNode>> postParseRules() {
48+
return List.of();
49+
}
50+
}
51+
52+
/**
53+
* Parser configuration controlling how SQL text is tokenized and parsed into a parse tree,
54+
* including parser factory, lexical rules, and conformance.
55+
*/
56+
SqlParser.Config parserConfig();
57+
58+
/**
59+
* Validator configuration controlling what SQL semantics the validator accepts, such as GROUP BY
60+
* behavior, LIMIT syntax, and type coercion.
61+
*/
62+
SqlValidator.Config validatorConfig();
63+
64+
/**
65+
* Language extensions registered with this spec. Each extension contributes operators and
66+
* post-parse rewrite rules that are composed by {@link #operatorTable()} and {@link
67+
* #postParseRules()}.
68+
*/
69+
List<LanguageExtension> extensions();
70+
71+
/**
72+
* Chained operator table combining the standard Calcite operators with all operators contributed
73+
* by registered extensions.
74+
*/
75+
default SqlOperatorTable operatorTable() {
76+
List<SqlOperatorTable> tables = new ArrayList<>();
77+
tables.add(SqlStdOperatorTable.instance());
78+
extensions().forEach(ext -> tables.add(ext.operators()));
79+
return SqlOperatorTables.chain(tables);
80+
}
81+
82+
/**
83+
* All post-parse rewrite rules from registered extensions, flattened in registration order.
84+
* Applied to the parse tree after parsing and before validation.
85+
*/
86+
default List<SqlVisitor<SqlNode>> postParseRules() {
87+
return extensions().stream().flatMap(ext -> ext.postParseRules().stream()).toList();
88+
}
89+
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.spec;
7+
8+
import java.util.List;
9+
import lombok.AccessLevel;
10+
import lombok.Getter;
11+
import lombok.RequiredArgsConstructor;
12+
import lombok.experimental.Accessors;
13+
import org.apache.calcite.config.Lex;
14+
import org.apache.calcite.sql.SqlOperatorTable;
15+
import org.apache.calcite.sql.parser.SqlParser;
16+
import org.apache.calcite.sql.parser.SqlParserImplFactory;
17+
import org.apache.calcite.sql.parser.babel.SqlBabelParserImpl;
18+
import org.apache.calcite.sql.util.SqlVisitor;
19+
import org.apache.calcite.sql.validate.SqlConformanceEnum;
20+
import org.apache.calcite.sql.validate.SqlValidator;
21+
import org.opensearch.sql.api.parser.NamedArgRewriter;
22+
23+
/**
24+
* SQL language specification with ANSI and extended modes.
25+
*
26+
* <p>Use {@link #extended()} for OpenSearch SQL (lenient syntax, hyphenated identifiers, search
27+
* functions) or {@link #ansi()} for strict ANSI SQL mode.
28+
*/
29+
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
30+
@Accessors(fluent = true)
31+
public class UnifiedSqlSpec implements LanguageSpec {
32+
33+
/** Lexical rules: identifier quoting, character escaping, and special identifier support. */
34+
private final Lex lex;
35+
36+
/** Parser implementation: controls keyword reservation and grammar extensions. */
37+
private final SqlParserImplFactory parserFactory;
38+
39+
/** Validation rules: what SQL semantics the validator accepts (GROUP BY, LIMIT, coercion). */
40+
private final SqlConformanceEnum conformance;
41+
42+
/** Composable extensions contributing operators and post-parse rewrite rules. */
43+
@Getter private final List<LanguageExtension> extensions;
44+
45+
/**
46+
* Extended SQL spec: Babel parser, BIG_QUERY lex (hyphenated identifiers, backtick quoting),
47+
* BABEL conformance (lenient GROUP BY, LIMIT, optional FROM), and search functions.
48+
*/
49+
public static UnifiedSqlSpec extended() {
50+
return new UnifiedSqlSpec(
51+
Lex.BIG_QUERY,
52+
SqlBabelParserImpl.FACTORY,
53+
SqlConformanceEnum.BABEL,
54+
List.of(new SearchExtension()));
55+
}
56+
57+
/**
58+
* ANSI SQL spec: Babel parser (for keyword de-reservation), Lex.JAVA (back-tick quoting,
59+
* case-sensitive, no hyphenated identifiers), default Calcite conformance, and search functions.
60+
*/
61+
public static UnifiedSqlSpec ansi() {
62+
return new UnifiedSqlSpec(
63+
Lex.JAVA,
64+
SqlBabelParserImpl.FACTORY,
65+
SqlConformanceEnum.DEFAULT,
66+
List.of(new SearchExtension()));
67+
}
68+
69+
@Override
70+
public SqlParser.Config parserConfig() {
71+
return SqlParser.config()
72+
.withParserFactory(parserFactory)
73+
.withLex(lex)
74+
.withConformance(conformance);
75+
}
76+
77+
@Override
78+
public SqlValidator.Config validatorConfig() {
79+
return SqlValidator.Config.DEFAULT.withConformance(conformance);
80+
}
81+
82+
/** Search Extension: relevance functions and named argument rewriting. */
83+
private static class SearchExtension implements LanguageExtension {
84+
85+
@Override
86+
public SqlOperatorTable operators() {
87+
return UnifiedFunctionSpec.RELEVANCE.operatorTable();
88+
}
89+
90+
@Override
91+
public List<SqlVisitor<org.apache.calcite.sql.SqlNode>> postParseRules() {
92+
return List.of(NamedArgRewriter.INSTANCE);
93+
}
94+
}
95+
}

api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlTest.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -245,17 +245,21 @@ public void testNonQueryStatementsBlockedByWhitelist() {
245245

246246
@Test
247247
public void testNonQueryStatementsBlockedByParser() {
248-
List.of(
248+
// Babel parser rejects CREATE MATERIALIZED VIEW
249+
givenInvalidQuery(
249250
"""
250251
CREATE MATERIALIZED VIEW mv AS
251252
SELECT department, count(*)
252253
FROM catalog.employees
253254
GROUP BY department\
254-
""",
255+
""")
256+
.assertErrorMessage("Encountered");
257+
258+
// Babel parser accepts SHOW TABLES but it's blocked by query-type whitelist
259+
givenInvalidQuery(
255260
"""
256261
SHOW TABLES\
257262
""")
258-
.forEach(
259-
sql -> givenInvalidQuery(sql).assertErrorMessage("Incorrect syntax near the keyword"));
263+
.assertErrorMessage("Only query statements are supported");
260264
}
261265
}

api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ public void testMatch() {
2525
givenQuery(
2626
"""
2727
SELECT * FROM catalog.employees
28-
WHERE "match"(name, 'John')\
28+
WHERE match(name, 'John')\
2929
""")
3030
.assertPlan(
3131
"""
@@ -100,7 +100,7 @@ public void testMatchWithOptions() {
100100
givenQuery(
101101
"""
102102
SELECT * FROM catalog.employees
103-
WHERE "match"(name, 'John', operator='AND', boost=2.0)\
103+
WHERE match(name, 'John', operator='AND', boost=2.0)\
104104
""")
105105
.assertPlanContains(
106106
"match(MAP('field', $1), MAP('query', 'John'),"
@@ -112,7 +112,7 @@ public void testMatchMissingArguments() {
112112
givenInvalidQuery(
113113
"""
114114
SELECT * FROM catalog.employees
115-
WHERE "match"('John')\
115+
WHERE match('John')\
116116
""")
117117
.assertErrorMessage(
118118
"No match found for function signature match(<(CHAR(5), CHAR(4)) MAP>)");

0 commit comments

Comments
 (0)