Skip to content

Commit 5018ae0

Browse files
committed
fix calcite push down issue - make sure use PCRE when pushdown
Signed-off-by: Jialiang Liang <jiallian@amazon.com>
1 parent 2530871 commit 5018ae0

7 files changed

Lines changed: 162 additions & 10 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -173,25 +173,35 @@ public RelNode visitFilter(Filter node, CalcitePlanContext context) {
173173
@Override
174174
public RelNode visitRegex(Regex node, CalcitePlanContext context) {
175175
visitChildren(node, context);
176-
// For Calcite engine, use REGEXP function but the RegexMatch expression will be executed
177-
// with PCRE2 support in the script engine during pushdown
178-
List<UnresolvedExpression> args = new ArrayList<>();
179-
args.add(node.getField());
180-
args.add(node.getPattern());
181176

182-
// Use the standard REGEXP function - the PCRE2 execution happens in the script engine
183-
Function regexFunction = new Function("regexp", args);
184-
RexNode condition = rexVisitor.analyze(regexFunction, context);
177+
// Build a call to the dedicated REGEX_MATCH operator instead of the generic REGEXP function,
178+
// with the stated goal that both engines end up on the same PCRE2 implementation
179+
180+
// Resolve the field and pattern expressions to RexNodes in the current plan context
181+
RexNode fieldRex = rexVisitor.analyze(node.getField(), context);
182+
RexNode patternRex = rexVisitor.analyze(node.getPattern(), context);
183+
184+
// Wrap both operands in a RexCall on the custom regex-match operator (createRegexMatchRexNode);
185+
// the call is intended to be evaluated by the script engine with PCRE2 support
186+
RexNode regexCondition = createRegexMatchRexNode(fieldRex, patternRex, context);
185187

186188
// A negated regex becomes NOT(<regex condition>)
187189
if (node.isNegated()) {
188-
condition = context.rexBuilder.makeCall(SqlStdOperatorTable.NOT, condition);
190+
regexCondition = context.rexBuilder.makeCall(SqlStdOperatorTable.NOT, regexCondition);
189191
}
190192

191-
context.relBuilder.filter(condition);
193+
context.relBuilder.filter(regexCondition);
192194
return context.relBuilder.peek();
193195
}
194196

197+
private RexNode createRegexMatchRexNode(
198+
RexNode field, RexNode pattern, CalcitePlanContext context) {
199+
// Build REGEX_MATCH(field, pattern) as a call on RegexMatchOperator.INSTANCE, bypassing the standard regex routing.
200+
// NOTE(review): PPLBuiltinOperators.REGEX_MATCH is a separate operator object with the same name - confirm which one downstream code expects.
201+
return context.rexBuilder.makeCall(
202+
org.opensearch.sql.calcite.rex.RegexMatchOperator.INSTANCE, field, pattern);
203+
}
204+
195205
private boolean containsSubqueryExpression(Node expr) {
196206
if (expr == null) {
197207
return false;
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.calcite.rex;
7+
8+
import org.apache.calcite.sql.SqlKind;
9+
import org.apache.calcite.sql.SqlOperator;
10+
import org.apache.calcite.sql.SqlSyntax;
11+
import org.apache.calcite.sql.type.OperandTypes;
12+
import org.apache.calcite.sql.type.ReturnTypes;
13+
14+
/**
15+
* Singleton Calcite operator named REGEX_MATCH: takes two string operands, returns BOOLEAN, and uses
16+
* function-call syntax. It carries no evaluation logic itself; it marks a regex match meant for PCRE2 pushdown.
17+
*/
18+
public class RegexMatchOperator extends SqlOperator {
19+
20+
public static final RegexMatchOperator INSTANCE = new RegexMatchOperator(); // sole instance; constructor is private
21+
22+
private RegexMatchOperator() {
23+
super(
24+
"REGEX_MATCH", // operator name
25+
SqlKind.OTHER_FUNCTION, // kind
26+
20, // precedence
27+
false, // leftAssoc
28+
ReturnTypes.BOOLEAN, // return type inference
29+
null, // operand type inference: none
30+
OperandTypes.STRING_STRING); // operand type checker: exactly two string operands
31+
}
32+
33+
@Override
34+
public SqlSyntax getSyntax() {
35+
return SqlSyntax.FUNCTION; // rendered as REGEX_MATCH(a, b)
36+
}
37+
}

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ public enum BuiltinFunctionName {
215215
LTRIM(FunctionName.of("ltrim")),
216216
POSITION(FunctionName.of("position")),
217217
REGEXP(FunctionName.of("regexp")),
218+
REGEX_MATCH(FunctionName.of("REGEX_MATCH")),
218219
REPLACE(FunctionName.of("replace")),
219220
REVERSE(FunctionName.of("reverse")),
220221
RIGHT(FunctionName.of("right")),

core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,10 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
380380
public static final SqlOperator NUMBER_TO_STRING =
381381
new NumberToStringFunction().toUDF("NUMBER_TO_STRING");
382382

383+
// Custom PCRE2 regex operator for Calcite engine
384+
public static final SqlOperator REGEX_MATCH =
385+
new org.opensearch.sql.expression.function.udf.RegexMatchFunctionImpl().toUDF("REGEX_MATCH");
386+
383387
/**
384388
* Returns the PPL specific operator table, creating it if necessary.
385389
*

core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@
161161
import static org.opensearch.sql.expression.function.BuiltinFunctionName.RAND;
162162
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REDUCE;
163163
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEXP;
164+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEX_MATCH;
164165
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REPLACE;
165166
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REVERSE;
166167
import static org.opensearch.sql.expression.function.BuiltinFunctionName.RIGHT;
@@ -844,6 +845,7 @@ void populate() {
844845
registerOperator(SIMPLE_QUERY_STRING, PPLBuiltinOperators.SIMPLE_QUERY_STRING);
845846
registerOperator(QUERY_STRING, PPLBuiltinOperators.QUERY_STRING);
846847
registerOperator(MULTI_MATCH, PPLBuiltinOperators.MULTI_MATCH);
848+
registerOperator(REGEX_MATCH, PPLBuiltinOperators.REGEX_MATCH);
847849

848850
// Register PPL Datetime UDF operator
849851
registerOperator(TIMESTAMP, PPLBuiltinOperators.TIMESTAMP);
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.expression.function.udf;
7+
8+
import java.util.List;
9+
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
10+
import org.apache.calcite.adapter.enumerable.NullPolicy;
11+
import org.apache.calcite.adapter.enumerable.RexImpTable;
12+
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
13+
import org.apache.calcite.linq4j.tree.Expression;
14+
import org.apache.calcite.linq4j.tree.Types;
15+
import org.apache.calcite.rex.RexCall;
16+
import org.apache.calcite.schema.impl.ScalarFunctionImpl;
17+
import org.apache.calcite.sql.type.ReturnTypes;
18+
import org.apache.calcite.sql.type.SqlReturnTypeInference;
19+
import org.opensearch.sql.expression.function.ImplementorUDF;
20+
import org.opensearch.sql.expression.function.UDFOperandMetadata;
21+
import org.pcre4j.regex.Pattern;
22+
23+
/**
24+
* REGEX_MATCH UDF implementation for the Calcite engine. The implementor binds the static
25+
* {@link #eval(String, String)} method, which performs PCRE2 matching (via PCRE4J) for script query pushdown.
26+
*/
27+
public class RegexMatchFunctionImpl extends ImplementorUDF {
28+
29+
public RegexMatchFunctionImpl() {
30+
super(new RegexMatchImplementor(), NullPolicy.ANY); // Calcite NullPolicy.ANY: result is null if any operand is null
31+
}
32+
33+
@Override
34+
public SqlReturnTypeInference getReturnTypeInference() {
35+
return ReturnTypes.BOOLEAN_FORCE_NULLABLE; // nullable BOOLEAN, consistent with eval returning null
36+
}
37+
38+
@Override
39+
public UDFOperandMetadata getOperandMetadata() {
40+
return null; // no operand metadata supplied; NOTE(review): presumably disables operand type checking - confirm
41+
}
42+
43+
public static class RegexMatchImplementor implements NotNullImplementor {
44+
@Override
45+
public Expression implement(
46+
RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
47+
ScalarFunctionImpl function = // wraps the static eval(String, String) method; rebuilt on every implement() call
48+
(ScalarFunctionImpl)
49+
ScalarFunctionImpl.create(
50+
Types.lookupMethod(
51+
RegexMatchFunctionImpl.class, "eval", String.class, String.class));
52+
return function.getImplementor().implement(translator, call, RexImpTable.NullAs.NULL); // delegates with NullAs.NULL; translatedOperands is unused
53+
}
54+
}
55+
56+
/**
57+
* Evaluates a PCRE2 partial match of {@code pattern} against {@code field}. This is the method that
58+
* {@link RegexMatchImplementor} looks up and binds for Calcite's generated code.
59+
*
60+
* @param field The field value to search; may be null
61+
* @param pattern The PCRE2 regex pattern; may be null
62+
* @return null if either argument is null; true if find() succeeds; false otherwise, including when compiling or matching throws
63+
*/
64+
public static Boolean eval(String field, String pattern) {
65+
if (field == null || pattern == null) {
66+
return null;
67+
}
68+
69+
// Match with PCRE4J directly (intended to mirror RegexMatch). NOTE(review): the pattern is recompiled on every call - consider caching.
70+
try {
71+
Pattern compiledPattern = Pattern.compile(pattern);
72+
org.pcre4j.regex.Matcher matcher = compiledPattern.matcher(field);
73+
return matcher.find(); // find() gives a partial (substring) match, like SPL
74+
} catch (Exception e) {
75+
// NOTE(review): any failure, including an invalid pattern, is silently reported as "no match" - consider surfacing the error
76+
return false;
77+
}
78+
}
79+
}

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@ public QueryBuilder visitFunction(FunctionExpression func, Object context) {
113113
throw new SyntaxCheckException(
114114
"Invalid syntax used for nested function in WHERE clause: "
115115
+ "nested(field | field, path) OPERATOR LITERAL");
116+
case "REGEX_MATCH":
117+
// Handle our custom PCRE2 regex operator from Calcite engine
118+
return buildScriptQueryForRegex(createRegexMatchFromFunction(func));
116119
default:
117120
{
118121
LuceneQuery query = luceneQueries.get(name);
@@ -178,4 +181,20 @@ private ScriptQueryBuilder buildScriptQueryForRegex(RegexMatch regexMatch) {
178181
ScriptEngineType.V2, serializer.serialize(regexMatch)),
179182
emptyMap()));
180183
}
184+
185+
/**
186+
* Converts a REGEX_MATCH function call from the Calcite engine into a RegexMatch expression built from its
187+
* (field, pattern) arguments. Throws IllegalArgumentException unless func has exactly two arguments.
188+
*/
189+
private RegexMatch createRegexMatchFromFunction(FunctionExpression func) {
190+
if (func.getArguments().size() != 2) {
191+
throw new IllegalArgumentException("REGEX_MATCH function requires exactly 2 arguments");
192+
}
193+
194+
Expression fieldExpr = func.getArguments().get(0);
195+
Expression patternExpr = func.getArguments().get(1);
196+
197+
// Third argument is the literal false: presumably the "negated" flag, with negation applied elsewhere - TODO confirm
198+
return new RegexMatch(fieldExpr, patternExpr, false);
199+
}
181200
}

0 commit comments

Comments
 (0)