Skip to content

Commit e0c788a

Browse files
committed
implement regex cmd with calcite support by using java library
Signed-off-by: Jialiang Liang <jiallian@amazon.com>
1 parent ad3fc1f commit e0c788a

14 files changed

Lines changed: 403 additions & 0 deletions

File tree

core/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ plugins {
3434

3535
repositories {
    mavenCentral()
    // NOTE(review): JitPack was added by this commit; confirm which dependency actually needs it,
    // since the REGEX_MATCH UDF shown in the same commit only relies on java.util.regex.
    maven {
        url 'https://jitpack.io'
    }
}
3839

3940
pitest {

core/src/main/java/org/opensearch/sql/analysis/Analyzer.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
import org.opensearch.sql.ast.tree.Patterns;
7878
import org.opensearch.sql.ast.tree.Project;
7979
import org.opensearch.sql.ast.tree.RareTopN;
80+
import org.opensearch.sql.ast.tree.Regex;
8081
import org.opensearch.sql.ast.tree.Relation;
8182
import org.opensearch.sql.ast.tree.RelationSubquery;
8283
import org.opensearch.sql.ast.tree.Rename;
@@ -689,6 +690,28 @@ public LogicalPlan visitReverse(Reverse node, AnalysisContext context) {
689690
"REVERSE is supported only when " + CALCITE_ENGINE_ENABLED.getKeyValue() + "=true");
690691
}
691692

693+
@Override
694+
public LogicalPlan visitRegex(Regex node, AnalysisContext context) {
695+
// Get the child plan (source of data)
696+
LogicalPlan child = node.getChild().get(0).accept(this, context);
697+
698+
// Analyze the field and pattern expressions
699+
Expression fieldExpr = expressionAnalyzer.analyze(node.getField(), context);
700+
Expression patternExpr = expressionAnalyzer.analyze(node.getPattern(), context);
701+
702+
// Create the RegexMatch expression directly
703+
// This is our custom PCRE-based implementation
704+
Expression regexExpr =
705+
new org.opensearch.sql.expression.operator.predicate.RegexMatch(
706+
fieldExpr, patternExpr, node.isNegated());
707+
708+
// Return a LogicalFilter with the regex condition
709+
// No need for optimization since RegexMatch is already a concrete expression
710+
LogicalFilter result = new LogicalFilter(child, regexExpr);
711+
712+
return result;
713+
}
714+
692715
@Override
693716
public LogicalPlan visitPaginate(Paginate paginate, AnalysisContext context) {
694717
LogicalPlan child = paginate.getChild().get(0).accept(this, context);

core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
import org.opensearch.sql.ast.tree.Patterns;
6767
import org.opensearch.sql.ast.tree.Project;
6868
import org.opensearch.sql.ast.tree.RareTopN;
69+
import org.opensearch.sql.ast.tree.Regex;
6970
import org.opensearch.sql.ast.tree.Relation;
7071
import org.opensearch.sql.ast.tree.RelationSubquery;
7172
import org.opensearch.sql.ast.tree.Rename;
@@ -249,6 +250,10 @@ public T visitReverse(Reverse node, C context) {
249250
return visitChildren(node, context);
250251
}
251252

253+
public T visitRegex(Regex node, C context) {
254+
return visitChildren(node, context);
255+
}
256+
252257
public T visitLambdaFunction(LambdaFunction node, C context) {
253258
return visitChildren(node, context);
254259
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.ast.tree;
7+
8+
import com.google.common.collect.ImmutableList;
9+
import java.util.List;
10+
import lombok.EqualsAndHashCode;
11+
import lombok.Getter;
12+
import lombok.Setter;
13+
import lombok.ToString;
14+
import org.opensearch.sql.ast.AbstractNodeVisitor;
15+
import org.opensearch.sql.ast.expression.Literal;
16+
import org.opensearch.sql.ast.expression.UnresolvedExpression;
17+
18+
/** AST node represent Regex filtering operation. */
19+
@Getter
20+
@ToString
21+
@EqualsAndHashCode(callSuper = false)
22+
public class Regex extends UnresolvedPlan {
23+
/** Field to match against. */
24+
private final UnresolvedExpression field;
25+
26+
/** Whether this is a negated match (!=). */
27+
private final boolean negated;
28+
29+
/** Pattern. */
30+
private final Literal pattern;
31+
32+
/** Child Plan. */
33+
@Setter private UnresolvedPlan child;
34+
35+
public Regex(UnresolvedExpression field, String operator, Literal pattern) {
36+
// Require explicit field - no default to _source for PoC
37+
this.field = field;
38+
this.negated = "!=".equals(operator);
39+
this.pattern = pattern;
40+
}
41+
42+
@Override
43+
public Regex attach(UnresolvedPlan child) {
44+
this.child = child;
45+
return this;
46+
}
47+
48+
@Override
49+
public List<UnresolvedPlan> getChild() {
50+
return this.child == null ? ImmutableList.of() : ImmutableList.of(this.child);
51+
}
52+
53+
@Override
54+
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
55+
return nodeVisitor.visitRegex(this, context);
56+
}
57+
}

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@
9797
import org.opensearch.sql.ast.tree.Patterns;
9898
import org.opensearch.sql.ast.tree.Project;
9999
import org.opensearch.sql.ast.tree.RareTopN;
100+
import org.opensearch.sql.ast.tree.Regex;
100101
import org.opensearch.sql.ast.tree.Relation;
101102
import org.opensearch.sql.ast.tree.Rename;
102103
import org.opensearch.sql.ast.tree.Sort;
@@ -169,6 +170,38 @@ public RelNode visitFilter(Filter node, CalcitePlanContext context) {
169170
return context.relBuilder.peek();
170171
}
171172

173+
@Override
174+
public RelNode visitRegex(Regex node, CalcitePlanContext context) {
175+
visitChildren(node, context);
176+
177+
// Create our PCRE2 RegexMatch expression directly, just like the legacy engine
178+
// This ensures both engines use identical PCRE2 implementation
179+
180+
// Analyze the field and pattern expressions in the current context
181+
RexNode fieldRex = rexVisitor.analyze(node.getField(), context);
182+
RexNode patternRex = rexVisitor.analyze(node.getPattern(), context);
183+
184+
// Create a custom RexNode that represents our RegexMatch expression
185+
// This will be handled by the script engine with PCRE2 support
186+
RexNode regexCondition = createRegexMatchRexNode(fieldRex, patternRex, context);
187+
188+
// If negated, wrap with NOT
189+
if (node.isNegated()) {
190+
regexCondition = context.rexBuilder.makeCall(SqlStdOperatorTable.NOT, regexCondition);
191+
}
192+
193+
context.relBuilder.filter(regexCondition);
194+
return context.relBuilder.peek();
195+
}
196+
197+
private RexNode createRegexMatchRexNode(
198+
RexNode field, RexNode pattern, CalcitePlanContext context) {
199+
// Use the UDF version that has proper enumerable implementation support
200+
// This ensures PCRE2 usage for both pushdown and in-memory execution
201+
return context.rexBuilder.makeCall(
202+
org.opensearch.sql.expression.function.PPLBuiltinOperators.REGEX_MATCH, field, pattern);
203+
}
204+
172205
private boolean containsSubqueryExpression(Node expr) {
173206
if (expr == null) {
174207
return false;

core/src/main/java/org/opensearch/sql/expression/ExpressionNodeVisitor.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,4 +101,14 @@ public T visitWhen(WhenClause node, C context) {
101101
public T visitNamedArgument(NamedArgumentExpression node, C context) {
102102
return visitNode(node, context);
103103
}
104+
105+
public T visitRegex(org.opensearch.sql.expression.operator.predicate.RegexMatch node, C context) {
106+
// Visit field and pattern expressions to ensure field extraction works properly
107+
T result = defaultResult();
108+
T fieldResult = node.getField().accept(this, context);
109+
result = aggregateResult(result, fieldResult);
110+
T patternResult = node.getPattern().accept(this, context);
111+
result = aggregateResult(result, patternResult);
112+
return result;
113+
}
104114
}

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ public enum BuiltinFunctionName {
215215
LTRIM(FunctionName.of("ltrim")),
216216
POSITION(FunctionName.of("position")),
217217
REGEXP(FunctionName.of("regexp")),
218+
REGEX_MATCH(FunctionName.of("REGEX_MATCH")),
218219
REPLACE(FunctionName.of("replace")),
219220
REVERSE(FunctionName.of("reverse")),
220221
RIGHT(FunctionName.of("right")),

core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,10 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
380380
public static final SqlOperator NUMBER_TO_STRING =
381381
new NumberToStringFunction().toUDF("NUMBER_TO_STRING");
382382

383+
// Custom regex operator for the Calcite engine (RegexMatchFunctionImpl evaluates it with java.util.regex, not PCRE2)
384+
public static final SqlOperator REGEX_MATCH =
385+
new org.opensearch.sql.expression.function.udf.RegexMatchFunctionImpl().toUDF("REGEX_MATCH");
386+
383387
/**
384388
* Returns the PPL specific operator table, creating it if necessary.
385389
*

core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@
161161
import static org.opensearch.sql.expression.function.BuiltinFunctionName.RAND;
162162
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REDUCE;
163163
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEXP;
164+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REGEX_MATCH;
164165
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REPLACE;
165166
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REVERSE;
166167
import static org.opensearch.sql.expression.function.BuiltinFunctionName.RIGHT;
@@ -844,6 +845,7 @@ void populate() {
844845
registerOperator(SIMPLE_QUERY_STRING, PPLBuiltinOperators.SIMPLE_QUERY_STRING);
845846
registerOperator(QUERY_STRING, PPLBuiltinOperators.QUERY_STRING);
846847
registerOperator(MULTI_MATCH, PPLBuiltinOperators.MULTI_MATCH);
848+
registerOperator(REGEX_MATCH, PPLBuiltinOperators.REGEX_MATCH);
847849

848850
// Register PPL Datetime UDF operator
849851
registerOperator(TIMESTAMP, PPLBuiltinOperators.TIMESTAMP);
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.expression.function.udf;
7+
8+
import java.util.List;
9+
import java.util.regex.Pattern;
10+
import java.util.regex.PatternSyntaxException;
11+
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
12+
import org.apache.calcite.adapter.enumerable.NullPolicy;
13+
import org.apache.calcite.adapter.enumerable.RexImpTable;
14+
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
15+
import org.apache.calcite.linq4j.tree.Expression;
16+
import org.apache.calcite.linq4j.tree.Types;
17+
import org.apache.calcite.rex.RexCall;
18+
import org.apache.calcite.schema.impl.ScalarFunctionImpl;
19+
import org.apache.calcite.sql.type.ReturnTypes;
20+
import org.apache.calcite.sql.type.SqlReturnTypeInference;
21+
import org.opensearch.sql.expression.function.ImplementorUDF;
22+
import org.opensearch.sql.expression.function.UDFOperandMetadata;
23+
24+
/**
25+
* REGEX_MATCH UDF implementation for Calcite engine.
26+
* This function provides Java regex matching via script query pushdown.
27+
*/
28+
public class RegexMatchFunctionImpl extends ImplementorUDF {
29+
30+
public RegexMatchFunctionImpl() {
31+
super(new RegexMatchImplementor(), NullPolicy.ANY);
32+
}
33+
34+
@Override
35+
public SqlReturnTypeInference getReturnTypeInference() {
36+
return ReturnTypes.BOOLEAN_FORCE_NULLABLE;
37+
}
38+
39+
@Override
40+
public UDFOperandMetadata getOperandMetadata() {
41+
return null;
42+
}
43+
44+
public static class RegexMatchImplementor implements NotNullImplementor {
45+
@Override
46+
public Expression implement(
47+
RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
48+
ScalarFunctionImpl function =
49+
(ScalarFunctionImpl)
50+
ScalarFunctionImpl.create(
51+
Types.lookupMethod(
52+
RegexMatchFunctionImpl.class, "eval", String.class, String.class));
53+
return function.getImplementor().implement(translator, call, RexImpTable.NullAs.NULL);
54+
}
55+
}
56+
57+
/**
58+
* Evaluation method for REGEX_MATCH function. This method is called by Calcite's generated code
59+
* during execution.
60+
*
61+
* @param field The field value to match against
62+
* @param pattern The Java regex pattern
63+
* @return Boolean result of regex match
64+
*/
65+
public static Boolean eval(String field, String pattern) {
66+
if (field == null || pattern == null) {
67+
return null;
68+
}
69+
70+
// Use Java regex library for evaluation (same logic as RegexMatch)
71+
try {
72+
Pattern compiledPattern = Pattern.compile(pattern);
73+
java.util.regex.Matcher matcher = compiledPattern.matcher(field);
74+
return matcher.find(); // Use find() for partial match like SPL
75+
} catch (PatternSyntaxException e) {
76+
throw new IllegalArgumentException("Invalid regex pattern: " + e.getMessage());
77+
}
78+
}
79+
}

0 commit comments

Comments
 (0)