Skip to content

Commit 2bae9eb

Browse files
committed
max_match - initial implementation
Signed-off-by: Jialiang Liang <jiallian@amazon.com>
1 parent 752126e commit 2bae9eb

7 files changed

Lines changed: 126 additions & 11 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -187,26 +187,36 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
187187
"Rex pattern must contain at least one named capture group");
188188
}
189189

190-
// Filter matching rows on data nodes using script pushdown
191190
RexNode regexMatchCondition =
192191
context.rexBuilder.makeCall(
193192
SqlLibraryOperators.REGEXP_CONTAINS,
194193
fieldRex,
195194
context.rexBuilder.makeLiteral(patternStr));
196195
context.relBuilder.filter(regexMatchCondition);
197196

198-
// Extract fields from filtered data
199197
List<RexNode> newFields = new ArrayList<>();
200198
List<String> newFieldNames = new ArrayList<>();
201199

202200
for (int i = 0; i < namedGroups.size(); i++) {
203-
RexNode extractCall =
204-
PPLFuncImpTable.INSTANCE.resolve(
205-
context.rexBuilder,
206-
BuiltinFunctionName.REX_EXTRACT,
207-
fieldRex,
208-
context.rexBuilder.makeLiteral(patternStr),
209-
context.relBuilder.literal(i + 1));
201+
RexNode extractCall;
202+
if (node.getMaxMatch().isPresent() && node.getMaxMatch().get() != 1) {
203+
extractCall =
204+
PPLFuncImpTable.INSTANCE.resolve(
205+
context.rexBuilder,
206+
BuiltinFunctionName.REX_EXTRACT_MULTI,
207+
fieldRex,
208+
context.rexBuilder.makeLiteral(patternStr),
209+
context.relBuilder.literal(i + 1),
210+
context.relBuilder.literal(node.getMaxMatch().get()));
211+
} else {
212+
extractCall =
213+
PPLFuncImpTable.INSTANCE.resolve(
214+
context.rexBuilder,
215+
BuiltinFunctionName.REX_EXTRACT,
216+
fieldRex,
217+
context.rexBuilder.makeLiteral(patternStr),
218+
context.relBuilder.literal(i + 1));
219+
}
210220
newFields.add(extractCall);
211221
newFieldNames.add(namedGroups.get(i));
212222
}

core/src/main/java/org/opensearch/sql/calcite/utils/PPLOperandTypes.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ private PPLOperandTypes() {}
5151
UDFOperandMetadata.wrap(
5252
OperandTypes.family(
5353
SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER, SqlTypeFamily.INTEGER));
54+
public static final UDFOperandMetadata STRING_STRING_INTEGER_INTEGER =
55+
UDFOperandMetadata.wrap(
56+
OperandTypes.family(
57+
SqlTypeFamily.CHARACTER,
58+
SqlTypeFamily.CHARACTER,
59+
SqlTypeFamily.INTEGER,
60+
SqlTypeFamily.INTEGER));
5461

5562
public static final UDFOperandMetadata NUMERIC_NUMERIC_NUMERIC =
5663
UDFOperandMetadata.wrap(

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ public enum BuiltinFunctionName {
216216
POSITION(FunctionName.of("position")),
217217
REGEXP(FunctionName.of("regexp")),
218218
REX_EXTRACT(FunctionName.of("REX_EXTRACT")),
219+
REX_EXTRACT_MULTI(FunctionName.of("REX_EXTRACT_MULTI")),
219220
REX_OFFSET(FunctionName.of("REX_OFFSET")),
220221
REPLACE(FunctionName.of("replace")),
221222
REVERSE(FunctionName.of("reverse")),

core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import org.opensearch.sql.expression.function.udf.GrokFunction;
5050
import org.opensearch.sql.expression.function.udf.RelevanceQueryFunction;
5151
import org.opensearch.sql.expression.function.udf.RexExtractFunction;
52+
import org.opensearch.sql.expression.function.udf.RexExtractMultiFunction;
5253
import org.opensearch.sql.expression.function.udf.RexOffsetFunction;
5354
import org.opensearch.sql.expression.function.udf.SpanFunction;
5455
import org.opensearch.sql.expression.function.udf.condition.EarliestFunction;
@@ -383,6 +384,8 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
383384
public static final SqlOperator NUMBER_TO_STRING =
384385
new NumberToStringFunction().toUDF("NUMBER_TO_STRING");
385386
public static final SqlOperator REX_EXTRACT = new RexExtractFunction().toUDF("REX_EXTRACT");
387+
public static final SqlOperator REX_EXTRACT_MULTI =
388+
new RexExtractMultiFunction().toUDF("REX_EXTRACT_MULTI");
386389
public static final SqlOperator REX_OFFSET = new RexOffsetFunction().toUDF("REX_OFFSET");
387390

388391
/**

core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@
163163
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REPLACE;
164164
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REVERSE;
165165
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_EXTRACT;
166+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_EXTRACT_MULTI;
166167
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_OFFSET;
167168
import static org.opensearch.sql.expression.function.BuiltinFunctionName.RIGHT;
168169
import static org.opensearch.sql.expression.function.BuiltinFunctionName.RINT;
@@ -786,6 +787,7 @@ void populate() {
786787
registerOperator(QUERY_STRING, PPLBuiltinOperators.QUERY_STRING);
787788
registerOperator(MULTI_MATCH, PPLBuiltinOperators.MULTI_MATCH);
788789
registerOperator(REX_EXTRACT, PPLBuiltinOperators.REX_EXTRACT);
790+
registerOperator(REX_EXTRACT_MULTI, PPLBuiltinOperators.REX_EXTRACT_MULTI);
789791
registerOperator(REX_OFFSET, PPLBuiltinOperators.REX_OFFSET);
790792

791793
// Register PPL Datetime UDF operator

core/src/main/java/org/opensearch/sql/expression/function/udf/RexExtractFunction.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
package org.opensearch.sql.expression.function.udf;
77

88
import java.util.List;
9+
import java.util.regex.Matcher;
10+
import java.util.regex.Pattern;
911
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
1012
import org.apache.calcite.adapter.enumerable.NullPolicy;
1113
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
@@ -54,8 +56,8 @@ public static String extractGroup(String text, String pattern, int groupIndex) {
5456
}
5557

5658
try {
57-
java.util.regex.Pattern compiledPattern = java.util.regex.Pattern.compile(pattern);
58-
java.util.regex.Matcher matcher = compiledPattern.matcher(text);
59+
Pattern compiledPattern = Pattern.compile(pattern);
60+
Matcher matcher = compiledPattern.matcher(text);
5961

6062
if (matcher.find() && groupIndex > 0 && groupIndex <= matcher.groupCount()) {
6163
return matcher.group(groupIndex);
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.expression.function.udf;
7+
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
import java.util.regex.Matcher;
11+
import java.util.regex.Pattern;
12+
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
13+
import org.apache.calcite.adapter.enumerable.NullPolicy;
14+
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
15+
import org.apache.calcite.linq4j.tree.Expression;
16+
import org.apache.calcite.linq4j.tree.Expressions;
17+
import org.apache.calcite.rex.RexCall;
18+
import org.apache.calcite.sql.type.SqlReturnTypeInference;
19+
import org.apache.calcite.sql.type.SqlTypeName;
20+
import org.opensearch.sql.calcite.utils.PPLOperandTypes;
21+
import org.opensearch.sql.expression.function.ImplementorUDF;
22+
import org.opensearch.sql.expression.function.UDFOperandMetadata;
23+
24+
/** Custom REX_EXTRACT_MULTI function for extracting multiple regex matches. */
25+
public final class RexExtractMultiFunction extends ImplementorUDF {
26+
27+
/**
 * Creates the UDF backed by {@link RexExtractMultiImplementor}, registered with {@link
 * NullPolicy#ARG0}.
 */
public RexExtractMultiFunction() {
  super(new RexExtractMultiImplementor(), NullPolicy.ARG0);
}
30+
31+
@Override
32+
public SqlReturnTypeInference getReturnTypeInference() {
33+
return call ->
34+
call.getTypeFactory()
35+
.createArrayType(call.getTypeFactory().createSqlType(SqlTypeName.VARCHAR, 2000), -1);
36+
}
37+
38+
@Override
39+
public UDFOperandMetadata getOperandMetadata() {
40+
return PPLOperandTypes.STRING_STRING_INTEGER_INTEGER;
41+
}
42+
43+
private static class RexExtractMultiImplementor implements NotNullImplementor {
44+
45+
@Override
46+
public Expression implement(
47+
RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
48+
Expression field = translatedOperands.get(0);
49+
Expression pattern = translatedOperands.get(1);
50+
Expression groupIndex = translatedOperands.get(2);
51+
Expression maxMatch = translatedOperands.get(3);
52+
53+
return Expressions.call(
54+
RexExtractMultiFunction.class,
55+
"extractMultipleGroups",
56+
field,
57+
pattern,
58+
groupIndex,
59+
maxMatch);
60+
}
61+
}
62+
63+
public static List<String> extractMultipleGroups(
64+
String text, String pattern, int groupIndex, int maxMatch) {
65+
if (text == null || pattern == null) {
66+
return null;
67+
}
68+
69+
try {
70+
Pattern compiledPattern = Pattern.compile(pattern);
71+
Matcher matcher = compiledPattern.matcher(text);
72+
List<String> matches = new ArrayList<>();
73+
74+
int matchCount = 0;
75+
while (matcher.find() && (maxMatch == 0 || matchCount < maxMatch)) {
76+
if (groupIndex > 0 && groupIndex <= matcher.groupCount()) {
77+
String match = matcher.group(groupIndex);
78+
if (match != null) {
79+
matches.add(match);
80+
matchCount++;
81+
}
82+
}
83+
}
84+
85+
return matches.isEmpty() ? null : matches;
86+
} catch (Exception e) {
87+
return null;
88+
}
89+
}
90+
}

0 commit comments

Comments
 (0)