Skip to content

Commit a23b553

Browse files
Support mvfind eval function (#4839) (#5095)
* Support eval function # Conflicts: # core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java # core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java # integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java # ppl/src/main/antlr/OpenSearchPPLLexer.g4 # ppl/src/main/antlr/OpenSearchPPLParser.g4 # ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLArrayFunctionTest.java # Conflicts: # core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java # docs/user/ppl/functions/collection.rst # Conflicts: # core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java # core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java # core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java # integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteArrayFunctionIT.java # ppl/src/main/antlr/OpenSearchPPLLexer.g4 * update tests * fixes * fixes * fix spotless * update md doc * Address coderabbit * Addressing CodeRabbit * Update comment * Restrict mvfind regex parameter to STRING type only * Fix compile * Addressing CodeRabbit --------- (cherry picked from commit 56569f3) Signed-off-by: Kai Huang <ahkcs@amazon.com> Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent 572f613 commit a23b553

11 files changed

Lines changed: 686 additions & 1 deletion

File tree

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ public enum BuiltinFunctionName {
7979
INTERNAL_APPEND(FunctionName.of("append"), true),
8080
MVJOIN(FunctionName.of("mvjoin")),
8181
MVINDEX(FunctionName.of("mvindex")),
82+
MVFIND(FunctionName.of("mvfind")),
8283
MVZIP(FunctionName.of("mvzip")),
8384
SPLIT(FunctionName.of("split")),
8485
MVDEDUP(FunctionName.of("mvdedup")),
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.expression.function.CollectionUDF;
7+
8+
import java.util.List;
9+
import java.util.regex.Pattern;
10+
import java.util.regex.PatternSyntaxException;
11+
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
12+
import org.apache.calcite.adapter.enumerable.NullPolicy;
13+
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
14+
import org.apache.calcite.linq4j.tree.Expression;
15+
import org.apache.calcite.linq4j.tree.Expressions;
16+
import org.apache.calcite.linq4j.tree.Types;
17+
import org.apache.calcite.rex.RexCall;
18+
import org.apache.calcite.rex.RexLiteral;
19+
import org.apache.calcite.sql.type.OperandTypes;
20+
import org.apache.calcite.sql.type.ReturnTypes;
21+
import org.apache.calcite.sql.type.SqlReturnTypeInference;
22+
import org.apache.calcite.sql.type.SqlTypeFamily;
23+
import org.opensearch.sql.expression.function.ImplementorUDF;
24+
import org.opensearch.sql.expression.function.UDFOperandMetadata;
25+
26+
/**
27+
* MVFIND function implementation that finds the index of the first element in a multivalue array
28+
* that matches a regular expression.
29+
*
30+
* <p>Usage: mvfind(array, regex)
31+
*
32+
* <p>Returns the 0-based index of the first array element matching the regex pattern, or NULL if no
33+
* match is found.
34+
*
35+
* <p>Example: mvfind(array('apple', 'banana', 'apricot'), 'ban.*') returns 1
36+
*/
37+
public class MVFindFunctionImpl extends ImplementorUDF {
38+
public MVFindFunctionImpl() {
39+
super(new MVFindImplementor(), NullPolicy.ANY);
40+
}
41+
42+
@Override
43+
public SqlReturnTypeInference getReturnTypeInference() {
44+
return ReturnTypes.INTEGER_NULLABLE;
45+
}
46+
47+
@Override
48+
public UDFOperandMetadata getOperandMetadata() {
49+
// Accept ARRAY and STRING for the regex pattern
50+
return UDFOperandMetadata.wrap(
51+
OperandTypes.family(SqlTypeFamily.ARRAY, SqlTypeFamily.CHARACTER));
52+
}
53+
54+
public static class MVFindImplementor implements NotNullImplementor {
55+
@Override
56+
public Expression implement(
57+
RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
58+
Expression arrayExpr = translatedOperands.get(0);
59+
Expression patternExpr = translatedOperands.get(1);
60+
61+
// Check if regex pattern is a literal - compile at planning time
62+
if (call.operands.size() >= 2 && call.operands.get(1) instanceof RexLiteral) {
63+
RexLiteral patternLiteral = (RexLiteral) call.operands.get(1);
64+
Expression literalPatternExpr = tryCompileLiteralPattern(patternLiteral, arrayExpr);
65+
if (literalPatternExpr != null) {
66+
return literalPatternExpr;
67+
}
68+
}
69+
70+
// For dynamic patterns, use evalWithString
71+
return Expressions.call(
72+
Types.lookupMethod(MVFindFunctionImpl.class, "evalWithString", List.class, String.class),
73+
arrayExpr,
74+
patternExpr);
75+
}
76+
77+
private static Expression tryCompileLiteralPattern(
78+
RexLiteral patternLiteral, Expression arrayExpr) {
79+
// Use getValueAs(String.class) to correctly unwrap Calcite NlsString
80+
String patternString = patternLiteral.getValueAs(String.class);
81+
if (patternString == null) {
82+
return null;
83+
}
84+
try {
85+
// Compile pattern at planning time and validate
86+
Pattern compiledPattern = Pattern.compile(patternString);
87+
// Generate code that uses the pre-compiled pattern
88+
return Expressions.call(
89+
Types.lookupMethod(
90+
MVFindFunctionImpl.class, "evalWithPattern", List.class, Pattern.class),
91+
arrayExpr,
92+
Expressions.constant(compiledPattern, Pattern.class));
93+
} catch (PatternSyntaxException e) {
94+
// Convert to IllegalArgumentException so it's treated as a client error (400)
95+
throw new IllegalArgumentException(
96+
String.format("Invalid regex pattern '%s': %s", patternString, e.getDescription()), e);
97+
}
98+
}
99+
}
100+
101+
private static Integer mvfindCore(List<Object> array, Pattern pattern) {
102+
for (int i = 0; i < array.size(); i++) {
103+
Object element = array.get(i);
104+
if (element != null) {
105+
String strValue = element.toString();
106+
if (pattern.matcher(strValue).find()) {
107+
return i; // Return 0-based index
108+
}
109+
}
110+
}
111+
return null; // No match found
112+
}
113+
114+
/**
115+
* Evaluates mvfind with a pre-compiled Pattern (for literal patterns compiled at planning time).
116+
* Any runtime exceptions from mvfindCore will propagate unchanged.
117+
*
118+
* @param array The array to search
119+
* @param pattern The pre-compiled regex pattern
120+
* @return The 0-based index of the first matching element, or null if no match
121+
*/
122+
public static Integer evalWithPattern(List<Object> array, Pattern pattern) {
123+
if (array == null || pattern == null) {
124+
return null;
125+
}
126+
return mvfindCore(array, pattern);
127+
}
128+
129+
/**
130+
* Evaluates mvfind with a string pattern (for dynamic patterns at runtime).
131+
*
132+
* @param array The array to search
133+
* @param regex The regex pattern string
134+
* @return The 0-based index of the first matching element, or null if no match
135+
*/
136+
public static Integer evalWithString(List<Object> array, String regex) {
137+
if (array == null || regex == null) {
138+
return null;
139+
}
140+
return mvfind(array, regex);
141+
}
142+
143+
/**
144+
* Evaluates mvfind with a String pattern. Compiles the regex pattern and executes search. Throws
145+
* IllegalArgumentException for invalid regex patterns; other runtime exceptions propagate
146+
* unchanged.
147+
*
148+
* @param array The array to search
149+
* @param regex The regex pattern string
150+
* @return The 0-based index of the first matching element, or null if no match
151+
* @throws IllegalArgumentException if the regex pattern is invalid
152+
*/
153+
private static Integer mvfind(List<Object> array, String regex) {
154+
if (array == null || regex == null) {
155+
return null;
156+
}
157+
158+
Pattern pattern;
159+
try {
160+
pattern = Pattern.compile(regex);
161+
} catch (PatternSyntaxException e) {
162+
// Invalid regex is a client error (400)
163+
throw new IllegalArgumentException(
164+
String.format("Invalid regex pattern '%s': %s", regex, e.getDescription()), e);
165+
}
166+
return mvfindCore(array, pattern);
167+
}
168+
}

core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
import org.opensearch.sql.expression.function.CollectionUDF.FilterFunctionImpl;
4949
import org.opensearch.sql.expression.function.CollectionUDF.ForallFunctionImpl;
5050
import org.opensearch.sql.expression.function.CollectionUDF.MVAppendFunctionImpl;
51+
import org.opensearch.sql.expression.function.CollectionUDF.MVFindFunctionImpl;
5152
import org.opensearch.sql.expression.function.CollectionUDF.MVZipFunctionImpl;
5253
import org.opensearch.sql.expression.function.CollectionUDF.MapAppendFunctionImpl;
5354
import org.opensearch.sql.expression.function.CollectionUDF.MapRemoveFunctionImpl;
@@ -396,6 +397,7 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
396397
public static final SqlOperator MVAPPEND = new MVAppendFunctionImpl().toUDF("mvappend");
397398
public static final SqlOperator INTERNAL_APPEND = new AppendFunctionImpl().toUDF("append");
398399
public static final SqlOperator MVZIP = new MVZipFunctionImpl().toUDF("mvzip");
400+
public static final SqlOperator MVFIND = new MVFindFunctionImpl().toUDF("mvfind");
399401
public static final SqlOperator FILTER = new FilterFunctionImpl().toUDF("filter");
400402
public static final SqlOperator TRANSFORM = new TransformFunctionImpl().toUDF("transform");
401403
public static final SqlOperator REDUCE = new ReduceFunctionImpl().toUDF("reduce");

core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@
154154
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MULTI_MATCH;
155155
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVAPPEND;
156156
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVDEDUP;
157+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVFIND;
157158
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVINDEX;
158159
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVJOIN;
159160
import static org.opensearch.sql.expression.function.BuiltinFunctionName.MVMAP;
@@ -1051,6 +1052,7 @@ void populate() {
10511052
registerOperator(MVAPPEND, PPLBuiltinOperators.MVAPPEND);
10521053
registerOperator(INTERNAL_APPEND, PPLBuiltinOperators.INTERNAL_APPEND);
10531054
registerOperator(MVDEDUP, SqlLibraryOperators.ARRAY_DISTINCT);
1055+
registerOperator(MVFIND, PPLBuiltinOperators.MVFIND);
10541056
registerOperator(MVZIP, PPLBuiltinOperators.MVZIP);
10551057
registerOperator(MVMAP, PPLBuiltinOperators.TRANSFORM);
10561058
registerOperator(MAP_APPEND, PPLBuiltinOperators.MAP_APPEND);

0 commit comments

Comments
 (0)