Skip to content

Commit 752126e

Browse files
committed
offset_field - initial implementation
Signed-off-by: Jialiang Liang <jiallian@amazon.com>
1 parent 6380e03 commit 752126e

9 files changed

Lines changed: 150 additions & 9 deletions

File tree

core/src/main/java/org/opensearch/sql/ast/tree/Rex.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,17 +30,29 @@ public class Rex extends UnresolvedPlan {
3030
/** Maximum number of matches (optional). */
3131
private final Optional<Integer> maxMatch;
3232

33+
/** Offset field name for position tracking (optional). */
34+
private final Optional<String> offsetField;
35+
3336
/** Child Plan. */
3437
@Setter private UnresolvedPlan child;
3538

3639
public Rex(UnresolvedExpression field, Literal pattern) {
37-
this(field, pattern, Optional.empty());
40+
this(field, pattern, Optional.empty(), Optional.empty());
3841
}
3942

4043
public Rex(UnresolvedExpression field, Literal pattern, Optional<Integer> maxMatch) {
44+
this(field, pattern, maxMatch, Optional.empty());
45+
}
46+
47+
public Rex(
48+
UnresolvedExpression field,
49+
Literal pattern,
50+
Optional<Integer> maxMatch,
51+
Optional<String> offsetField) {
4152
this.field = field;
4253
this.pattern = pattern;
4354
this.maxMatch = maxMatch;
55+
this.offsetField = offsetField;
4456
}
4557

4658
@Override

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import org.apache.calcite.rex.RexLiteral;
4848
import org.apache.calcite.rex.RexNode;
4949
import org.apache.calcite.rex.RexWindowBounds;
50+
import org.apache.calcite.sql.fun.SqlLibraryOperators;
5051
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
5152
import org.apache.calcite.sql.type.SqlTypeName;
5253
import org.apache.calcite.tools.RelBuilder;
@@ -189,13 +190,15 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
189190
// Filter matching rows on data nodes using script pushdown
190191
RexNode regexMatchCondition =
191192
context.rexBuilder.makeCall(
192-
org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_CONTAINS,
193+
SqlLibraryOperators.REGEXP_CONTAINS,
193194
fieldRex,
194195
context.rexBuilder.makeLiteral(patternStr));
195196
context.relBuilder.filter(regexMatchCondition);
196197

197198
// Extract fields from filtered data
198199
List<RexNode> newFields = new ArrayList<>();
200+
List<String> newFieldNames = new ArrayList<>();
201+
199202
for (int i = 0; i < namedGroups.size(); i++) {
200203
RexNode extractCall =
201204
PPLFuncImpTable.INSTANCE.resolve(
@@ -205,9 +208,21 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
205208
context.rexBuilder.makeLiteral(patternStr),
206209
context.relBuilder.literal(i + 1));
207210
newFields.add(extractCall);
211+
newFieldNames.add(namedGroups.get(i));
212+
}
213+
214+
if (node.getOffsetField().isPresent()) {
215+
RexNode offsetCall =
216+
PPLFuncImpTable.INSTANCE.resolve(
217+
context.rexBuilder,
218+
BuiltinFunctionName.REX_OFFSET,
219+
fieldRex,
220+
context.rexBuilder.makeLiteral(patternStr));
221+
newFields.add(offsetCall);
222+
newFieldNames.add(node.getOffsetField().get());
208223
}
209224

210-
projectPlusOverriding(newFields, namedGroups, context);
225+
projectPlusOverriding(newFields, newFieldNames, context);
211226
return context.relBuilder.peek();
212227
}
213228

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ public enum BuiltinFunctionName {
216216
POSITION(FunctionName.of("position")),
217217
REGEXP(FunctionName.of("regexp")),
218218
REX_EXTRACT(FunctionName.of("REX_EXTRACT")),
219+
REX_OFFSET(FunctionName.of("REX_OFFSET")),
219220
REPLACE(FunctionName.of("replace")),
220221
REVERSE(FunctionName.of("reverse")),
221222
RIGHT(FunctionName.of("right")),

core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import org.opensearch.sql.expression.function.udf.GrokFunction;
5050
import org.opensearch.sql.expression.function.udf.RelevanceQueryFunction;
5151
import org.opensearch.sql.expression.function.udf.RexExtractFunction;
52+
import org.opensearch.sql.expression.function.udf.RexOffsetFunction;
5253
import org.opensearch.sql.expression.function.udf.SpanFunction;
5354
import org.opensearch.sql.expression.function.udf.condition.EarliestFunction;
5455
import org.opensearch.sql.expression.function.udf.condition.LatestFunction;
@@ -382,6 +383,7 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
382383
public static final SqlOperator NUMBER_TO_STRING =
383384
new NumberToStringFunction().toUDF("NUMBER_TO_STRING");
384385
public static final SqlOperator REX_EXTRACT = new RexExtractFunction().toUDF("REX_EXTRACT");
386+
public static final SqlOperator REX_OFFSET = new RexOffsetFunction().toUDF("REX_OFFSET");
385387

386388
/**
387389
* Returns the PPL specific operator table, creating it if necessary.

core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@
163163
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REPLACE;
164164
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REVERSE;
165165
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_EXTRACT;
166+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_OFFSET;
166167
import static org.opensearch.sql.expression.function.BuiltinFunctionName.RIGHT;
167168
import static org.opensearch.sql.expression.function.BuiltinFunctionName.RINT;
168169
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ROUND;
@@ -785,6 +786,7 @@ void populate() {
785786
registerOperator(QUERY_STRING, PPLBuiltinOperators.QUERY_STRING);
786787
registerOperator(MULTI_MATCH, PPLBuiltinOperators.MULTI_MATCH);
787788
registerOperator(REX_EXTRACT, PPLBuiltinOperators.REX_EXTRACT);
789+
registerOperator(REX_OFFSET, PPLBuiltinOperators.REX_OFFSET);
788790

789791
// Register PPL Datetime UDF operator
790792
registerOperator(TIMESTAMP, PPLBuiltinOperators.TIMESTAMP);
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.expression.function.udf;
7+
8+
import java.util.List;
9+
import java.util.regex.Matcher;
10+
import java.util.regex.Pattern;
11+
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
12+
import org.apache.calcite.adapter.enumerable.NullPolicy;
13+
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
14+
import org.apache.calcite.linq4j.tree.Expression;
15+
import org.apache.calcite.linq4j.tree.Expressions;
16+
import org.apache.calcite.rex.RexCall;
17+
import org.apache.calcite.sql.type.ReturnTypes;
18+
import org.apache.calcite.sql.type.SqlReturnTypeInference;
19+
import org.opensearch.sql.calcite.utils.PPLOperandTypes;
20+
import org.opensearch.sql.expression.function.ImplementorUDF;
21+
import org.opensearch.sql.expression.function.UDFOperandMetadata;
22+
23+
/** Custom REX_OFFSET function for calculating regex match positions. */
24+
public final class RexOffsetFunction extends ImplementorUDF {
25+
26+
/**
 * Creates the REX_OFFSET UDF, wiring in {@link RexOffsetImplementor} as the code generator and
 * {@link NullPolicy#ARG0} as the null policy (presumably: a null first operand short-circuits to
 * a null result — confirm against Calcite's {@code NullPolicy} documentation).
 */
public RexOffsetFunction() {
  super(new RexOffsetImplementor(), NullPolicy.ARG0);
}
29+
30+
@Override
31+
public SqlReturnTypeInference getReturnTypeInference() {
32+
return ReturnTypes.VARCHAR_2000_NULLABLE;
33+
}
34+
35+
@Override
36+
public UDFOperandMetadata getOperandMetadata() {
37+
return PPLOperandTypes.STRING_STRING;
38+
}
39+
40+
private static class RexOffsetImplementor implements NotNullImplementor {
41+
42+
@Override
43+
public Expression implement(
44+
RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
45+
Expression field = translatedOperands.get(0);
46+
Expression pattern = translatedOperands.get(1);
47+
48+
return Expressions.call(RexOffsetFunction.class, "calculateOffsets", field, pattern);
49+
}
50+
}
51+
52+
public static String calculateOffsets(String text, String patternStr) {
53+
if (text == null || patternStr == null) {
54+
return null;
55+
}
56+
57+
try {
58+
Pattern pattern = Pattern.compile(patternStr);
59+
Matcher matcher = pattern.matcher(text);
60+
61+
if (!matcher.find()) {
62+
return null;
63+
}
64+
65+
StringBuilder result = new StringBuilder();
66+
67+
Pattern namedGroupPattern = Pattern.compile("\\(\\?<([^>]+)>");
68+
Matcher namedGroupMatcher = namedGroupPattern.matcher(patternStr);
69+
70+
int groupIndex = 1;
71+
boolean firstGroup = true;
72+
73+
while (namedGroupMatcher.find()) {
74+
String groupName = namedGroupMatcher.group(1);
75+
76+
if (groupIndex <= matcher.groupCount()) {
77+
int start = matcher.start(groupIndex);
78+
int end = matcher.end(groupIndex);
79+
80+
if (start >= 0 && end >= 0) {
81+
if (!firstGroup) {
82+
result.append(",");
83+
}
84+
result.append(groupName).append("=").append(start).append("-").append(end - 1);
85+
firstGroup = false;
86+
}
87+
}
88+
groupIndex++;
89+
}
90+
91+
return result.length() > 0 ? result.toString() : null;
92+
} catch (Exception e) {
93+
return null;
94+
}
95+
}
96+
}

ppl/src/main/antlr/OpenSearchPPLLexer.g4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ FREQUENCY_THRESHOLD_PERCENTAGE: 'FREQUENCY_THRESHOLD_PERCENTAGE';
5151
METHOD: 'METHOD';
5252
MAX_SAMPLE_COUNT: 'MAX_SAMPLE_COUNT';
5353
MAX_MATCH: 'MAX_MATCH';
54+
OFFSET_FIELD: 'OFFSET_FIELD';
5455
BUFFER_LIMIT: 'BUFFER_LIMIT';
5556
LABEL: 'LABEL';
5657
AGGREGATION: 'AGGREGATION';

ppl/src/main/antlr/OpenSearchPPLParser.g4

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,13 @@ rexCommand
194194
;
195195

196196
rexExpr
197-
: pattern=stringLiteral
198-
| FIELD EQUAL field=qualifiedName pattern=stringLiteral
199-
| FIELD EQUAL field=qualifiedName pattern=stringLiteral (MAX_MATCH EQUAL maxMatch=integerLiteral)?
197+
: pattern=stringLiteral (rexOption)*
198+
| FIELD EQUAL field=qualifiedName (rexOption)* pattern=stringLiteral (rexOption)*
199+
;
200+
201+
rexOption
202+
: MAX_MATCH EQUAL maxMatch=integerLiteral
203+
| OFFSET_FIELD EQUAL offsetField=qualifiedName
200204
;
201205
patternsMethod
202206
: PUNCT

ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -770,15 +770,23 @@ public UnresolvedPlan visitRexCommand(OpenSearchPPLParser.RexCommandContext ctx)
770770
UnresolvedExpression field = null;
771771
Literal pattern = (Literal) internalVisitExpression(ctx.rexExpr().pattern);
772772
Optional<Integer> maxMatch = Optional.empty();
773+
Optional<String> offsetField = Optional.empty();
773774

774775
if (ctx.rexExpr().field != null) {
775776
field = internalVisitExpression(ctx.rexExpr().field);
776777
}
777-
if (ctx.rexExpr().maxMatch != null) {
778-
maxMatch = Optional.of(Integer.parseInt(ctx.rexExpr().maxMatch.getText()));
778+
779+
// Process rex options
780+
for (OpenSearchPPLParser.RexOptionContext optionCtx : ctx.rexExpr().rexOption()) {
781+
if (optionCtx.maxMatch != null) {
782+
maxMatch = Optional.of(Integer.parseInt(optionCtx.maxMatch.getText()));
783+
}
784+
if (optionCtx.offsetField != null) {
785+
offsetField = Optional.of(internalVisitExpression(optionCtx.offsetField).toString());
786+
}
779787
}
780788

781-
return new Rex(field, pattern, maxMatch);
789+
return new Rex(field, pattern, maxMatch, offsetField);
782790
}
783791

784792
/** Get original text in query. */

0 commit comments

Comments
 (0)