Skip to content

Commit cf208fc

Browse files
committed
DECOUPLE SED + OFFSET FIELD
Signed-off-by: Jialiang Liang <jiallian@amazon.com>
1 parent dce13b4 commit cf208fc

18 files changed

Lines changed: 18 additions & 707 deletions

File tree

core/src/main/java/org/opensearch/sql/ast/tree/Rex.java

Lines changed: 9 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -23,55 +23,38 @@
2323
public class Rex extends UnresolvedPlan {
2424

2525
public enum RexMode {
26-
EXTRACT,
27-
SED
26+
EXTRACT
2827
}
2928

3029
/** Field to extract from. */
3130
private final UnresolvedExpression field;
3231

33-
/** Pattern with named capture groups or sed expression. */
32+
/** Pattern with named capture groups. */
3433
private final Literal pattern;
3534

35+
/** Rex mode (only EXTRACT supported). */
36+
private final RexMode mode;
37+
3638
/** Maximum number of matches (optional). */
3739
private final Optional<Integer> maxMatch;
3840

39-
/** Offset field name for position tracking (optional). */
40-
private final Optional<String> offsetField;
41-
42-
/** Rex mode (extract or sed). */
43-
private final RexMode mode;
44-
4541
/** Child Plan. */
4642
@Setter private UnresolvedPlan child;
4743

4844
public Rex(UnresolvedExpression field, Literal pattern) {
49-
this(field, pattern, Optional.empty(), Optional.empty(), RexMode.EXTRACT);
45+
this(field, pattern, RexMode.EXTRACT, Optional.empty());
5046
}
5147

5248
public Rex(UnresolvedExpression field, Literal pattern, Optional<Integer> maxMatch) {
53-
this(field, pattern, maxMatch, Optional.empty(), RexMode.EXTRACT);
49+
this(field, pattern, RexMode.EXTRACT, maxMatch);
5450
}
5551

5652
public Rex(
57-
UnresolvedExpression field,
58-
Literal pattern,
59-
Optional<Integer> maxMatch,
60-
Optional<String> offsetField) {
61-
this(field, pattern, maxMatch, offsetField, RexMode.EXTRACT);
62-
}
63-
64-
public Rex(
65-
UnresolvedExpression field,
66-
Literal pattern,
67-
Optional<Integer> maxMatch,
68-
Optional<String> offsetField,
69-
RexMode mode) {
53+
UnresolvedExpression field, Literal pattern, RexMode mode, Optional<Integer> maxMatch) {
7054
this.field = field;
7155
this.pattern = pattern;
72-
this.maxMatch = maxMatch;
73-
this.offsetField = offsetField;
7456
this.mode = mode;
57+
this.maxMatch = maxMatch;
7558
}
7659

7760
@Override

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 2 additions & 132 deletions
Original file line number | Diff line number | Diff line change
@@ -183,13 +183,6 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
183183
RexNode fieldRex = rexVisitor.analyze(node.getField(), context);
184184
String patternStr = (String) node.getPattern().getValue();
185185

186-
if (node.getMode() == Rex.RexMode.SED) {
187-
RexNode sedCall = createOptimizedSedCall(fieldRex, patternStr, context);
188-
String fieldName = node.getField().toString();
189-
projectPlusOverriding(List.of(sedCall), List.of(fieldName), context);
190-
return context.relBuilder.peek();
191-
}
192-
193186
List<String> namedGroups = RegexExpression.getNamedGroupCandidates(patternStr);
194187

195188
if (namedGroups.isEmpty()) {
@@ -203,6 +196,7 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
203196
for (int i = 0; i < namedGroups.size(); i++) {
204197
RexNode extractCall;
205198
if (node.getMaxMatch().isPresent() && node.getMaxMatch().get() != 1) {
199+
// Use REX_EXTRACT_MULTI for multiple matches
206200
extractCall =
207201
PPLFuncImpTable.INSTANCE.resolve(
208202
context.rexBuilder,
@@ -212,6 +206,7 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
212206
context.relBuilder.literal(i + 1),
213207
context.relBuilder.literal(node.getMaxMatch().get()));
214208
} else {
209+
// Use REX_EXTRACT for single match (default)
215210
extractCall =
216211
PPLFuncImpTable.INSTANCE.resolve(
217212
context.rexBuilder,
@@ -224,17 +219,6 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
224219
newFieldNames.add(namedGroups.get(i));
225220
}
226221

227-
if (node.getOffsetField().isPresent()) {
228-
RexNode offsetCall =
229-
PPLFuncImpTable.INSTANCE.resolve(
230-
context.rexBuilder,
231-
BuiltinFunctionName.REX_OFFSET,
232-
fieldRex,
233-
context.rexBuilder.makeLiteral(patternStr));
234-
newFields.add(offsetCall);
235-
newFieldNames.add(node.getOffsetField().get());
236-
}
237-
238222
projectPlusOverriding(newFields, newFieldNames, context);
239223
return context.relBuilder.peek();
240224
}
@@ -1687,118 +1671,4 @@ private void buildExpandRelNode(
16871671
context.relBuilder.rename(names);
16881672
}
16891673
}
1690-
1691-
/**
1692-
* Creates an optimized sed call using native Calcite functions where possible. Falls back to
1693-
* custom REX_SED for complex cases.
1694-
*/
1695-
private RexNode createOptimizedSedCall(
1696-
RexNode fieldRex, String sedExpression, CalcitePlanContext context) {
1697-
if (sedExpression.startsWith("s/")) {
1698-
return createOptimizedSubstitution(fieldRex, sedExpression, context);
1699-
} else if (sedExpression.startsWith("y/")) {
1700-
return createOptimizedTransliteration(fieldRex, sedExpression, context);
1701-
} else {
1702-
throw new RuntimeException("Unsupported sed pattern: " + sedExpression);
1703-
}
1704-
}
1705-
1706-
/** Creates optimized substitution calls for s/pattern/replacement/flags syntax. */
1707-
private RexNode createOptimizedSubstitution(
1708-
RexNode fieldRex, String sedExpression, CalcitePlanContext context) {
1709-
try {
1710-
// Parse sed substitution: s/pattern/replacement/flags
1711-
if (!sedExpression.matches("s/.+/.*/.*")) {
1712-
throw new IllegalArgumentException("Invalid sed substitution format");
1713-
}
1714-
1715-
// Find the delimiters - sed format is s/pattern/replacement/flags
1716-
int firstDelimiter = sedExpression.indexOf('/', 2); // First '/' after 's/'
1717-
int secondDelimiter = sedExpression.indexOf('/', firstDelimiter + 1); // Second '/'
1718-
int thirdDelimiter = sedExpression.indexOf('/', secondDelimiter + 1); // Third '/' (optional)
1719-
1720-
if (firstDelimiter == -1 || secondDelimiter == -1) {
1721-
throw new IllegalArgumentException("Invalid sed substitution format");
1722-
}
1723-
1724-
String pattern = sedExpression.substring(2, firstDelimiter);
1725-
String replacement = sedExpression.substring(firstDelimiter + 1, secondDelimiter);
1726-
String flags =
1727-
secondDelimiter + 1 < sedExpression.length()
1728-
? sedExpression.substring(secondDelimiter + 1)
1729-
: "";
1730-
1731-
// Convert sed backreferences (\1, \2) to Java style ($1, $2)
1732-
String javaReplacement = replacement.replaceAll("\\\\(\\d+)", "\\$$1");
1733-
1734-
if (flags.isEmpty()) {
1735-
// 3-parameter REGEXP_REPLACE
1736-
return PPLFuncImpTable.INSTANCE.resolve(
1737-
context.rexBuilder,
1738-
BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_3,
1739-
fieldRex,
1740-
context.rexBuilder.makeLiteral(pattern),
1741-
context.rexBuilder.makeLiteral(javaReplacement));
1742-
} else if (flags.matches("[gi]+")) {
1743-
// 4-parameter REGEXP_REPLACE with flags
1744-
return PPLFuncImpTable.INSTANCE.resolve(
1745-
context.rexBuilder,
1746-
BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_PG_4,
1747-
fieldRex,
1748-
context.rexBuilder.makeLiteral(pattern),
1749-
context.rexBuilder.makeLiteral(javaReplacement),
1750-
context.rexBuilder.makeLiteral(flags));
1751-
} else if (flags.matches("\\d+")) {
1752-
// 5-parameter REGEXP_REPLACE with occurrence
1753-
int occurrence = Integer.parseInt(flags);
1754-
return PPLFuncImpTable.INSTANCE.resolve(
1755-
context.rexBuilder,
1756-
BuiltinFunctionName.INTERNAL_REGEXP_REPLACE_5,
1757-
fieldRex,
1758-
context.rexBuilder.makeLiteral(pattern),
1759-
context.rexBuilder.makeLiteral(javaReplacement),
1760-
context.relBuilder.literal(1), // start position
1761-
context.relBuilder.literal(occurrence));
1762-
} else {
1763-
throw new RuntimeException(
1764-
"Unsupported sed flags: " + flags + " in expression: " + sedExpression);
1765-
}
1766-
} catch (Exception e) {
1767-
throw new RuntimeException("Failed to optimize sed expression: " + sedExpression, e);
1768-
}
1769-
}
1770-
1771-
/** Creates optimized transliteration calls for y/from/to/ syntax. */
1772-
private RexNode createOptimizedTransliteration(
1773-
RexNode fieldRex, String sedExpression, CalcitePlanContext context) {
1774-
try {
1775-
// Parse sed transliteration: y/from/to/
1776-
if (!sedExpression.matches("y/.+/.*/.*")) {
1777-
throw new IllegalArgumentException("Invalid sed transliteration format");
1778-
}
1779-
1780-
int firstSlash = sedExpression.indexOf('/', 1);
1781-
int secondSlash = sedExpression.indexOf('/', firstSlash + 1);
1782-
int thirdSlash = sedExpression.indexOf('/', secondSlash + 1);
1783-
1784-
if (firstSlash == -1 || secondSlash == -1) {
1785-
throw new IllegalArgumentException("Invalid sed transliteration format");
1786-
}
1787-
1788-
String from = sedExpression.substring(firstSlash + 1, secondSlash);
1789-
String to =
1790-
sedExpression.substring(
1791-
secondSlash + 1, thirdSlash != -1 ? thirdSlash : sedExpression.length());
1792-
1793-
// Use Calcite's native TRANSLATE3 function
1794-
return PPLFuncImpTable.INSTANCE.resolve(
1795-
context.rexBuilder,
1796-
BuiltinFunctionName.INTERNAL_TRANSLATE3,
1797-
fieldRex,
1798-
context.rexBuilder.makeLiteral(from),
1799-
context.rexBuilder.makeLiteral(to));
1800-
} catch (Exception e) {
1801-
throw new RuntimeException("Failed to optimize sed expression: " + sedExpression, e);
1802-
}
1803-
}
18041674
}

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -218,7 +218,6 @@ public enum BuiltinFunctionName {
218218
REGEX_MATCH(FunctionName.of("regex_match")),
219219
REX_EXTRACT(FunctionName.of("REX_EXTRACT")),
220220
REX_EXTRACT_MULTI(FunctionName.of("REX_EXTRACT_MULTI")),
221-
REX_OFFSET(FunctionName.of("REX_OFFSET")),
222221
REPLACE(FunctionName.of("replace")),
223222
REVERSE(FunctionName.of("reverse")),
224223
RIGHT(FunctionName.of("right")),

core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,6 @@
5757
import org.opensearch.sql.expression.function.udf.RelevanceQueryFunction;
5858
import org.opensearch.sql.expression.function.udf.RexExtractFunction;
5959
import org.opensearch.sql.expression.function.udf.RexExtractMultiFunction;
60-
import org.opensearch.sql.expression.function.udf.RexOffsetFunction;
6160
import org.opensearch.sql.expression.function.udf.SpanFunction;
6261
import org.opensearch.sql.expression.function.udf.condition.EarliestFunction;
6362
import org.opensearch.sql.expression.function.udf.condition.EnhancedCoalesceFunction;
@@ -406,7 +405,6 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
406405
public static final SqlOperator REX_EXTRACT = new RexExtractFunction().toUDF("REX_EXTRACT");
407406
public static final SqlOperator REX_EXTRACT_MULTI =
408407
new RexExtractMultiFunction().toUDF("REX_EXTRACT_MULTI");
409-
public static final SqlOperator REX_OFFSET = new RexOffsetFunction().toUDF("REX_OFFSET");
410408

411409
// Aggregation functions
412410
public static final SqlAggFunction AVG_NULLABLE = new NullableSqlAvgAggFunction(SqlKind.AVG);

core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -163,7 +163,6 @@
163163
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REVERSE;
164164
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_EXTRACT;
165165
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_EXTRACT_MULTI;
166-
import static org.opensearch.sql.expression.function.BuiltinFunctionName.REX_OFFSET;
167166
import static org.opensearch.sql.expression.function.BuiltinFunctionName.RIGHT;
168167
import static org.opensearch.sql.expression.function.BuiltinFunctionName.RINT;
169168
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ROUND;
@@ -714,7 +713,6 @@ void populate() {
714713
registerOperator(MULTI_MATCH, PPLBuiltinOperators.MULTI_MATCH);
715714
registerOperator(REX_EXTRACT, PPLBuiltinOperators.REX_EXTRACT);
716715
registerOperator(REX_EXTRACT_MULTI, PPLBuiltinOperators.REX_EXTRACT_MULTI);
717-
registerOperator(REX_OFFSET, PPLBuiltinOperators.REX_OFFSET);
718716

719717
// Register PPL Datetime UDF operator
720718
registerOperator(TIMESTAMP, PPLBuiltinOperators.TIMESTAMP);

core/src/main/java/org/opensearch/sql/expression/function/udf/RexOffsetFunction.java

Lines changed: 0 additions & 92 deletions
This file was deleted.

0 commit comments

Comments
 (0)