Skip to content

Commit f43765d

Browse files
authored
Merge branch 'main' into update-docs
Signed-off-by: ritvibhatt <53196324+ritvibhatt@users.noreply.github.com>
2 parents 50d95b3 + 0c1ec27 commit f43765d

24 files changed

Lines changed: 780 additions & 339 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -282,10 +282,13 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
282282
"Rex pattern must contain at least one named capture group");
283283
}
284284

285+
// TODO: Once JDK 20+ is supported, consider using Pattern.namedGroups() API for more efficient
286+
// named group handling instead of manual parsing in RegexCommonUtils
287+
285288
List<RexNode> newFields = new ArrayList<>();
286289
List<String> newFieldNames = new ArrayList<>();
287290

288-
for (int i = 0; i < namedGroups.size(); i++) {
291+
for (String groupName : namedGroups) {
289292
RexNode extractCall;
290293
if (node.getMaxMatch().isPresent() && node.getMaxMatch().get() > 1) {
291294
extractCall =
@@ -294,7 +297,7 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
294297
BuiltinFunctionName.REX_EXTRACT_MULTI,
295298
fieldRex,
296299
context.rexBuilder.makeLiteral(patternStr),
297-
context.relBuilder.literal(i + 1),
300+
context.rexBuilder.makeLiteral(groupName),
298301
context.relBuilder.literal(node.getMaxMatch().get()));
299302
} else {
300303
extractCall =
@@ -303,10 +306,10 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
303306
BuiltinFunctionName.REX_EXTRACT,
304307
fieldRex,
305308
context.rexBuilder.makeLiteral(patternStr),
306-
context.relBuilder.literal(i + 1));
309+
context.rexBuilder.makeLiteral(groupName));
307310
}
308311
newFields.add(extractCall);
309-
newFieldNames.add(namedGroups.get(i));
312+
newFieldNames.add(groupName);
310313
}
311314

312315
if (node.getOffsetField().isPresent()) {
@@ -1809,18 +1812,16 @@ public RelNode visitMultisearch(Multisearch node, CalcitePlanContext context) {
18091812
}
18101813

18111814
/**
1812-
* Finds the timestamp field for multisearch ordering.
1815+
* Finds the @timestamp field for multisearch ordering. Only the @timestamp field is used for
1816+
* timestamp interleaving. Other timestamp-like fields are ignored.
18131817
*
1814-
* @param rowType The row type to search for timestamp fields
1815-
* @return The name of the timestamp field, or null if not found
1818+
* @param rowType The row type to search for the @timestamp field
1819+
* @return "@timestamp" if the field exists, or null if not found
18161820
*/
18171821
private String findTimestampField(RelDataType rowType) {
1818-
String[] candidates = {"@timestamp", "_time", "timestamp", "time"};
1819-
for (String fieldName : candidates) {
1820-
RelDataTypeField field = rowType.getField(fieldName, false, false);
1821-
if (field != null) {
1822-
return fieldName;
1823-
}
1822+
RelDataTypeField field = rowType.getField("@timestamp", false, false);
1823+
if (field != null) {
1824+
return "@timestamp";
18241825
}
18251826
return null;
18261827
}

core/src/main/java/org/opensearch/sql/calcite/SchemaUnifier.java

Lines changed: 26 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -7,29 +7,27 @@
77

88
import java.util.ArrayList;
99
import java.util.HashMap;
10-
import java.util.HashSet;
1110
import java.util.List;
1211
import java.util.Map;
13-
import java.util.Set;
1412
import java.util.stream.Collectors;
1513
import org.apache.calcite.rel.RelNode;
1614
import org.apache.calcite.rel.type.RelDataType;
1715
import org.apache.calcite.rel.type.RelDataTypeField;
1816
import org.apache.calcite.rex.RexNode;
19-
import org.apache.calcite.sql.validate.SqlValidatorUtil;
2017

2118
/**
22-
* Utility class for unifying schemas across multiple RelNodes with type conflict resolution. Uses
23-
* the same strategy as append command - renames conflicting fields to avoid type conflicts.
19+
* Utility class for unifying schemas across multiple RelNodes. Throws an exception when type
20+
* conflicts are detected.
2421
*/
2522
public class SchemaUnifier {
2623

2724
/**
28-
* Builds a unified schema for multiple nodes with type conflict resolution.
25+
* Builds a unified schema for multiple nodes. Throws an exception if type conflicts are detected.
2926
*
3027
* @param nodes List of RelNodes to unify schemas for
3128
* @param context Calcite plan context
3229
* @return List of projected RelNodes with unified schema
30+
* @throws IllegalArgumentException if type conflicts are detected
3331
*/
3432
public static List<RelNode> buildUnifiedSchemaWithConflictResolution(
3533
List<RelNode> nodes, CalcitePlanContext context) {
@@ -41,7 +39,7 @@ public static List<RelNode> buildUnifiedSchemaWithConflictResolution(
4139
return nodes;
4240
}
4341

44-
// Step 1: Build the unified schema by processing all nodes
42+
// Step 1: Build the unified schema by processing all nodes (throws on conflict)
4543
List<SchemaField> unifiedSchema = buildUnifiedSchema(nodes);
4644

4745
// Step 2: Create projections for each node to align with unified schema
@@ -55,47 +53,37 @@ public static List<RelNode> buildUnifiedSchemaWithConflictResolution(
5553
projectedNodes.add(projectedNode);
5654
}
5755

58-
// Step 3: Unify names to handle type conflicts (this creates age0, age1, etc.)
59-
List<String> uniqueNames =
60-
SqlValidatorUtil.uniquify(fieldNames, SqlValidatorUtil.EXPR_SUGGESTER, true);
61-
62-
// Step 4: Re-project with unique names if needed
63-
if (!uniqueNames.equals(fieldNames)) {
64-
List<RelNode> renamedNodes = new ArrayList<>();
65-
for (RelNode node : projectedNodes) {
66-
RelNode renamedNode =
67-
context.relBuilder.push(node).project(context.relBuilder.fields(), uniqueNames).build();
68-
renamedNodes.add(renamedNode);
69-
}
70-
return renamedNodes;
71-
}
72-
7356
return projectedNodes;
7457
}
7558

7659
/**
77-
* Builds a unified schema by merging fields from all nodes. Fields with the same name but
78-
* different types are added as separate entries (which will be renamed during uniquification).
60+
* Builds a unified schema by merging fields from all nodes. Throws an exception if fields with
61+
* the same name have different types.
7962
*
8063
* @param nodes List of RelNodes to merge schemas from
81-
* @return List of SchemaField representing the unified schema (may contain duplicate names)
64+
* @return List of SchemaField representing the unified schema
65+
* @throws IllegalArgumentException if type conflicts are detected
8266
*/
8367
private static List<SchemaField> buildUnifiedSchema(List<RelNode> nodes) {
8468
List<SchemaField> schema = new ArrayList<>();
85-
Map<String, Set<RelDataType>> seenFields = new HashMap<>();
69+
Map<String, RelDataType> seenFields = new HashMap<>();
8670

8771
for (RelNode node : nodes) {
8872
for (RelDataTypeField field : node.getRowType().getFieldList()) {
8973
String fieldName = field.getName();
9074
RelDataType fieldType = field.getType();
9175

92-
// Track which (name, type) combinations we've seen
93-
Set<RelDataType> typesForName = seenFields.computeIfAbsent(fieldName, k -> new HashSet<>());
94-
95-
if (!typesForName.contains(fieldType)) {
96-
// New field or same name with different type - add to schema
76+
RelDataType existingType = seenFields.get(fieldName);
77+
if (existingType == null) {
78+
// New field - add to schema
9779
schema.add(new SchemaField(fieldName, fieldType));
98-
typesForName.add(fieldType);
80+
seenFields.put(fieldName, fieldType);
81+
} else if (!areTypesCompatible(existingType, fieldType)) {
82+
// Same field name but different type - throw exception
83+
throw new IllegalArgumentException(
84+
String.format(
85+
"Unable to process column '%s' due to incompatible types: '%s' and '%s'",
86+
fieldName, existingType.getSqlTypeName(), fieldType.getSqlTypeName()));
9987
}
10088
// Field name already seen with a compatible type - nothing to add, skip it
10189
}
@@ -104,6 +92,10 @@ private static List<SchemaField> buildUnifiedSchema(List<RelNode> nodes) {
10492
return schema;
10593
}
10694

95+
private static boolean areTypesCompatible(RelDataType type1, RelDataType type2) {
96+
return type1.getSqlTypeName() != null && type1.getSqlTypeName().equals(type2.getSqlTypeName());
97+
}
98+
10799
/**
108100
* Builds a projection for a node to align with the unified schema. For each field in the unified
109101
* schema: - If the node has a matching field with a compatible type, use it - Otherwise, project NULL
@@ -125,8 +117,8 @@ private static List<RexNode> buildProjectionForNode(
125117
RelDataType expectedType = schemaField.getType();
126118
RelDataTypeField nodeField = nodeFieldMap.get(fieldName);
127119

128-
if (nodeField != null && nodeField.getType().equals(expectedType)) {
129-
// Field exists with matching type - use it
120+
if (nodeField != null && areTypesCompatible(nodeField.getType(), expectedType)) {
121+
// Field exists with compatible type - use it
130122
projection.add(context.rexBuilder.makeInputRef(node, nodeField.getIndex()));
131123
} else {
132124
// Field missing or type mismatch - project NULL

core/src/main/java/org/opensearch/sql/expression/function/udf/RexExtractFunction.java

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@
1515
import org.apache.calcite.linq4j.tree.Expression;
1616
import org.apache.calcite.linq4j.tree.Expressions;
1717
import org.apache.calcite.rex.RexCall;
18+
import org.apache.calcite.sql.type.CompositeOperandTypeChecker;
1819
import org.apache.calcite.sql.type.ReturnTypes;
1920
import org.apache.calcite.sql.type.SqlReturnTypeInference;
2021
import org.opensearch.sql.calcite.utils.PPLOperandTypes;
2122
import org.opensearch.sql.expression.function.ImplementorUDF;
2223
import org.opensearch.sql.expression.function.UDFOperandMetadata;
24+
import org.opensearch.sql.expression.parse.RegexCommonUtils;
2325

2426
/** Custom REX_EXTRACT function for extracting regex named capture groups. */
2527
public final class RexExtractFunction extends ImplementorUDF {
@@ -35,7 +37,12 @@ public SqlReturnTypeInference getReturnTypeInference() {
3537

3638
@Override
3739
public UDFOperandMetadata getOperandMetadata() {
38-
return PPLOperandTypes.STRING_STRING_INTEGER;
40+
// Support both (field, pattern, groupIndex) and (field, pattern, groupName)
41+
return UDFOperandMetadata.wrap(
42+
(CompositeOperandTypeChecker)
43+
PPLOperandTypes.STRING_STRING_INTEGER
44+
.getInnerTypeChecker()
45+
.or(PPLOperandTypes.STRING_STRING_STRING.getInnerTypeChecker()));
3946
}
4047

4148
private static class RexExtractImplementor implements NotNullImplementor {
@@ -45,19 +52,80 @@ public Expression implement(
4552
RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
4653
Expression field = translatedOperands.get(0);
4754
Expression pattern = translatedOperands.get(1);
48-
Expression groupIndex = translatedOperands.get(2);
55+
Expression groupIndexOrName = translatedOperands.get(2);
4956

50-
return Expressions.call(RexExtractFunction.class, "extractGroup", field, pattern, groupIndex);
57+
return Expressions.call(
58+
RexExtractFunction.class, "extractGroup", field, pattern, groupIndexOrName);
5159
}
5260
}
5361

62+
/**
63+
* Extract a regex group by index (1-based).
64+
*
65+
* @param text The input text to extract from
66+
* @param pattern The regex pattern
67+
* @param groupIndex The 1-based group index to extract
68+
* @return The extracted value or null if not found or invalid
69+
*/
5470
public static String extractGroup(String text, String pattern, int groupIndex) {
71+
if (text == null || pattern == null) {
72+
return null;
73+
}
74+
75+
return executeExtraction(
76+
text,
77+
pattern,
78+
matcher -> {
79+
if (groupIndex > 0 && groupIndex <= matcher.groupCount()) {
80+
return matcher.group(groupIndex);
81+
}
82+
return null;
83+
});
84+
}
85+
86+
/**
87+
* Extract a named capture group from text using the provided pattern. This method avoids the
88+
* index shifting issue that occurs with nested unnamed groups.
89+
*
90+
* @param text The input text to extract from
91+
* @param pattern The regex pattern with named capture groups
92+
* @param groupName The name of the capture group to extract
93+
* @return The extracted value or null if not found
94+
*/
95+
public static String extractGroup(String text, String pattern, String groupName) {
96+
if (text == null || pattern == null || groupName == null) {
97+
return null;
98+
}
99+
100+
return executeExtraction(
101+
text,
102+
pattern,
103+
matcher -> {
104+
try {
105+
return matcher.group(groupName);
106+
} catch (IllegalArgumentException e) {
107+
// Group name doesn't exist in the pattern
108+
return null;
109+
}
110+
});
111+
}
112+
113+
/**
114+
* Common extraction logic to avoid code duplication.
115+
*
116+
* @param text The input text
117+
* @param pattern The regex pattern
118+
* @param extractor Function to extract the value from the matcher
119+
* @return The extracted value or null
120+
*/
121+
private static String executeExtraction(
122+
String text, String pattern, java.util.function.Function<Matcher, String> extractor) {
55123
try {
56-
Pattern compiledPattern = Pattern.compile(pattern);
124+
Pattern compiledPattern = RegexCommonUtils.getCompiledPattern(pattern);
57125
Matcher matcher = compiledPattern.matcher(text);
58126

59-
if (matcher.find() && groupIndex > 0 && groupIndex <= matcher.groupCount()) {
60-
return matcher.group(groupIndex);
127+
if (matcher.find()) {
128+
return extractor.apply(matcher);
61129
}
62130
return null;
63131
} catch (PatternSyntaxException e) {

0 commit comments

Comments
 (0)