Skip to content

Commit 3734d6a

Browse files
authored
Support function composition in expressions. (#6628)
Support function composition in expressions. The grammar changed function from a lexer rule to a parser rule. The previous approach yielded one token that needed to be parsed manually. As a parser rule each argument is a full sub-expression. This includes conditionalExpression, arithmeticExpression, stringExpression, jsonPointer, or literal. This gives ANTLR structural visibility into function calls, enabling composable functions that were impossible when the whole call was a single unparsed string. The grammar change meant that the ParseTreeEvaluatorListener now parses functions instead of ParseTreeCoercionService. I also consolidated SET_DELIMITER and COMMA and DIVIDE and FORWARDSLASH. Having these as different tokens caused problems parsing grammars. Resolves #6322. Signed-off-by: David Venable <dlv@amazon.com>
1 parent 34a63e9 commit 3734d6a

13 files changed

Lines changed: 397 additions & 379 deletions

data-prepper-expression/src/main/antlr/DataPrepperExpression.g4

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
/*
22
* Copyright OpenSearch Contributors
33
* SPDX-License-Identifier: Apache-2.0
4+
*
5+
* The OpenSearch Contributors require contributions made to
6+
* this file be licensed under the Apache-2.0 license or a
7+
* compatible open source license.
48
*/
59

610
grammar DataPrepperExpression;
@@ -29,7 +33,7 @@ arithmeticExpression
2933
;
3034

3135
multiplicativeExpression
32-
: multiplicativeExpression (MULTIPLY | DIVIDE | MOD) arithmeticTerm
36+
: multiplicativeExpression (MULTIPLY | FORWARDSLASH | MOD) arithmeticTerm
3337
| arithmeticTerm
3438
;
3539

@@ -126,7 +130,7 @@ setInitializer
126130
;
127131

128132
setMembers
129-
: literal (SPACE* SET_DELIMITER SPACE* literal)*
133+
: literal (SPACE* COMMA SPACE* literal)*
130134
;
131135

132136
unaryOperator
@@ -151,22 +155,19 @@ jsonPointer
151155
;
152156

153157
function
154-
: Function
158+
: FunctionName LPAREN functionArgs? RPAREN
155159
;
156160

157-
Function
158-
: JsonPointerCharacters LPAREN FunctionArgs RPAREN
161+
functionArgs
162+
: functionArg (COMMA functionArg)*
159163
;
160164

161-
fragment
162-
FunctionArgs
163-
: ((FunctionArg SPACE* COMMA SPACE*)* SPACE* FunctionArg)?
164-
;
165-
166-
fragment
167-
FunctionArg
168-
: JsonPointer
169-
| String
165+
functionArg
166+
: conditionalExpression
167+
| arithmeticExpression
168+
| stringExpression
169+
| jsonPointer
170+
| literal
170171
;
171172

172173
variableIdentifier
@@ -274,6 +275,10 @@ String
274275
| DOUBLEQUOTE DOUBLEQUOTE DOUBLEQUOTE StringCharacters? DOUBLEQUOTE DOUBLEQUOTE DOUBLEQUOTE
275276
;
276277
278+
FunctionName
279+
: JsonPointerCharacters
280+
;
281+
277282
fragment
278283
StringCharacters
279284
: StringCharacter+
@@ -301,14 +306,6 @@ DataTypes
301306
| STRING
302307
;
303308

304-
SET_DELIMITER
305-
: COMMA
306-
;
307-
308-
DIVIDE
309-
: FORWARDSLASH
310-
;
311-
312309
COMMA : ',';
313310
EQUAL : '==';
314311
NOT_EQUAL : '!=';

data-prepper-expression/src/main/java/org/opensearch/dataprepper/expression/OperatorConfiguration.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
/*
22
* Copyright OpenSearch Contributors
33
* SPDX-License-Identifier: Apache-2.0
4+
*
5+
* The OpenSearch Contributors require contributions made to
6+
* this file be licensed under the Apache-2.0 license or a
7+
* compatible open source license.
48
*/
59

610
package org.opensearch.dataprepper.expression;
@@ -444,7 +448,7 @@ public ArithmeticBinaryOperator divideOperator() {
444448
operandsToOperationMap.put(Long.class, longOperations);
445449
operandsToOperationMap.put(Double.class, doubleOperations);
446450

447-
return new ArithmeticBinaryOperator(DataPrepperExpressionParser.DIVIDE, operandsToOperationMap);
451+
return new ArithmeticBinaryOperator(DataPrepperExpressionParser.FORWARDSLASH, operandsToOperationMap);
448452
}
449453

450454
@Bean

data-prepper-expression/src/main/java/org/opensearch/dataprepper/expression/ParseTreeCoercionService.java

Lines changed: 10 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -18,38 +18,24 @@
1818
import javax.inject.Inject;
1919
import javax.inject.Named;
2020
import java.io.Serializable;
21-
import java.util.ArrayList;
2221
import java.util.List;
2322
import java.util.Map;
2423
import java.util.Objects;
25-
import java.util.concurrent.ConcurrentHashMap;
26-
import java.util.concurrent.ConcurrentMap;
2724
import java.util.function.Function;
2825
import java.util.regex.Pattern;
2926

3027
@Named
3128
class ParseTreeCoercionService {
3229
private static final Pattern QUOTE_PATTERN = Pattern.compile("^\"{1,3}|\"{1,3}$");
33-
private static final Pattern ARGUMENT_SPLITTER = Pattern.compile("(?<!\\\\),");
34-
private static final int INITIAL_ARG_LIST_SIZE = 8;
35-
private static final String INVALID_FUNCTION_FORMAT_OPEN = "Invalid function format: missing opening parenthesis";
36-
private static final String INVALID_FUNCTION_FORMAT_CLOSE = "Invalid function format: missing closing parenthesis";
37-
private static final String INVALID_STRING_ARG = "Invalid string argument: check if any argument is missing a closing double quote or contains comma that's not escaped with `\\`.";
38-
private static final String UNSUPPORTED_ARG_TYPE = "Unsupported type passed as function argument";
39-
private final Map<Class<? extends Serializable>, Function<Object, Object>> literalTypeConversions;
4030
private final ExpressionFunctionProvider expressionFunctionProvider;
4131
private final Function<Object, Object> convertLiteralType;
42-
private final ConcurrentMap<String, FunctionMetadata> cachedFunctionStrings = new ConcurrentHashMap<>(16, 0.75f);
4332
private final EventKeyFactory eventKeyFactory;
4433

4534
public Object coercePrimaryTerminalNode(final TerminalNode node, final Event event) {
4635
Objects.requireNonNull(node, "TerminalNode cannot be null");
4736
final int nodeType = node.getSymbol().getType();
4837
final String nodeStringValue = node.getText();
4938
switch (nodeType) {
50-
case DataPrepperExpressionParser.Function:
51-
FunctionMetadata functionMetadata = cachedFunctionStrings.computeIfAbsent(nodeStringValue, this::parseFunctionMetadata);
52-
return expressionFunctionProvider.provideFunction(functionMetadata.functionName, functionMetadata.argList, event, convertLiteralType);
5339
case DataPrepperExpressionParser.EscapedJsonPointer:
5440
return resolveJsonPointerValue(nodeStringValue.substring(1, nodeStringValue.length() - 1), event);
5541
case DataPrepperExpressionParser.JsonPointer:
@@ -68,27 +54,31 @@ public Object coercePrimaryTerminalNode(final TerminalNode node, final Event eve
6854
return Boolean.valueOf(nodeStringValue);
6955
case DataPrepperExpressionParser.COMMA:
7056
return DataPrepperExpressionParser.COMMA;
71-
case DataPrepperExpressionParser.SET_DELIMITER:
72-
return DataPrepperExpressionParser.SET_DELIMITER;
7357
case DataPrepperExpressionParser.Null:
7458
return null;
7559
case DataPrepperExpressionParser.DataTypes:
7660
return nodeStringValue;
77-
7861
default:
7962
throw new ExpressionCoercionException("Unsupported terminal node type symbol string: " +
8063
DataPrepperExpressionParser.VOCABULARY.getDisplayName(nodeType));
8164
}
8265
}
8366

67+
public Object evaluateFunction(final String functionName, final List<Object> args, final Event event) {
68+
return expressionFunctionProvider.provideFunction(functionName, args, event, convertLiteralType);
69+
}
70+
71+
public EventKey createEventKey(final String jsonPointer) {
72+
return eventKeyFactory.createEventKey(jsonPointer);
73+
}
74+
8475
@Inject
8576
public ParseTreeCoercionService(
8677
final Map<Class<? extends Serializable>, Function<Object, Object>> literalTypeConversions,
8778
final ExpressionFunctionProvider expressionFunctionProvider,
8879
final EventKeyFactory eventKeyFactory) {
8980
Objects.requireNonNull(literalTypeConversions, "literalTypeConversions cannot be null");
9081
Objects.requireNonNull(expressionFunctionProvider, "expressionFunctionProvider cannot be null");
91-
this.literalTypeConversions = literalTypeConversions;
9282
convertLiteralType = (value) -> {
9383
if (literalTypeConversions.containsKey(value.getClass())) {
9484
return literalTypeConversions.get(value.getClass()).apply(value);
@@ -108,57 +98,9 @@ public <T> T coerce(final Object obj, Class<T> clazz) throws ExpressionCoercionE
10898
"Unable to cast " + obj.getClass().getName() + " into " + clazz.getName());
10999
}
110100

111-
private FunctionMetadata parseFunctionMetadata(final String nodeStringValue) {
112-
final int funcNameIndex = nodeStringValue.indexOf("(");
113-
if (funcNameIndex == -1) {
114-
throw new ExpressionCoercionException(INVALID_FUNCTION_FORMAT_OPEN);
115-
}
116-
final String functionName = nodeStringValue.substring(0, funcNameIndex);
117-
final int argsEndIndex = nodeStringValue.indexOf(")", funcNameIndex);
118-
if (argsEndIndex == -1) {
119-
throw new ExpressionCoercionException(INVALID_FUNCTION_FORMAT_CLOSE);
120-
}
121-
122-
List<Object> argList = new ArrayList<>(INITIAL_ARG_LIST_SIZE);
123-
if (argsEndIndex > funcNameIndex + 1) {
124-
final String argsStr = nodeStringValue.substring(funcNameIndex + 1, argsEndIndex);
125-
final String[] args = ARGUMENT_SPLITTER.split(argsStr);
126-
127-
for (final String arg : args) {
128-
String trimmedArg = arg.trim();
129-
if (trimmedArg.isEmpty()) {
130-
continue;
131-
}
132-
if (trimmedArg.charAt(0) == '/') {
133-
argList.add(eventKeyFactory.createEventKey(trimmedArg));
134-
} else if (trimmedArg.charAt(0) == '"') {
135-
if (trimmedArg.length() < 2 || trimmedArg.charAt(trimmedArg.length() - 1) != '"') {
136-
throw new ExpressionCoercionException(
137-
INVALID_STRING_ARG);
138-
}
139-
argList.add(trimmedArg.substring(1, trimmedArg.length() - 1));
140-
} else {
141-
throw new ExpressionCoercionException(UNSUPPORTED_ARG_TYPE);
142-
}
143-
}
144-
}
145-
146-
return new FunctionMetadata(functionName, argList);
147-
}
148-
149-
private static final class FunctionMetadata {
150-
final String functionName;
151-
final List<Object> argList;
152-
153-
private FunctionMetadata(final String functionName, final List<Object> argList) {
154-
this.functionName = functionName.intern();
155-
this.argList = argList;
156-
}
157-
}
158-
159-
private Object resolveJsonPointerValue(final String jsonPointer, final Event event) {
101+
Object resolveJsonPointerValue(final String jsonPointer, final Event event) {
160102
EventKey eventKey = this.eventKeyFactory.createEventKey(jsonPointer);
161103
final Object value = event.get(eventKey, Object.class);
162104
return value != null ? convertLiteralType.apply(value) : null;
163105
}
164-
}
106+
}

data-prepper-expression/src/main/java/org/opensearch/dataprepper/expression/ParseTreeEvaluatorListener.java

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
/*
22
* Copyright OpenSearch Contributors
33
* SPDX-License-Identifier: Apache-2.0
4+
*
5+
* The OpenSearch Contributors require contributions made to
6+
* this file be licensed under the Apache-2.0 license or a
7+
* compatible open source license.
48
*/
59

610
package org.opensearch.dataprepper.expression;
@@ -14,7 +18,9 @@
1418
import org.opensearch.dataprepper.expression.antlr.DataPrepperExpressionListener;
1519
import org.opensearch.dataprepper.expression.antlr.DataPrepperExpressionParser;
1620

21+
import java.util.ArrayList;
1722
import java.util.HashSet;
23+
import java.util.List;
1824
import java.util.Set;
1925
import java.util.Stack;
2026

@@ -40,6 +46,15 @@ class ParseTreeEvaluatorListener extends DataPrepperExpressionBaseListener {
4046
private boolean listStart;
4147
private Set<Object> setMembers;
4248

49+
/**
50+
* Track function context for composable function support
51+
*/
52+
private final Stack<FunctionEvalContext> functionContextStack = new Stack<>();
53+
/**
54+
* Count of functionArg rules currently entered but not yet exited; greater than zero while walking any function argument
55+
*/
56+
private int functionArgDepth = 0;
57+
4358
public ParseTreeEvaluatorListener(final OperatorProvider operatorProvider,
4459
final ParseTreeCoercionService coercionService,
4560
final Event event) {
@@ -77,12 +92,78 @@ private void validateSetMembers(Set<Object> setMembers) {
7792
}
7893
}
7994

95+
private boolean isInsideFunction() {
96+
return !functionContextStack.isEmpty();
97+
}
98+
99+
@Override
100+
public void enterFunction(DataPrepperExpressionParser.FunctionContext ctx) {
101+
final String functionName = ctx.FunctionName().getText();
102+
functionContextStack.push(new FunctionEvalContext(functionName, operandStack.size()));
103+
}
104+
105+
@Override
106+
public void exitFunction(DataPrepperExpressionParser.FunctionContext ctx) {
107+
final FunctionEvalContext funcCtx = functionContextStack.pop();
108+
109+
// Collect all arguments that were pushed onto the operand stack during function arg evaluation
110+
final List<Object> args = new ArrayList<>();
111+
while (operandStack.size() > funcCtx.stackSizeAtEntry) {
112+
args.add(0, operandStack.pop());
113+
}
114+
115+
final Object result = coercionService.evaluateFunction(funcCtx.functionName, args, event);
116+
operandStack.push(result);
117+
}
118+
119+
@Override
120+
public void enterFunctionArg(DataPrepperExpressionParser.FunctionArgContext ctx) {
121+
functionArgDepth++;
122+
}
123+
124+
@Override
125+
public void exitFunctionArg(DataPrepperExpressionParser.FunctionArgContext ctx) {
126+
functionArgDepth--;
127+
}
128+
80129
@Override
81130
public void visitTerminal(TerminalNode node) {
82131
final int nodeType = node.getSymbol().getType();
83132
if (nodeType == DataPrepperExpressionParser.EOF) {
84133
return;
85134
}
135+
136+
// Skip FunctionName, LPAREN, RPAREN, and COMMA tokens inside function rules
137+
// These are structural tokens handled by enter/exitFunction
138+
if (nodeType == DataPrepperExpressionParser.FunctionName) {
139+
return;
140+
}
141+
142+
if (isInsideFunction()) {
143+
// Inside a function context, LPAREN/RPAREN/COMMA are structural - skip them
144+
if (nodeType == DataPrepperExpressionParser.LPAREN ||
145+
nodeType == DataPrepperExpressionParser.RPAREN ||
146+
nodeType == DataPrepperExpressionParser.COMMA) {
147+
return;
148+
}
149+
150+
// Inside a function arg, bare JsonPointer (e.g. /field) should be converted to EventKey.
151+
// EscapedJsonPointer (e.g. "/key1") is a quoted value — treat it as a string literal,
152+
// not an EventKey. This ensures getMetadata("/key1") receives a String argument.
153+
if (functionArgDepth > 0) {
154+
if (nodeType == DataPrepperExpressionParser.JsonPointer) {
155+
operandStack.push(coercionService.createEventKey(node.getText()));
156+
return;
157+
}
158+
if (nodeType == DataPrepperExpressionParser.EscapedJsonPointer) {
159+
final String nodeStringValue = node.getText();
160+
// Strip surrounding quotes and push as String
161+
operandStack.push(nodeStringValue.substring(1, nodeStringValue.length() - 1));
162+
return;
163+
}
164+
}
165+
}
166+
86167
if (operatorProvider.containsOperator(nodeType) || nodeType == DataPrepperExpressionParser.LPAREN) {
87168
operatorSymbolStack.push(nodeType);
88169
} else if (nodeType == DataPrepperExpressionParser.LBRACE) {
@@ -98,7 +179,7 @@ public void visitTerminal(TerminalNode node) {
98179
} else {
99180
final Object arg = coercionService.coercePrimaryTerminalNode(node, event);
100181
if (listStart) {
101-
if (!(arg instanceof Integer) || (((int)arg) != DataPrepperExpressionParser.COMMA && ((int)arg) != DataPrepperExpressionParser.SET_DELIMITER)) {
182+
if (!(arg instanceof Integer) || (((int)arg) != DataPrepperExpressionParser.COMMA)) {
102183
setMembers.add(arg);
103184
}
104185
} else {
@@ -151,4 +232,14 @@ private String getPartialStatementFromContext(final ParserRuleContext ctx) {
151232
final String fullStatement = startToken.getInputStream().toString();
152233
return fullStatement.substring(startToken.getStartIndex(), stopToken.getStopIndex() + 1);
153234
}
235+
236+
private static final class FunctionEvalContext {
237+
final String functionName;
238+
final int stackSizeAtEntry;
239+
240+
FunctionEvalContext(final String functionName, final int stackSizeAtEntry) {
241+
this.functionName = functionName;
242+
this.stackSizeAtEntry = stackSizeAtEntry;
243+
}
244+
}
154245
}

0 commit comments

Comments
 (0)