Skip to content

Commit da35804

Browse files
authored
Fixes the typeof operator along with data types in expressions. (#6673)
Fixes the typeof operator along with data types in expressions. The typeof operator was not working because FunctionName came before DataTypes it and it has a very broad matching rule. This means that ANTLR was resolving these as functions instead of the DataTypes that typeof needs. The fix here is to move FunctionName to the bottom. Additionally, I renamed FunctionName to Identifier to make this more generic for other future identifiers. Also there were no tests for the typeof operator. I have added some of these tests, first to verify the failure and fix, and second to ensure future changes do not break them. Signed-off-by: David Venable <dlv@amazon.com>
1 parent 627a6b7 commit da35804

6 files changed

Lines changed: 170 additions & 26 deletions

File tree

data-prepper-expression/src/main/antlr/DataPrepperExpression.g4

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ jsonPointer
155155
;
156156

157157
function
158-
: FunctionName LPAREN functionArgs? RPAREN
158+
: Identifier LPAREN functionArgs? RPAREN
159159
;
160160

161161
functionArgs
@@ -275,10 +275,6 @@ String
275275
| DOUBLEQUOTE DOUBLEQUOTE DOUBLEQUOTE StringCharacters? DOUBLEQUOTE DOUBLEQUOTE DOUBLEQUOTE
276276
;
277277
278-
FunctionName
279-
: JsonPointerCharacters
280-
;
281-
282278
fragment
283279
StringCharacters
284280
: StringCharacter+
@@ -306,6 +302,14 @@ DataTypes
306302
| STRING
307303
;
308304

305+
// Identifier MUST be defined after DataTypes (and all other keyword-like
306+
// lexer rules) because it matches [A-Za-z0-9_.@]+ which would shadow any
307+
// keyword defined later. ANTLR resolves same-length lexer ambiguities by
308+
// choosing the rule that appears first in the grammar.
309+
Identifier
310+
: JsonPointerCharacters
311+
;
312+
309313
COMMA : ',';
310314
EQUAL : '==';
311315
NOT_EQUAL : '!=';

data-prepper-expression/src/main/java/org/opensearch/dataprepper/expression/ParseTreeEvaluatorListener.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ private boolean isInsideFunction() {
9898

9999
@Override
100100
public void enterFunction(DataPrepperExpressionParser.FunctionContext ctx) {
101-
final String functionName = ctx.FunctionName().getText();
101+
final String functionName = ctx.Identifier().getText();
102102
functionContextStack.push(new FunctionEvalContext(functionName, operandStack.size()));
103103
}
104104

@@ -133,9 +133,9 @@ public void visitTerminal(TerminalNode node) {
133133
return;
134134
}
135135

136-
// Skip FunctionName, LPAREN, RPAREN, and COMMA tokens inside function rules
136+
// Skip Identifier, LPAREN, RPAREN, and COMMA tokens inside function rules
137137
// These are structural tokens handled by enter/exitFunction
138-
if (nodeType == DataPrepperExpressionParser.FunctionName) {
138+
if (nodeType == DataPrepperExpressionParser.Identifier) {
139139
return;
140140
}
141141

data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/GenericExpressionEvaluator_ConditionalIT.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,8 @@ private static Stream<Arguments> validExpressionArguments() {
246246
arguments("/name =~ \".*dataprepper-[0-9]+\"", event("{\"name\": \"dataprepper-212\"}"), true),
247247
arguments("/name =~ \".*dataprepper-[0-9]+\"", event("{\"name\": \"dataprepper-abc\"}"), false),
248248
arguments("/name =~ \".*dataprepper-[0-9]+\"", event("{\"other\": \"dataprepper-abc\"}"), false),
249+
arguments("/name !~ \".*dataprepper-[0-9]+\"", event("{\"name\": \"dataprepper-abc\"}"), true),
250+
arguments("/name !~ \".*dataprepper-[0-9]+\"", event("{\"name\": \"dataprepper-0\"}"), false),
249251
arguments("startsWith(\""+strValue+ UUID.randomUUID() + "\",/status)", event("{\"status\":\""+strValue+"\"}"), true),
250252
arguments("startsWith(\""+ UUID.randomUUID() +strValue+ "\",/status)", event("{\"status\":\""+strValue+"\"}"), false),
251253
arguments("getEventType() == \"event\"", longEvent, true),
@@ -256,7 +258,17 @@ private static Stream<Arguments> validExpressionArguments() {
256258
arguments("substringBefore(\"key=a=b\", \"=\") == \"key\"", event("{}"), true),
257259
arguments("substringAfterLast(\"/app/src/main.py\", \"/\") == \"main.py\"", event("{}"), true),
258260
arguments("substringBeforeLast(\"app.src.main\", \".\") == \"app.src\"", event("{}"), true),
259-
arguments("/value == \"value-a\" and contains(/string, \"x/y/\")", event("{\"value\": \"value-a\", \"string\": \"prefix/x/y/postfix\"}"), true)
261+
arguments("/value == \"value-a\" and contains(/string, \"x/y/\")", event("{\"value\": \"value-a\", \"string\": \"prefix/x/y/postfix\"}"), true),
262+
arguments("/status_code typeof integer", event("{\"status_code\": 200}"), true),
263+
arguments("/status_code typeof integer", event("{\"status_code\": \"200\"}"), false),
264+
arguments("/name typeof string", event("{\"name\": \"test\"}"), true),
265+
arguments("/flag typeof boolean", event("{\"flag\": true}"), true),
266+
arguments("/value typeof long", event("{\"value\": 2147483648}"), true),
267+
arguments("/items typeof array", event("{\"items\": [1, 2]}"), true),
268+
arguments("/data typeof map", event("{\"data\": {\"k\": \"v\"}}"), true),
269+
arguments("not (/status_code typeof integer)", event("{\"status_code\": \"200\"}"), true),
270+
arguments("not (/status_code typeof integer)", event("{\"status_code\": 200}"), false),
271+
arguments("not (/name typeof string)", event("{\"name\": 123}"), true)
260272
);
261273
}
262274

data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/GrammarLexerTest.java

Lines changed: 141 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,17 @@
1414
import org.antlr.v4.runtime.Lexer;
1515
import org.antlr.v4.runtime.Token;
1616
import org.junit.jupiter.api.Test;
17+
import org.junit.jupiter.params.ParameterizedTest;
18+
import org.junit.jupiter.params.provider.ValueSource;
1719
import org.opensearch.dataprepper.expression.antlr.DataPrepperExpressionLexer;
1820

1921
import java.util.List;
2022

2123
import static org.hamcrest.CoreMatchers.is;
24+
import static org.hamcrest.CoreMatchers.not;
2225
import static org.hamcrest.MatcherAssert.assertThat;
26+
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
2327
import static org.junit.jupiter.api.Assertions.assertAll;
24-
import static org.junit.jupiter.api.Assertions.assertFalse;
2528

2629
class GrammarLexerTest {
2730

@@ -32,10 +35,11 @@ private List<? extends Token> getTokens(final String statement) {
3235
return tokenStream.getTokens();
3336
}
3437

35-
private void assertTokenFalse(final String statement, final int type) {
38+
private void assertIsNotToken(final String statement, final int type) {
3639
final List<? extends Token> tokens = getTokens(statement);
3740

38-
assertFalse(tokens.size() == 2 && tokens.get(0).getText() == statement);
41+
assertThat(tokens.size(), is(greaterThanOrEqualTo(2)));
42+
assertThat(tokens.get(0).getType(), is(not(type)));
3943
}
4044

4145
private void assertToken(final String statement, final int type) {
@@ -73,27 +77,31 @@ void testTokenFloat() {
7377
assertToken("12345678.0002e6", DataPrepperExpressionLexer.Float);
7478
assertToken("12345678.000252E16", DataPrepperExpressionLexer.Float);
7579
// only one zero before the decimal point
76-
assertTokenFalse("0000.678e12", DataPrepperExpressionLexer.Float);
80+
assertIsNotToken("0000.678e12", DataPrepperExpressionLexer.Float);
7781
// Must have one digit before the decimal point
78-
assertTokenFalse(".678e12", DataPrepperExpressionLexer.Float);
79-
assertTokenFalse(".678e-12", DataPrepperExpressionLexer.Float);
80-
assertTokenFalse(".6782", DataPrepperExpressionLexer.Float);
82+
assertIsNotToken(".678e12", DataPrepperExpressionLexer.Float);
83+
assertIsNotToken(".678e-12", DataPrepperExpressionLexer.Float);
84+
assertIsNotToken(".6782", DataPrepperExpressionLexer.Float);
8185
// Can't end with decimal point
82-
assertTokenFalse("6782.", DataPrepperExpressionLexer.Float);
83-
// only one zero after decimal point
84-
assertTokenFalse("12345678.00", DataPrepperExpressionLexer.Float);
86+
assertIsNotToken("6782.", DataPrepperExpressionLexer.Float);
8587
}
8688

87-
@Test
88-
void testTokenBoolean() {
89-
assertToken("true", DataPrepperExpressionLexer.Boolean);
89+
@ParameterizedTest
90+
@ValueSource(strings = {"true", "false"})
91+
void testTokenBoolean(final String booleanStatement) {
92+
assertToken(booleanStatement, DataPrepperExpressionLexer.Boolean);
9093
}
9194

9295
@Test
9396
void testTokenJsonPointer() {
9497
assertToken("/status_code", DataPrepperExpressionLexer.JsonPointer);
9598
}
9699

100+
@Test
101+
void testTokenEscapedJsonPointer() {
102+
assertToken("\"/status_code\"", DataPrepperExpressionLexer.EscapedJsonPointer);
103+
}
104+
97105
@Test
98106
void testTokenString() {
99107
assertToken("\"Hello World\"", DataPrepperExpressionLexer.String);
@@ -204,6 +212,126 @@ void testTokenSUBTRACT() {
204212
assertToken("-", DataPrepperExpressionLexer.SUBTRACT);
205213
}
206214

215+
@ParameterizedTest
216+
@ValueSource(strings = {"integer", "boolean", "big_decimal", "long", "double", "string", "map", "array"})
217+
void testTokenDataTypes(final String dataType) {
218+
assertToken(dataType, DataPrepperExpressionLexer.DataTypes);
219+
}
220+
221+
@ParameterizedTest
222+
@ValueSource(strings = {"length", "contains", "cidrContains", "hasTags", "getMetadata", "getEventType"})
223+
void testTokenIdentifier(final String functionName) {
224+
assertToken(functionName, DataPrepperExpressionLexer.Identifier);
225+
}
226+
227+
@ParameterizedTest
228+
@ValueSource(strings = {"length", "contains", "cidrContains", "hasTags", "getMetadata", "getEventType"})
229+
void testTokenFunction(final String functionName) {
230+
final String statement = functionName + "()";
231+
final List<? extends Token> tokens = getTokens(statement);
232+
233+
assertThat(tokens.size(), is(4));
234+
assertAll(
235+
() -> assertThat(tokens.get(0).getType(), is(DataPrepperExpressionLexer.Identifier)),
236+
() -> assertThat(tokens.get(0).getText(), is(functionName)),
237+
() -> assertThat(tokens.get(1).getType(), is(DataPrepperExpressionLexer.LPAREN)),
238+
() -> assertThat(tokens.get(2).getType(), is(DataPrepperExpressionLexer.RPAREN)),
239+
() -> assertThat(tokens.get(3).getType(), is(DataPrepperExpressionLexer.EOF))
240+
);
241+
assertToken(functionName, DataPrepperExpressionLexer.Identifier);
242+
}
243+
244+
@ParameterizedTest
245+
@ValueSource(strings = {"length", "contains", "cidrContains", "hasTags", "getMetadata", "getEventType"})
246+
void testTokenFunctionWithSpace(final String functionName) {
247+
final String statement = functionName + " ()";
248+
final List<? extends Token> tokens = getTokens(statement);
249+
250+
assertThat(tokens.size(), is(4));
251+
assertAll(
252+
() -> assertThat(tokens.get(0).getType(), is(DataPrepperExpressionLexer.Identifier)),
253+
() -> assertThat(tokens.get(0).getText(), is(functionName)),
254+
() -> assertThat(tokens.get(1).getType(), is(DataPrepperExpressionLexer.LPAREN)),
255+
() -> assertThat(tokens.get(2).getType(), is(DataPrepperExpressionLexer.RPAREN)),
256+
() -> assertThat(tokens.get(3).getType(), is(DataPrepperExpressionLexer.EOF))
257+
);
258+
assertToken(functionName, DataPrepperExpressionLexer.Identifier);
259+
}
260+
261+
@ParameterizedTest
262+
@ValueSource(strings = {"integer", "boolean", "big_decimal", "long", "double", "string", "map", "array"})
263+
void testTypeOfExpressionTokenization(final String dataType) {
264+
final String statement = "/status typeof " + dataType;
265+
final List<? extends Token> tokens = getTokens(statement);
266+
267+
assertThat(tokens.size(), is(4));
268+
assertAll(
269+
() -> assertThat(tokens.get(0).getType(), is(DataPrepperExpressionLexer.JsonPointer)),
270+
() -> assertThat(tokens.get(1).getType(), is(DataPrepperExpressionLexer.TYPEOF)),
271+
() -> assertThat(tokens.get(2).getType(), is(DataPrepperExpressionLexer.DataTypes)),
272+
() -> assertThat(tokens.get(2).getText(), is(dataType)),
273+
() -> assertThat(tokens.get(3).getType(), is(DataPrepperExpressionLexer.EOF))
274+
);
275+
}
276+
277+
@Test
278+
void testFunctionWithNoArgsTokenization() {
279+
final List<? extends Token> tokens = getTokens("functionWithoutArguments()");
280+
281+
assertThat(tokens.size(), is(4));
282+
assertAll(
283+
() -> assertThat(tokens.get(0).getType(), is(DataPrepperExpressionLexer.Identifier)),
284+
() -> assertThat(tokens.get(0).getText(), is("functionWithoutArguments")),
285+
() -> assertThat(tokens.get(1).getType(), is(DataPrepperExpressionLexer.LPAREN)),
286+
() -> assertThat(tokens.get(2).getType(), is(DataPrepperExpressionLexer.RPAREN)),
287+
() -> assertThat(tokens.get(3).getType(), is(DataPrepperExpressionLexer.EOF))
288+
);
289+
}
290+
291+
@Test
292+
void testFunctionWithArgsTokenization() {
293+
final List<? extends Token> tokens = getTokens("functionWithTwoArguments(/sourceIp,\"192.0.2.0/24\")");
294+
295+
assertThat(tokens.size(), is(7));
296+
assertAll(
297+
() -> assertThat(tokens.get(0).getType(), is(DataPrepperExpressionLexer.Identifier)),
298+
() -> assertThat(tokens.get(0).getText(), is("functionWithTwoArguments")),
299+
() -> assertThat(tokens.get(1).getType(), is(DataPrepperExpressionLexer.LPAREN)),
300+
() -> assertThat(tokens.get(2).getType(), is(DataPrepperExpressionLexer.JsonPointer)),
301+
() -> assertThat(tokens.get(2).getText(), is("/sourceIp")),
302+
() -> assertThat(tokens.get(3).getType(), is(DataPrepperExpressionLexer.COMMA)),
303+
() -> assertThat(tokens.get(4).getType(), is(DataPrepperExpressionLexer.String)),
304+
() -> assertThat(tokens.get(4).getText(), is("\"192.0.2.0/24\"")),
305+
() -> assertThat(tokens.get(5).getType(), is(DataPrepperExpressionLexer.RPAREN)),
306+
() -> assertThat(tokens.get(6).getType(), is(DataPrepperExpressionLexer.EOF))
307+
);
308+
}
309+
310+
@Test
311+
void testTokenNull() {
312+
assertToken("null", DataPrepperExpressionLexer.Null);
313+
}
314+
315+
@Test
316+
void testTokenCOMMA() {
317+
assertToken(",", DataPrepperExpressionLexer.COMMA);
318+
}
319+
320+
@Test
321+
void testTokenPLUS() {
322+
assertToken("+", DataPrepperExpressionLexer.PLUS);
323+
}
324+
325+
@Test
326+
void testTokenMULTIPLY() {
327+
assertToken("*", DataPrepperExpressionLexer.MULTIPLY);
328+
}
329+
330+
@Test
331+
void testTokenMOD() {
332+
assertToken("%", DataPrepperExpressionLexer.MOD);
333+
}
334+
207335
@Test
208336
void testSpaceInsignificant() {
209337
final String statement = " ";

data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/util/FunctionMatcher.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ private FunctionMatcher(final RuleClassOrderedList validRuleOrder) {
5151

5252
@Override
5353
protected boolean baseCase(final ParseTree item, final Description mismatchDescription) {
54-
// function is now a parser rule: FunctionName LPAREN functionArgs? RPAREN
55-
// Minimum 3 children: FunctionName, LPAREN, RPAREN
54+
// function is now a parser rule: Identifier LPAREN functionArgs? RPAREN
55+
// Minimum 3 children: Identifier, LPAREN, RPAREN
5656
final int childCount = item.getChildCount();
5757
if (childCount < 3) {
5858
mismatchDescription.appendText("\n\t\t expected " + item.getText() + " to have at least 3 child nodes, got " + childCount);

data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/util/FunctionMatcherTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ void baseCase() {
2828
final DiagnosingMatcher<ParseTree> isFunctionUnaryTree = isFunctionUnaryTree();
2929
final ParseTree primary = mock(DataPrepperExpressionParser.PrimaryContext.class, "PrimaryContext");
3030
final ParseTree functionCtx = mock(DataPrepperExpressionParser.FunctionContext.class, "FunctionContext");
31-
final ParseTree functionName = mock(TerminalNode.class, "FunctionName");
31+
final ParseTree functionName = mock(TerminalNode.class, "Identifier");
3232
final ParseTree lparen = mock(TerminalNode.class, "LPAREN");
3333
final ParseTree rparen = mock(TerminalNode.class, "RPAREN");
3434

@@ -38,7 +38,7 @@ void baseCase() {
3838
doReturn(functionCtx)
3939
.when(primary)
4040
.getChild(eq(0));
41-
// function is now a parser rule: FunctionName LPAREN RPAREN (3 children, no args)
41+
// function is now a parser rule: Identifier LPAREN RPAREN (3 children, no args)
4242
doReturn(3)
4343
.when(functionCtx)
4444
.getChildCount();

0 commit comments

Comments
 (0)