diff --git a/data-prepper-expression/src/main/antlr/DataPrepperExpression.g4 b/data-prepper-expression/src/main/antlr/DataPrepperExpression.g4 index 49ad9102cd..02f7dee9bb 100644 --- a/data-prepper-expression/src/main/antlr/DataPrepperExpression.g4 +++ b/data-prepper-expression/src/main/antlr/DataPrepperExpression.g4 @@ -155,7 +155,7 @@ jsonPointer ; function - : FunctionName LPAREN functionArgs? RPAREN + : Identifier LPAREN functionArgs? RPAREN ; functionArgs @@ -275,10 +275,6 @@ String | DOUBLEQUOTE DOUBLEQUOTE DOUBLEQUOTE StringCharacters? DOUBLEQUOTE DOUBLEQUOTE DOUBLEQUOTE ; -FunctionName - : JsonPointerCharacters - ; - fragment StringCharacters : StringCharacter+ @@ -306,6 +302,14 @@ DataTypes | STRING ; +// Identifier MUST be defined after DataTypes (and all other keyword-like +// lexer rules) because it matches [A-Za-z0-9_.@]+ which would shadow any +// keyword defined later. ANTLR resolves same-length lexer ambiguities by +// choosing the rule that appears first in the grammar. +Identifier + : JsonPointerCharacters + ; + COMMA : ','; EQUAL : '=='; NOT_EQUAL : '!='; diff --git a/data-prepper-expression/src/main/java/org/opensearch/dataprepper/expression/ParseTreeEvaluatorListener.java b/data-prepper-expression/src/main/java/org/opensearch/dataprepper/expression/ParseTreeEvaluatorListener.java index 2fc08864a5..401d540212 100644 --- a/data-prepper-expression/src/main/java/org/opensearch/dataprepper/expression/ParseTreeEvaluatorListener.java +++ b/data-prepper-expression/src/main/java/org/opensearch/dataprepper/expression/ParseTreeEvaluatorListener.java @@ -98,7 +98,7 @@ private boolean isInsideFunction() { @Override public void enterFunction(DataPrepperExpressionParser.FunctionContext ctx) { - final String functionName = ctx.FunctionName().getText(); + final String functionName = ctx.Identifier().getText(); functionContextStack.push(new FunctionEvalContext(functionName, operandStack.size())); } @@ -133,9 +133,9 @@ public void visitTerminal(TerminalNode node) { return; } - // Skip FunctionName, LPAREN, RPAREN, and COMMA tokens inside function rules + // Skip Identifier, LPAREN, RPAREN, and COMMA tokens inside function rules // These are structural tokens handled by enter/exitFunction - if (nodeType == DataPrepperExpressionParser.FunctionName) { + if (nodeType == DataPrepperExpressionParser.Identifier) { return; } diff --git a/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/GenericExpressionEvaluator_ConditionalIT.java b/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/GenericExpressionEvaluator_ConditionalIT.java index 0ffa5dc0b9..4f52716cc0 100644 --- a/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/GenericExpressionEvaluator_ConditionalIT.java +++ b/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/GenericExpressionEvaluator_ConditionalIT.java @@ -246,6 +246,8 @@ private static Stream validExpressionArguments() { arguments("/name =~ \".*dataprepper-[0-9]+\"", event("{\"name\": \"dataprepper-212\"}"), true), arguments("/name =~ \".*dataprepper-[0-9]+\"", event("{\"name\": \"dataprepper-abc\"}"), false), arguments("/name =~ \".*dataprepper-[0-9]+\"", event("{\"other\": \"dataprepper-abc\"}"), false), + arguments("/name !~ \".*dataprepper-[0-9]+\"", event("{\"name\": \"dataprepper-abc\"}"), true), + arguments("/name !~ \".*dataprepper-[0-9]+\"", event("{\"name\": \"dataprepper-0\"}"), false), arguments("startsWith(\""+strValue+ UUID.randomUUID() + "\",/status)", event("{\"status\":\""+strValue+"\"}"), true), arguments("startsWith(\""+ UUID.randomUUID() +strValue+ "\",/status)", event("{\"status\":\""+strValue+"\"}"), false), arguments("getEventType() == \"event\"", longEvent, true), @@ -256,7 +258,17 @@ private static Stream validExpressionArguments() { arguments("substringBefore(\"key=a=b\", \"=\") == \"key\"", event("{}"), true), arguments("substringAfterLast(\"/app/src/main.py\", \"/\") == \"main.py\"", event("{}"), true), arguments("substringBeforeLast(\"app.src.main\", \".\") == \"app.src\"", event("{}"), true), - arguments("/value == \"value-a\" and contains(/string, \"x/y/\")", event("{\"value\": \"value-a\", \"string\": \"prefix/x/y/postfix\"}"), true) + arguments("/value == \"value-a\" and contains(/string, \"x/y/\")", event("{\"value\": \"value-a\", \"string\": \"prefix/x/y/postfix\"}"), true), + arguments("/status_code typeof integer", event("{\"status_code\": 200}"), true), + arguments("/status_code typeof integer", event("{\"status_code\": \"200\"}"), false), + arguments("/name typeof string", event("{\"name\": \"test\"}"), true), + arguments("/flag typeof boolean", event("{\"flag\": true}"), true), + arguments("/value typeof long", event("{\"value\": 2147483648}"), true), + arguments("/items typeof array", event("{\"items\": [1, 2]}"), true), + arguments("/data typeof map", event("{\"data\": {\"k\": \"v\"}}"), true), + arguments("not (/status_code typeof integer)", event("{\"status_code\": \"200\"}"), true), + arguments("not (/status_code typeof integer)", event("{\"status_code\": 200}"), false), + arguments("not (/name typeof string)", event("{\"name\": 123}"), true) ); } diff --git a/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/GrammarLexerTest.java b/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/GrammarLexerTest.java index af61e9a9fb..e26a3f2f86 100644 --- a/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/GrammarLexerTest.java +++ b/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/GrammarLexerTest.java @@ -14,14 +14,17 @@ import org.antlr.v4.runtime.Lexer; import org.antlr.v4.runtime.Token; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import org.opensearch.dataprepper.expression.antlr.DataPrepperExpressionLexer; import java.util.List; import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.not; import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.junit.jupiter.api.Assertions.assertAll; -import static org.junit.jupiter.api.Assertions.assertFalse; class GrammarLexerTest { @@ -32,10 +35,11 @@ private List getTokens(final String statement) { return tokenStream.getTokens(); } - private void assertTokenFalse(final String statement, final int type) { + private void assertIsNotToken(final String statement, final int type) { final List tokens = getTokens(statement); - assertFalse(tokens.size() == 2 && tokens.get(0).getText() == statement); + assertThat(tokens.size(), is(greaterThanOrEqualTo(2))); + assertThat(tokens.get(0).getType(), is(not(type))); } private void assertToken(final String statement, final int type) { @@ -73,20 +77,19 @@ void testTokenFloat() { assertToken("12345678.0002e6", DataPrepperExpressionLexer.Float); assertToken("12345678.000252E16", DataPrepperExpressionLexer.Float); // only one zero before the decimal point - assertTokenFalse("0000.678e12", DataPrepperExpressionLexer.Float); + assertIsNotToken("0000.678e12", DataPrepperExpressionLexer.Float); // Must have one digit before the decimal point - assertTokenFalse(".678e12", DataPrepperExpressionLexer.Float); - assertTokenFalse(".678e-12", DataPrepperExpressionLexer.Float); - assertTokenFalse(".6782", DataPrepperExpressionLexer.Float); + assertIsNotToken(".678e12", DataPrepperExpressionLexer.Float); + assertIsNotToken(".678e-12", DataPrepperExpressionLexer.Float); + assertIsNotToken(".6782", DataPrepperExpressionLexer.Float); // Can't end with decimal point - assertTokenFalse("6782.", DataPrepperExpressionLexer.Float); - // only one zero after decimal point - assertTokenFalse("12345678.00", DataPrepperExpressionLexer.Float); + assertIsNotToken("6782.", DataPrepperExpressionLexer.Float); } - @Test - void testTokenBoolean() { - assertToken("true", DataPrepperExpressionLexer.Boolean); + @ParameterizedTest + @ValueSource(strings = {"true", "false"}) + void testTokenBoolean(final String booleanStatement) { + assertToken(booleanStatement, DataPrepperExpressionLexer.Boolean); } @Test @@ -94,6 +97,11 @@ void testTokenJsonPointer() { assertToken("/status_code", DataPrepperExpressionLexer.JsonPointer); } + @Test + void testTokenEscapedJsonPointer() { + assertToken("\"/status_code\"", DataPrepperExpressionLexer.EscapedJsonPointer); + } + @Test void testTokenString() { assertToken("\"Hello World\"", DataPrepperExpressionLexer.String); @@ -204,6 +212,126 @@ void testTokenSUBTRACT() { assertToken("-", DataPrepperExpressionLexer.SUBTRACT); } + @ParameterizedTest + @ValueSource(strings = {"integer", "boolean", "big_decimal", "long", "double", "string", "map", "array"}) + void testTokenDataTypes(final String dataType) { + assertToken(dataType, DataPrepperExpressionLexer.DataTypes); + } + + @ParameterizedTest + @ValueSource(strings = {"length", "contains", "cidrContains", "hasTags", "getMetadata", "getEventType"}) + void testTokenIdentifier(final String functionName) { + assertToken(functionName, DataPrepperExpressionLexer.Identifier); + } + + @ParameterizedTest + @ValueSource(strings = {"length", "contains", "cidrContains", "hasTags", "getMetadata", "getEventType"}) + void testTokenFunction(final String functionName) { + final String statement = functionName + "()"; + final List tokens = getTokens(statement); + + assertThat(tokens.size(), is(4)); + assertAll( + () -> assertThat(tokens.get(0).getType(), is(DataPrepperExpressionLexer.Identifier)), + () -> assertThat(tokens.get(0).getText(), is(functionName)), + () -> assertThat(tokens.get(1).getType(), is(DataPrepperExpressionLexer.LPAREN)), + () -> assertThat(tokens.get(2).getType(), is(DataPrepperExpressionLexer.RPAREN)), + () -> assertThat(tokens.get(3).getType(), is(DataPrepperExpressionLexer.EOF)) + ); + assertToken(functionName, DataPrepperExpressionLexer.Identifier); + } + + @ParameterizedTest + @ValueSource(strings = {"length", "contains", "cidrContains", "hasTags", "getMetadata", "getEventType"}) + void testTokenFunctionWithSpace(final String functionName) { + final String statement = functionName + " ()"; + final List tokens = getTokens(statement); + + assertThat(tokens.size(), is(4)); + assertAll( + () -> assertThat(tokens.get(0).getType(), is(DataPrepperExpressionLexer.Identifier)), + () -> assertThat(tokens.get(0).getText(), is(functionName)), + () -> assertThat(tokens.get(1).getType(), is(DataPrepperExpressionLexer.LPAREN)), + () -> assertThat(tokens.get(2).getType(), is(DataPrepperExpressionLexer.RPAREN)), + () -> assertThat(tokens.get(3).getType(), is(DataPrepperExpressionLexer.EOF)) + ); + assertToken(functionName, DataPrepperExpressionLexer.Identifier); + } + + @ParameterizedTest + @ValueSource(strings = {"integer", "boolean", "big_decimal", "long", "double", "string", "map", "array"}) + void testTypeOfExpressionTokenization(final String dataType) { + final String statement = "/status typeof " + dataType; + final List tokens = getTokens(statement); + + assertThat(tokens.size(), is(4)); + assertAll( + () -> assertThat(tokens.get(0).getType(), is(DataPrepperExpressionLexer.JsonPointer)), + () -> assertThat(tokens.get(1).getType(), is(DataPrepperExpressionLexer.TYPEOF)), + () -> assertThat(tokens.get(2).getType(), is(DataPrepperExpressionLexer.DataTypes)), + () -> assertThat(tokens.get(2).getText(), is(dataType)), + () -> assertThat(tokens.get(3).getType(), is(DataPrepperExpressionLexer.EOF)) + ); + } + + @Test + void testFunctionWithNoArgsTokenization() { + final List tokens = getTokens("functionWithoutArguments()"); + + assertThat(tokens.size(), is(4)); + assertAll( + () -> assertThat(tokens.get(0).getType(), is(DataPrepperExpressionLexer.Identifier)), + () -> assertThat(tokens.get(0).getText(), is("functionWithoutArguments")), + () -> assertThat(tokens.get(1).getType(), is(DataPrepperExpressionLexer.LPAREN)), + () -> assertThat(tokens.get(2).getType(), is(DataPrepperExpressionLexer.RPAREN)), + () -> assertThat(tokens.get(3).getType(), is(DataPrepperExpressionLexer.EOF)) + ); + } + + @Test + void testFunctionWithArgsTokenization() { + final List tokens = getTokens("functionWithTwoArguments(/sourceIp,\"192.0.2.0/24\")"); + + assertThat(tokens.size(), is(7)); + assertAll( + () -> assertThat(tokens.get(0).getType(), is(DataPrepperExpressionLexer.Identifier)), + () -> assertThat(tokens.get(0).getText(), is("functionWithTwoArguments")), + () -> assertThat(tokens.get(1).getType(), is(DataPrepperExpressionLexer.LPAREN)), + () -> assertThat(tokens.get(2).getType(), is(DataPrepperExpressionLexer.JsonPointer)), + () -> assertThat(tokens.get(2).getText(), is("/sourceIp")), + () -> assertThat(tokens.get(3).getType(), is(DataPrepperExpressionLexer.COMMA)), + () -> assertThat(tokens.get(4).getType(), is(DataPrepperExpressionLexer.String)), + () -> assertThat(tokens.get(4).getText(), is("\"192.0.2.0/24\"")), + () -> assertThat(tokens.get(5).getType(), is(DataPrepperExpressionLexer.RPAREN)), + () -> assertThat(tokens.get(6).getType(), is(DataPrepperExpressionLexer.EOF)) + ); + } + + @Test + void testTokenNull() { + assertToken("null", DataPrepperExpressionLexer.Null); + } + + @Test + void testTokenCOMMA() { + assertToken(",", DataPrepperExpressionLexer.COMMA); + } + + @Test + void testTokenPLUS() { + assertToken("+", DataPrepperExpressionLexer.PLUS); + } + + @Test + void testTokenMULTIPLY() { + assertToken("*", DataPrepperExpressionLexer.MULTIPLY); + } + + @Test + void testTokenMOD() { + assertToken("%", DataPrepperExpressionLexer.MOD); + } + @Test void testSpaceInsignificant() { final String statement = " "; diff --git a/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/util/FunctionMatcher.java b/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/util/FunctionMatcher.java index 5e2867f418..7bf10193e5 100644 --- a/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/util/FunctionMatcher.java +++ b/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/util/FunctionMatcher.java @@ -51,8 +51,8 @@ private FunctionMatcher(final RuleClassOrderedList validRuleOrder) { @Override protected boolean baseCase(final ParseTree item, final Description mismatchDescription) { - // function is now a parser rule: FunctionName LPAREN functionArgs? RPAREN - // Minimum 3 children: FunctionName, LPAREN, RPAREN + // function is now a parser rule: Identifier LPAREN functionArgs? RPAREN + // Minimum 3 children: Identifier, LPAREN, RPAREN final int childCount = item.getChildCount(); if (childCount < 3) { mismatchDescription.appendText("\n\t\t expected " + item.getText() + " to have at least 3 child nodes, got " + childCount); diff --git a/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/util/FunctionMatcherTest.java b/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/util/FunctionMatcherTest.java index 0c2ff2fcc4..699f888eaf 100644 --- a/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/util/FunctionMatcherTest.java +++ b/data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/util/FunctionMatcherTest.java @@ -28,7 +28,7 @@ void baseCase() { final DiagnosingMatcher isFunctionUnaryTree = isFunctionUnaryTree(); final ParseTree primary = mock(DataPrepperExpressionParser.PrimaryContext.class, "PrimaryContext"); final ParseTree functionCtx = mock(DataPrepperExpressionParser.FunctionContext.class, "FunctionContext"); - final ParseTree functionName = mock(TerminalNode.class, "FunctionName"); + final ParseTree functionName = mock(TerminalNode.class, "Identifier"); final ParseTree lparen = mock(TerminalNode.class, "LPAREN"); final ParseTree rparen = mock(TerminalNode.class, "RPAREN"); @@ -38,7 +38,7 @@ void baseCase() { doReturn(functionCtx) .when(primary) .getChild(eq(0)); - // function is now a parser rule: FunctionName LPAREN RPAREN (3 children, no args) + // function is now a parser rule: Identifier LPAREN RPAREN (3 children, no args) doReturn(3) .when(functionCtx) .getChildCount();