Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ jsonPointer
;

function
: FunctionName LPAREN functionArgs? RPAREN
: Identifier LPAREN functionArgs? RPAREN
;

functionArgs
Expand Down Expand Up @@ -275,10 +275,6 @@ String
| DOUBLEQUOTE DOUBLEQUOTE DOUBLEQUOTE StringCharacters? DOUBLEQUOTE DOUBLEQUOTE DOUBLEQUOTE
;

FunctionName
: JsonPointerCharacters
;

fragment
StringCharacters
: StringCharacter+
Expand Down Expand Up @@ -306,6 +302,14 @@ DataTypes
| STRING
;

// Identifier is the token the `function` parser rule expects as the function
// name (function : Identifier LPAREN functionArgs? RPAREN).
//
// Identifier MUST be defined after DataTypes (and all other keyword-like
// lexer rules) because it matches [A-Za-z0-9_.@]+ which would shadow any
// keyword defined later. ANTLR resolves same-length lexer ambiguities by
// choosing the rule that appears first in the grammar.
Identifier
: JsonPointerCharacters
;

COMMA : ',';
EQUAL : '==';
NOT_EQUAL : '!=';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ private boolean isInsideFunction() {

@Override
public void enterFunction(DataPrepperExpressionParser.FunctionContext ctx) {
final String functionName = ctx.FunctionName().getText();
final String functionName = ctx.Identifier().getText();
functionContextStack.push(new FunctionEvalContext(functionName, operandStack.size()));
}

Expand Down Expand Up @@ -133,9 +133,9 @@ public void visitTerminal(TerminalNode node) {
return;
}

// Skip FunctionName, LPAREN, RPAREN, and COMMA tokens inside function rules
// Skip Identifier, LPAREN, RPAREN, and COMMA tokens inside function rules
// These are structural tokens handled by enter/exitFunction
if (nodeType == DataPrepperExpressionParser.FunctionName) {
if (nodeType == DataPrepperExpressionParser.Identifier) {
return;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,8 @@ private static Stream<Arguments> validExpressionArguments() {
arguments("/name =~ \".*dataprepper-[0-9]+\"", event("{\"name\": \"dataprepper-212\"}"), true),
arguments("/name =~ \".*dataprepper-[0-9]+\"", event("{\"name\": \"dataprepper-abc\"}"), false),
arguments("/name =~ \".*dataprepper-[0-9]+\"", event("{\"other\": \"dataprepper-abc\"}"), false),
arguments("/name !~ \".*dataprepper-[0-9]+\"", event("{\"name\": \"dataprepper-abc\"}"), true),
arguments("/name !~ \".*dataprepper-[0-9]+\"", event("{\"name\": \"dataprepper-0\"}"), false),
arguments("startsWith(\""+strValue+ UUID.randomUUID() + "\",/status)", event("{\"status\":\""+strValue+"\"}"), true),
arguments("startsWith(\""+ UUID.randomUUID() +strValue+ "\",/status)", event("{\"status\":\""+strValue+"\"}"), false),
arguments("getEventType() == \"event\"", longEvent, true),
Expand All @@ -256,7 +258,17 @@ private static Stream<Arguments> validExpressionArguments() {
arguments("substringBefore(\"key=a=b\", \"=\") == \"key\"", event("{}"), true),
arguments("substringAfterLast(\"/app/src/main.py\", \"/\") == \"main.py\"", event("{}"), true),
arguments("substringBeforeLast(\"app.src.main\", \".\") == \"app.src\"", event("{}"), true),
arguments("/value == \"value-a\" and contains(/string, \"x/y/\")", event("{\"value\": \"value-a\", \"string\": \"prefix/x/y/postfix\"}"), true)
arguments("/value == \"value-a\" and contains(/string, \"x/y/\")", event("{\"value\": \"value-a\", \"string\": \"prefix/x/y/postfix\"}"), true),
arguments("/status_code typeof integer", event("{\"status_code\": 200}"), true),
arguments("/status_code typeof integer", event("{\"status_code\": \"200\"}"), false),
arguments("/name typeof string", event("{\"name\": \"test\"}"), true),
arguments("/flag typeof boolean", event("{\"flag\": true}"), true),
arguments("/value typeof long", event("{\"value\": 2147483648}"), true),
arguments("/items typeof array", event("{\"items\": [1, 2]}"), true),
arguments("/data typeof map", event("{\"data\": {\"k\": \"v\"}}"), true),
arguments("not (/status_code typeof integer)", event("{\"status_code\": \"200\"}"), true),
arguments("not (/status_code typeof integer)", event("{\"status_code\": 200}"), false),
arguments("not (/name typeof string)", event("{\"name\": 123}"), true)
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,17 @@
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.Token;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.opensearch.dataprepper.expression.antlr.DataPrepperExpressionLexer;

import java.util.List;

import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.junit.jupiter.api.Assertions.assertAll;
import static org.junit.jupiter.api.Assertions.assertFalse;

class GrammarLexerTest {

Expand All @@ -32,10 +35,11 @@ private List<? extends Token> getTokens(final String statement) {
return tokenStream.getTokens();
}

private void assertTokenFalse(final String statement, final int type) {
private void assertIsNotToken(final String statement, final int type) {
final List<? extends Token> tokens = getTokens(statement);

assertFalse(tokens.size() == 2 && tokens.get(0).getText() == statement);
assertThat(tokens.size(), is(greaterThanOrEqualTo(2)));
assertThat(tokens.get(0).getType(), is(not(type)));
}

private void assertToken(final String statement, final int type) {
Expand Down Expand Up @@ -73,27 +77,31 @@ void testTokenFloat() {
assertToken("12345678.0002e6", DataPrepperExpressionLexer.Float);
assertToken("12345678.000252E16", DataPrepperExpressionLexer.Float);
// only one zero before the decimal point
assertTokenFalse("0000.678e12", DataPrepperExpressionLexer.Float);
assertIsNotToken("0000.678e12", DataPrepperExpressionLexer.Float);
// Must have one digit before the decimal point
assertTokenFalse(".678e12", DataPrepperExpressionLexer.Float);
assertTokenFalse(".678e-12", DataPrepperExpressionLexer.Float);
assertTokenFalse(".6782", DataPrepperExpressionLexer.Float);
assertIsNotToken(".678e12", DataPrepperExpressionLexer.Float);
assertIsNotToken(".678e-12", DataPrepperExpressionLexer.Float);
assertIsNotToken(".6782", DataPrepperExpressionLexer.Float);
// Can't end with decimal point
assertTokenFalse("6782.", DataPrepperExpressionLexer.Float);
// only one zero after decimal point
assertTokenFalse("12345678.00", DataPrepperExpressionLexer.Float);
assertIsNotToken("6782.", DataPrepperExpressionLexer.Float);
}

@Test
void testTokenBoolean() {
assertToken("true", DataPrepperExpressionLexer.Boolean);
@ParameterizedTest
@ValueSource(strings = {"true", "false"})
void testTokenBoolean(final String booleanStatement) {
assertToken(booleanStatement, DataPrepperExpressionLexer.Boolean);
}

@Test
void testTokenJsonPointer() {
    // An unquoted, slash-prefixed path is expected to lex as a JsonPointer token.
    final String pointer = "/status_code";
    final int expectedType = DataPrepperExpressionLexer.JsonPointer;

    assertToken(pointer, expectedType);
}

@Test
void testTokenEscapedJsonPointer() {
    // The same path wrapped in double quotes is expected to lex as an EscapedJsonPointer token.
    final String quotedPointer = "\"/status_code\"";
    final int expectedType = DataPrepperExpressionLexer.EscapedJsonPointer;

    assertToken(quotedPointer, expectedType);
}

@Test
void testTokenString() {
assertToken("\"Hello World\"", DataPrepperExpressionLexer.String);
Expand Down Expand Up @@ -204,6 +212,126 @@ void testTokenSUBTRACT() {
assertToken("-", DataPrepperExpressionLexer.SUBTRACT);
}

@ParameterizedTest
@ValueSource(strings = {"integer", "boolean", "big_decimal", "long", "double", "string", "map", "array"})
void testTokenDataTypes(final String dataType) {
    // Each data type keyword on its own is expected to lex as DataTypes.
    final int expectedType = DataPrepperExpressionLexer.DataTypes;

    assertToken(dataType, expectedType);
}

@ParameterizedTest
@ValueSource(strings = {"length", "contains", "cidrContains", "hasTags", "getMetadata", "getEventType"})
void testTokenIdentifier(final String functionName) {
    // A bare function name (no parentheses) is expected to lex as Identifier.
    final int expectedType = DataPrepperExpressionLexer.Identifier;

    assertToken(functionName, expectedType);
}

@ParameterizedTest
@ValueSource(strings = {"length", "contains", "cidrContains", "hasTags", "getMetadata", "getEventType"})
void testTokenFunction(final String functionName) {
    // Zero-argument call with no whitespace: expect name, "(", ")", EOF.
    final List<? extends Token> lexed = getTokens(functionName + "()");

    assertThat(lexed.size(), is(4));

    final Token nameToken = lexed.get(0);
    final Token openParen = lexed.get(1);
    final Token closeParen = lexed.get(2);
    final Token endOfInput = lexed.get(3);
    assertAll(
            () -> assertThat(nameToken.getType(), is(DataPrepperExpressionLexer.Identifier)),
            () -> assertThat(nameToken.getText(), is(functionName)),
            () -> assertThat(openParen.getType(), is(DataPrepperExpressionLexer.LPAREN)),
            () -> assertThat(closeParen.getType(), is(DataPrepperExpressionLexer.RPAREN)),
            () -> assertThat(endOfInput.getType(), is(DataPrepperExpressionLexer.EOF))
    );

    // The name on its own is also checked as an Identifier token.
    assertToken(functionName, DataPrepperExpressionLexer.Identifier);
}

@ParameterizedTest
@ValueSource(strings = {"length", "contains", "cidrContains", "hasTags", "getMetadata", "getEventType"})
void testTokenFunctionWithSpace(final String functionName) {
    // Same as the no-space case, but with a space between the name and "(";
    // the token count is still expected to be 4.
    final List<? extends Token> lexed = getTokens(functionName + " ()");

    assertThat(lexed.size(), is(4));

    final Token nameToken = lexed.get(0);
    final Token openParen = lexed.get(1);
    final Token closeParen = lexed.get(2);
    final Token endOfInput = lexed.get(3);
    assertAll(
            () -> assertThat(nameToken.getType(), is(DataPrepperExpressionLexer.Identifier)),
            () -> assertThat(nameToken.getText(), is(functionName)),
            () -> assertThat(openParen.getType(), is(DataPrepperExpressionLexer.LPAREN)),
            () -> assertThat(closeParen.getType(), is(DataPrepperExpressionLexer.RPAREN)),
            () -> assertThat(endOfInput.getType(), is(DataPrepperExpressionLexer.EOF))
    );

    // The name on its own is also checked as an Identifier token.
    assertToken(functionName, DataPrepperExpressionLexer.Identifier);
}

@ParameterizedTest
@ValueSource(strings = {"integer", "boolean", "big_decimal", "long", "double", "string", "map", "array"})
void testTypeOfExpressionTokenization(final String dataType) {
    // "<pointer> typeof <type>": expect JsonPointer, TYPEOF, DataTypes, EOF.
    final List<? extends Token> lexed = getTokens("/status typeof " + dataType);

    assertThat(lexed.size(), is(4));

    final Token pointerToken = lexed.get(0);
    final Token typeofToken = lexed.get(1);
    final Token dataTypeToken = lexed.get(2);
    final Token endOfInput = lexed.get(3);
    assertAll(
            () -> assertThat(pointerToken.getType(), is(DataPrepperExpressionLexer.JsonPointer)),
            () -> assertThat(typeofToken.getType(), is(DataPrepperExpressionLexer.TYPEOF)),
            () -> assertThat(dataTypeToken.getType(), is(DataPrepperExpressionLexer.DataTypes)),
            () -> assertThat(dataTypeToken.getText(), is(dataType)),
            () -> assertThat(endOfInput.getType(), is(DataPrepperExpressionLexer.EOF))
    );
}

@Test
void testFunctionWithNoArgsTokenization() {
    // An arbitrary (not built-in) function name with empty parentheses:
    // expect Identifier, LPAREN, RPAREN, EOF.
    final List<? extends Token> lexed = getTokens("functionWithoutArguments()");

    assertThat(lexed.size(), is(4));

    final Token nameToken = lexed.get(0);
    final Token openParen = lexed.get(1);
    final Token closeParen = lexed.get(2);
    final Token endOfInput = lexed.get(3);
    assertAll(
            () -> assertThat(nameToken.getType(), is(DataPrepperExpressionLexer.Identifier)),
            () -> assertThat(nameToken.getText(), is("functionWithoutArguments")),
            () -> assertThat(openParen.getType(), is(DataPrepperExpressionLexer.LPAREN)),
            () -> assertThat(closeParen.getType(), is(DataPrepperExpressionLexer.RPAREN)),
            () -> assertThat(endOfInput.getType(), is(DataPrepperExpressionLexer.EOF))
    );
}

@Test
void testFunctionWithArgsTokenization() {
    // Two-argument call: expect Identifier, LPAREN, JsonPointer, COMMA, String, RPAREN, EOF.
    final List<? extends Token> lexed = getTokens("functionWithTwoArguments(/sourceIp,\"192.0.2.0/24\")");

    assertThat(lexed.size(), is(7));

    final Token nameToken = lexed.get(0);
    final Token pointerArg = lexed.get(2);
    final Token stringArg = lexed.get(4);
    assertAll(
            () -> assertThat(nameToken.getType(), is(DataPrepperExpressionLexer.Identifier)),
            () -> assertThat(nameToken.getText(), is("functionWithTwoArguments")),
            () -> assertThat(lexed.get(1).getType(), is(DataPrepperExpressionLexer.LPAREN)),
            () -> assertThat(pointerArg.getType(), is(DataPrepperExpressionLexer.JsonPointer)),
            () -> assertThat(pointerArg.getText(), is("/sourceIp")),
            () -> assertThat(lexed.get(3).getType(), is(DataPrepperExpressionLexer.COMMA)),
            // The string argument's text keeps its surrounding double quotes.
            () -> assertThat(stringArg.getType(), is(DataPrepperExpressionLexer.String)),
            () -> assertThat(stringArg.getText(), is("\"192.0.2.0/24\"")),
            () -> assertThat(lexed.get(5).getType(), is(DataPrepperExpressionLexer.RPAREN)),
            () -> assertThat(lexed.get(6).getType(), is(DataPrepperExpressionLexer.EOF))
    );
}

@Test
void testTokenNull() {
    // The literal "null" is expected to lex as the Null token.
    final String statement = "null";

    assertToken(statement, DataPrepperExpressionLexer.Null);
}

@Test
void testTokenCOMMA() {
    // A lone "," is expected to lex as COMMA.
    final String statement = ",";

    assertToken(statement, DataPrepperExpressionLexer.COMMA);
}

@Test
void testTokenPLUS() {
    // A lone "+" is expected to lex as PLUS.
    final String statement = "+";

    assertToken(statement, DataPrepperExpressionLexer.PLUS);
}

@Test
void testTokenMULTIPLY() {
    // A lone "*" is expected to lex as MULTIPLY.
    final String statement = "*";

    assertToken(statement, DataPrepperExpressionLexer.MULTIPLY);
}

@Test
void testTokenMOD() {
    // A lone "%" is expected to lex as MOD.
    final String statement = "%";

    assertToken(statement, DataPrepperExpressionLexer.MOD);
}

@Test
void testSpaceInsignificant() {
final String statement = " ";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ private FunctionMatcher(final RuleClassOrderedList validRuleOrder) {

@Override
protected boolean baseCase(final ParseTree item, final Description mismatchDescription) {
// function is now a parser rule: FunctionName LPAREN functionArgs? RPAREN
// Minimum 3 children: FunctionName, LPAREN, RPAREN
// function is now a parser rule: Identifier LPAREN functionArgs? RPAREN
// Minimum 3 children: Identifier, LPAREN, RPAREN
final int childCount = item.getChildCount();
if (childCount < 3) {
mismatchDescription.appendText("\n\t\t expected " + item.getText() + " to have at least 3 child nodes, got " + childCount);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ void baseCase() {
final DiagnosingMatcher<ParseTree> isFunctionUnaryTree = isFunctionUnaryTree();
final ParseTree primary = mock(DataPrepperExpressionParser.PrimaryContext.class, "PrimaryContext");
final ParseTree functionCtx = mock(DataPrepperExpressionParser.FunctionContext.class, "FunctionContext");
final ParseTree functionName = mock(TerminalNode.class, "FunctionName");
final ParseTree functionName = mock(TerminalNode.class, "Identifier");
final ParseTree lparen = mock(TerminalNode.class, "LPAREN");
final ParseTree rparen = mock(TerminalNode.class, "RPAREN");

Expand All @@ -38,7 +38,7 @@ void baseCase() {
doReturn(functionCtx)
.when(primary)
.getChild(eq(0));
// function is now a parser rule: FunctionName LPAREN RPAREN (3 children, no args)
// function is now a parser rule: Identifier LPAREN RPAREN (3 children, no args)
doReturn(3)
.when(functionCtx)
.getChildCount();
Expand Down
Loading