Skip to content

Commit 6692596

Browse files
authored
Add substring expression functions (#6621)
The expression language has no way to extract a portion of a string by delimiter. Existing string processors mutate fields in place but cannot produce a value for assignment via value_expression.

Add four new expression functions:
- substringAfter(s, d): text after the first occurrence of d
- substringBefore(s, d): text before the first occurrence of d
- substringAfterLast(s, d): text after the last occurrence of d
- substringBeforeLast(s, d): text before the last occurrence of d

Both arguments accept JSON Pointers or string literals. If the delimiter is not found, the original string is returned. If the source resolves to null, null is returned.

Resolves #6612

Signed-off-by: Nikhil Bagmar <nikhilbagmar73@gmail.com>
1 parent bb61dbe commit 6692596

12 files changed

Lines changed: 840 additions & 4 deletions
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*
5+
* The OpenSearch Contributors require contributions made to
6+
* this file be licensed under the Apache-2.0 license or a
7+
* compatible open source license.
8+
*/
9+
10+
package org.opensearch.dataprepper.expression;
11+
12+
import org.opensearch.dataprepper.model.event.Event;
13+
import org.opensearch.dataprepper.model.event.EventKey;
14+
15+
import java.util.List;
16+
import java.util.function.Function;
17+
18+
abstract class AbstractSubstringExpressionFunction implements ExpressionFunction {
19+
private static final int NUMBER_OF_ARGS = 2;
20+
21+
@Override
22+
public Object evaluate(final List<Object> args, final Event event, final Function<Object, Object> convertLiteralType) {
23+
if (args.size() != NUMBER_OF_ARGS) {
24+
throw new RuntimeException(getFunctionName() + "() takes exactly two arguments");
25+
}
26+
27+
final String[] strArgs = new String[NUMBER_OF_ARGS];
28+
for (int i = 0; i < NUMBER_OF_ARGS; i++) {
29+
final Object arg = args.get(i);
30+
if (arg instanceof EventKey) {
31+
final Object obj = event.get((EventKey) arg, Object.class);
32+
if (obj == null) {
33+
strArgs[i] = null;
34+
} else if (!(obj instanceof String)) {
35+
throw new RuntimeException(String.format("%s() takes only string type arguments. \"%s\" is not of type string", getFunctionName(), obj));
36+
} else {
37+
strArgs[i] = (String) obj;
38+
}
39+
} else if (arg instanceof String) {
40+
strArgs[i] = (String) arg;
41+
} else {
42+
throw new RuntimeException("Unexpected argument type: " + arg.getClass());
43+
}
44+
}
45+
46+
final String source = strArgs[0];
47+
final String delimiter = strArgs[1];
48+
49+
if (source == null) {
50+
return null;
51+
}
52+
if (delimiter == null || delimiter.isEmpty()) {
53+
return source;
54+
}
55+
return extractSubstring(source, delimiter);
56+
}
57+
58+
protected abstract String extractSubstring(final String source, final String delimiter);
59+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*
5+
* The OpenSearch Contributors require contributions made to
6+
* this file be licensed under the Apache-2.0 license or a
7+
* compatible open source license.
8+
*/
9+
10+
package org.opensearch.dataprepper.expression;
11+
12+
import javax.inject.Named;
13+
14+
@Named
15+
public class SubstringAfterExpressionFunction extends AbstractSubstringExpressionFunction {
16+
static final String FUNCTION_NAME = "substringAfter";
17+
18+
@Override
19+
public String getFunctionName() {
20+
return FUNCTION_NAME;
21+
}
22+
23+
@Override
24+
protected String extractSubstring(final String source, final String delimiter) {
25+
final int index = source.indexOf(delimiter);
26+
if (index == -1) {
27+
return source;
28+
}
29+
return source.substring(index + delimiter.length());
30+
}
31+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*
5+
* The OpenSearch Contributors require contributions made to
6+
* this file be licensed under the Apache-2.0 license or a
7+
* compatible open source license.
8+
*/
9+
10+
package org.opensearch.dataprepper.expression;
11+
12+
import javax.inject.Named;
13+
14+
@Named
15+
public class SubstringAfterLastExpressionFunction extends AbstractSubstringExpressionFunction {
16+
static final String FUNCTION_NAME = "substringAfterLast";
17+
18+
@Override
19+
public String getFunctionName() {
20+
return FUNCTION_NAME;
21+
}
22+
23+
@Override
24+
protected String extractSubstring(final String source, final String delimiter) {
25+
final int index = source.lastIndexOf(delimiter);
26+
if (index == -1) {
27+
return source;
28+
}
29+
return source.substring(index + delimiter.length());
30+
}
31+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*
5+
* The OpenSearch Contributors require contributions made to
6+
* this file be licensed under the Apache-2.0 license or a
7+
* compatible open source license.
8+
*/
9+
10+
package org.opensearch.dataprepper.expression;
11+
12+
import javax.inject.Named;
13+
14+
@Named
15+
public class SubstringBeforeExpressionFunction extends AbstractSubstringExpressionFunction {
16+
static final String FUNCTION_NAME = "substringBefore";
17+
18+
@Override
19+
public String getFunctionName() {
20+
return FUNCTION_NAME;
21+
}
22+
23+
@Override
24+
protected String extractSubstring(final String source, final String delimiter) {
25+
final int index = source.indexOf(delimiter);
26+
if (index == -1) {
27+
return source;
28+
}
29+
return source.substring(0, index);
30+
}
31+
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*
5+
* The OpenSearch Contributors require contributions made to
6+
* this file be licensed under the Apache-2.0 license or a
7+
* compatible open source license.
8+
*/
9+
10+
package org.opensearch.dataprepper.expression;
11+
12+
import javax.inject.Named;
13+
14+
@Named
15+
public class SubstringBeforeLastExpressionFunction extends AbstractSubstringExpressionFunction {
16+
static final String FUNCTION_NAME = "substringBeforeLast";
17+
18+
@Override
19+
public String getFunctionName() {
20+
return FUNCTION_NAME;
21+
}
22+
23+
@Override
24+
protected String extractSubstring(final String source, final String delimiter) {
25+
final int index = source.lastIndexOf(delimiter);
26+
if (index == -1) {
27+
return source;
28+
}
29+
return source.substring(0, index);
30+
}
31+
}

data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/GenericExpressionEvaluator_ConditionalIT.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,12 @@ private static Stream<Arguments> validExpressionArguments() {
250250
arguments("startsWith(\""+ UUID.randomUUID() +strValue+ "\",/status)", event("{\"status\":\""+strValue+"\"}"), false),
251251
arguments("getEventType() == \"event\"", longEvent, true),
252252
arguments("getEventType() == \"LOG\"", longEvent, false),
253-
arguments("formatDateTime(/time, \"'year='yyyy'/month='MM'/day='dd\", \"UTC-8\") == \"year=2025/month=04/day=01\"", event("{\"time\": " + LocalDateTime.of(2025, 4, 1, 23, 59).toInstant(ZoneOffset.UTC).toEpochMilli() + "}"), true)
253+
arguments("formatDateTime(/time, \"'year='yyyy'/month='MM'/day='dd\", \"UTC-8\") == \"year=2025/month=04/day=01\"", event("{\"time\": " + LocalDateTime.of(2025, 4, 1, 23, 59).toInstant(ZoneOffset.UTC).toEpochMilli() + "}"), true),
254+
arguments("substringAfter(\"file.txt\", \".\") == \"txt\"", event("{}"), true),
255+
arguments("substringAfter(/path, \"/\") == \"app/src/main.py\"", event("{\"path\": \"/app/src/main.py\"}"), true),
256+
arguments("substringBefore(\"key=a=b\", \"=\") == \"key\"", event("{}"), true),
257+
arguments("substringAfterLast(\"/app/src/main.py\", \"/\") == \"main.py\"", event("{}"), true),
258+
arguments("substringBeforeLast(\"app.src.main\", \".\") == \"app.src\"", event("{}"), true)
254259
);
255260
}
256261

@@ -297,7 +302,11 @@ private static Stream<Arguments> invalidExpressionArguments() {
297302
arguments("contains(1234, /strField)", event("{\"intField\":1234,\"strField\":\"string\"}")),
298303
arguments("contains(/strField, 1234)", event("{\"intField\":1234,\"strField\":\"string\"}")),
299304
arguments("getMetadata(10)", tagEvent),
300-
arguments("cidrContains(/sourceIp,123)", event("{\"sourceIp\": \"192.0.2.3\"}"))
305+
arguments("cidrContains(/sourceIp,123)", event("{\"sourceIp\": \"192.0.2.3\"}")),
306+
arguments("substringAfter()", event("{}")),
307+
arguments("substringBefore()", event("{}")),
308+
arguments("substringAfterLast()", event("{}")),
309+
arguments("substringBeforeLast()", event("{}"))
301310
);
302311
}
303312

data-prepper-expression/src/test/java/org/opensearch/dataprepper/expression/GenericExpressionEvaluator_MultiTypeIT.java

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,19 @@ private static Stream<Arguments> validStringExpressionArguments() {
132132
Arguments.of("getMetadata(\"strAttr\")+\""+testString2+"\"+/key", testEvent, testString+testString2+"value", String.class),
133133
Arguments.of("join(/list)", testEvent, "string,1,true", String.class),
134134
Arguments.of("join(\"\\\\, \", /list)", testEvent, "string, 1, true", String.class),
135-
Arguments.of("join(\" \", /list)", testEvent, "string 1 true", String.class)
135+
Arguments.of("join(\" \", /list)", testEvent, "string 1 true", String.class),
136+
Arguments.of("substringAfter(\"hello-world\", \"-\")", event("{}"), "world", String.class),
137+
Arguments.of("substringAfter(/field, \"-\")", event("{\"field\": \"hello-world\"}"), "world", String.class),
138+
Arguments.of("substringAfter(\"no-match\", \"xyz\")", event("{}"), "no-match", String.class),
139+
Arguments.of("substringBefore(\"hello-world\", \"-\")", event("{}"), "hello", String.class),
140+
Arguments.of("substringBefore(/field, \"-\")", event("{\"field\": \"hello-world\"}"), "hello", String.class),
141+
Arguments.of("substringBefore(\"no-match\", \"xyz\")", event("{}"), "no-match", String.class),
142+
Arguments.of("substringAfterLast(\"/app/src/main.py\", \"/\")", event("{}"), "main.py", String.class),
143+
Arguments.of("substringAfterLast(/field, \"/\")", event("{\"field\": \"/app/src/main.py\"}"), "main.py", String.class),
144+
Arguments.of("substringAfterLast(\"no-match\", \"xyz\")", event("{}"), "no-match", String.class),
145+
Arguments.of("substringBeforeLast(\"/app/src/main.py\", \"/\")", event("{}"), "/app/src", String.class),
146+
Arguments.of("substringBeforeLast(/field, \"/\")", event("{\"field\": \"/app/src/main.py\"}"), "/app/src", String.class),
147+
Arguments.of("substringBeforeLast(\"no-match\", \"xyz\")", event("{}"), "no-match", String.class)
136148
);
137149
}
138150

@@ -155,7 +167,11 @@ private static Stream<Arguments> exceptionExpressionArguments() {
155167
Arguments.of("join(/list, \" \", \"third_arg\")", event("{\"list\":[\"string\", 1, true]}")),
156168
Arguments.of("join()", event("{\"list\":[\"string\", 1, true]}")),
157169
Arguments.of("contains()", event("{\"list\":[\"string\", 1, true]}")),
158-
Arguments.of("startsWith()", event("{\"list\":[\"string\", 1, true]}"))
170+
Arguments.of("startsWith()", event("{\"list\":[\"string\", 1, true]}")),
171+
Arguments.of("substringAfter()", event("{\"list\":[\"string\", 1, true]}")),
172+
Arguments.of("substringBefore()", event("{\"list\":[\"string\", 1, true]}")),
173+
Arguments.of("substringAfterLast()", event("{\"list\":[\"string\", 1, true]}")),
174+
Arguments.of("substringBeforeLast()", event("{\"list\":[\"string\", 1, true]}"))
159175
);
160176
}
161177

0 commit comments

Comments
 (0)