From 38de323d1544ebb1c190958cee13e755be1d603b Mon Sep 17 00:00:00 2001
From: Manisha Yadav
Date: Mon, 8 Jun 2026 10:24:36 +0000
Subject: [PATCH 1/3] Add multiline input codec for grouping multi-line log
events
Signed-off-by: Manisha Yadav
---
.../multiline-codecs/README.md | 123 ++++
.../multiline-codecs/build.gradle | 24 +
.../codec/multiline/MultilineInputCodec.java | 227 +++++++
.../multiline/MultilineInputCodecConfig.java | 147 ++++
.../codec/multiline/MultilineWhat.java | 57 ++
.../codec/multiline/MultilineCodecsIT.java | 432 ++++++++++++
.../MultilineInputCodecConfigTest.java | 94 +++
.../multiline/MultilineInputCodecTest.java | 629 ++++++++++++++++++
.../codec/multiline/MultilineWhatTest.java | 50 ++
settings.gradle | 1 +
10 files changed, 1784 insertions(+)
create mode 100644 data-prepper-plugins/multiline-codecs/README.md
create mode 100644 data-prepper-plugins/multiline-codecs/build.gradle
create mode 100644 data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodec.java
create mode 100644 data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfig.java
create mode 100644 data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhat.java
create mode 100644 data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineCodecsIT.java
create mode 100644 data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfigTest.java
create mode 100644 data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecTest.java
create mode 100644 data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhatTest.java
diff --git a/data-prepper-plugins/multiline-codecs/README.md b/data-prepper-plugins/multiline-codecs/README.md
new file mode 100644
index 0000000000..62b61e547e
--- /dev/null
+++ b/data-prepper-plugins/multiline-codecs/README.md
@@ -0,0 +1,123 @@
+# Multiline Codecs
+
+This plugin provides a multiline input codec for Data Prepper that groups consecutive lines from an input stream into single events based on a configurable regex pattern.
+
+## Usage
+
+The multiline input codec can be configured with source plugins (e.g. S3 source, file source) in the pipeline file.
+
+### Use Cases
+
+- **Java/Kotlin stack traces**: Exception messages followed by `at ...` lines
+- **Python tracebacks**: `Traceback` blocks spanning multiple lines
+- **Timestamp-prefixed logs**: Logs where each entry starts with a timestamp and continuation lines don't
+- **Multi-line JSON/XML in logs**: Structured data embedded across multiple lines within log entries
+- **Custom log formats**: Any format where a recognizable pattern marks the start of a new event
+
+## Configuration Options
+
+| Option | Required | Type | Default | Description |
+|---|---|---|---|---|
+| `match` | Yes | String (regex) | - | A regular expression searched for in each line; whether a match marks a continuation line is controlled by `negate` |
+| `negate` | No | Boolean | `false` | When `false`, lines matching the pattern are continuation lines. When `true`, lines NOT matching the pattern are continuation lines |
+| `what` | No | String | `previous` | Whether continuation lines belong to the `previous` or `next` event |
+| `max_lines` | No | Integer | `500` | Maximum number of lines that can be combined into a single event |
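+| `max_length` | No | Integer | `10000` | Maximum character length of a combined multiline event; lines are never truncated, so a single line longer than this still produces an event that exceeds it |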
+| `line_separator` | No | String | `\n` | Separator string used when joining lines into a single event message |
+
+## How It Works
+
+The codec reads lines from the input stream and uses the `match` regex to determine event boundaries:
+
+1. **`negate=true` + `what=previous`** (most common): A new event starts when a line matches the pattern. Lines that do NOT match are appended to the preceding event.
+
+2. **`negate=false` + `what=previous`**: Lines that match the pattern are appended to the preceding event.
+
+3. **`negate=true` + `what=next`**: Lines that do NOT match the pattern are prepended to the next matching line.
+
+4. **`negate=false` + `what=next`**: Lines that match the pattern are prepended to the next non-matching line.
+
+## Examples
+
+### Java Stack Traces (timestamp-based grouping)
+
+Each log entry starts with a timestamp. Lines without a timestamp are continuations of the previous entry.
+
+```yaml
+pipeline:
+  source:
+    s3:
+      codec:
+        multiline:
+          match: "^\\d{4}-\\d{2}-\\d{2}"
+          negate: true
+          what: previous
+```
+
+Input:
+```
+2024-01-01 12:00:00 ERROR NullPointerException
+ at com.example.Service.method(Service.java:42)
+ at com.example.Main.run(Main.java:10)
+2024-01-01 12:00:01 INFO Application recovered
+```
+
+Result: 2 events
+- Event 1: The ERROR line with its full stack trace grouped together
+- Event 2: The INFO line as a single event
+
+### Java Stack Traces (pattern-based grouping)
+
+Lines starting with whitespace followed by `at `, `...`, or `Caused by:` are continuations.
+
+```yaml
+pipeline:
+  source:
+    s3:
+      codec:
+        multiline:
+          match: "^\\s+(at |\\.\\.\\.|Caused by:)"
+          negate: false
+          what: previous
+```
+
+### Python Tracebacks
+
+```yaml
+pipeline:
+  source:
+    s3:
+      codec:
+        multiline:
+          match: "^Traceback|^\\s|^\\w+Error"
+          negate: false
+          what: previous
+```
+
+### Log Entries with Preamble (next mode)
+
+Lines starting with whitespace are prepended to the next non-indented line.
+
+```yaml
+pipeline:
+  source:
+    s3:
+      codec:
+        multiline:
+          match: "^\\s"
+          negate: false
+          what: next
+```
+
+## Developer Guide
+
+This plugin is compatible with Java 11. For contribution and monitoring guidance, see:
+
+- [CONTRIBUTING](https://github.com/opensearch-project/data-prepper/blob/main/CONTRIBUTING.md)
+- [monitoring](https://github.com/opensearch-project/data-prepper/blob/main/docs/monitoring.md)
+
+The following command runs the unit and integration tests:
+
+```
+./gradlew :data-prepper-plugins:multiline-codecs:test
+```
diff --git a/data-prepper-plugins/multiline-codecs/build.gradle b/data-prepper-plugins/multiline-codecs/build.gradle
new file mode 100644
index 0000000000..cf619d2062
--- /dev/null
+++ b/data-prepper-plugins/multiline-codecs/build.gradle
@@ -0,0 +1,24 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+plugins {
+ id 'java'
+}
+
+dependencies {
+ implementation project(':data-prepper-api')
+ implementation 'com.fasterxml.jackson.core:jackson-annotations'
+ implementation libs.parquet.common
+ testImplementation project(':data-prepper-plugins:common')
+ testImplementation project(':data-prepper-test:test-event')
+}
+
+test {
+ useJUnitPlatform()
+}
diff --git a/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodec.java b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodec.java
new file mode 100644
index 0000000000..07eba72e35
--- /dev/null
+++ b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodec.java
@@ -0,0 +1,227 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.dataprepper.plugins.codec.multiline;
+
+import org.opensearch.dataprepper.model.annotations.DataPrepperPlugin;
+import org.opensearch.dataprepper.model.annotations.DataPrepperPluginConstructor;
+import org.opensearch.dataprepper.model.codec.InputCodec;
+import org.opensearch.dataprepper.model.event.Event;
+import org.opensearch.dataprepper.model.event.EventFactory;
+import org.opensearch.dataprepper.model.event.LogEventBuilder;
+import org.opensearch.dataprepper.model.log.Log;
+import org.opensearch.dataprepper.model.record.Record;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Collections;
+import java.util.Objects;
+import java.util.function.Consumer;
+import java.util.regex.Pattern;
+
+/**
+ * An {@link InputCodec} that groups consecutive lines of an input stream into single
+ * events, using a configurable regular expression to recognize continuation lines.
+ * <p>
+ * This is useful for logs in which one logical event spans several lines, such as Java
+ * stack traces, Python tracebacks, or entries that begin with a recognizable prefix
+ * (for example, a timestamp).
+ * <p>
+ * The {@code what} option selects the grouping direction:
+ * <ul>
+ *   <li>{@code previous}: continuation lines are appended to the preceding event.</li>
+ *   <li>{@code next}: continuation lines are prepended to the following event.</li>
+ * </ul>
+ * The {@code negate} option selects which lines count as continuation lines:
+ * <ul>
+ *   <li>{@code negate=false}: lines matching the pattern are continuation lines.</li>
+ *   <li>{@code negate=true}: lines NOT matching the pattern are continuation lines.</li>
+ * </ul>
+ * Input is decoded as UTF-8, and each event stores the joined lines under {@code message}.
+ */
+@DataPrepperPlugin(name = "multiline", pluginType = InputCodec.class, pluginConfigurationType = MultilineInputCodecConfig.class)
+public class MultilineInputCodec implements InputCodec {
+
+    private static final Logger LOG = LoggerFactory.getLogger(MultilineInputCodec.class);
+    static final String MESSAGE_FIELD_NAME = "message";
+
+    private final Pattern pattern;
+    private final boolean negate;
+    private final MultilineWhat what;
+    private final int maxLines;
+    private final int maxLength;
+    private final String lineSeparator;
+    private final EventFactory eventFactory;
+
+    /**
+     * Creates the codec from its plugin configuration.
+     * @param config the codec settings; must not be null
+     * @param eventFactory builds the emitted log events; must not be null
+     * @throws IllegalArgumentException if the configured {@code match} is null or does not compile
+     */
+    @DataPrepperPluginConstructor
+    public MultilineInputCodec(final MultilineInputCodecConfig config, final EventFactory eventFactory) {
+        Objects.requireNonNull(config, "config must not be null");
+        this.eventFactory = Objects.requireNonNull(eventFactory, "eventFactory must not be null");
+        try {
+            this.pattern = Pattern.compile(config.getMatch());
+        } catch (final Exception e) {
+            throw new IllegalArgumentException("Invalid regex pattern for 'match': " + config.getMatch(), e);
+        }
+        this.negate = config.getNegate();
+        this.what = config.getWhat();
+        this.maxLines = config.getMaxLines();
+        this.maxLength = config.getMaxLength();
+        this.lineSeparator = config.getLineSeparator();
+    }
+
+    /**
+     * Reads the stream line by line and hands one record per grouped event to the consumer.
+     * The reader wrapping the stream is closed when parsing ends.
+     * @param inputStream the raw input, decoded as UTF-8; must not be null
+     * @param eventConsumer receives the grouped events in input order; must not be null
+     * @throws IOException if reading from the stream fails
+     */
+    @Override
+    public void parse(final InputStream inputStream, final Consumer<Record<Event>> eventConsumer) throws IOException {
+        Objects.requireNonNull(inputStream, "inputStream must not be null");
+        Objects.requireNonNull(eventConsumer, "eventConsumer must not be null");
+
+        // Name the charset explicitly; the single-argument InputStreamReader constructor
+        // falls back to the platform default, which makes decoding host-dependent.
+        try (final BufferedReader reader = new BufferedReader(
+                new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8))) {
+            if (what == MultilineWhat.PREVIOUS) {
+                parsePreviousMode(reader, eventConsumer);
+            } else {
+                parseNextMode(reader, eventConsumer);
+            }
+        }
+    }
+
+    /**
+     * PREVIOUS mode: continuation lines are appended to the preceding event. The pending
+     * event is emitted when a start (non-continuation) line arrives, or when adding the
+     * next line would exceed {@code max_lines} or {@code max_length}.
+     */
+    private void parsePreviousMode(final BufferedReader reader, final Consumer<Record<Event>> eventConsumer) throws IOException {
+        final PendingEvent pending = new PendingEvent();
+        String line;
+
+        while ((line = reader.readLine()) != null) {
+            final boolean startsNewEvent = !isContinuationLine(line);
+            if (pending.hasText()
+                    && (startsNewEvent || shouldFlush(pending.text, pending.lineCount, line))) {
+                pending.flushTo(eventConsumer);
+            }
+            pending.add(line);
+        }
+
+        if (pending.hasText()) {
+            pending.flushTo(eventConsumer);
+        }
+    }
+
+    /**
+     * NEXT mode: continuation lines are buffered and prepended to the next
+     * non-continuation line, which completes the event. The limits are checked before
+     * every line is added, the completing line included, so buffered continuation lines
+     * that would overflow together with it are emitted as a separate event first.
+     */
+    private void parseNextMode(final BufferedReader reader, final Consumer<Record<Event>> eventConsumer) throws IOException {
+        final PendingEvent pending = new PendingEvent();
+        boolean eventComplete = false;
+        String line;
+
+        while ((line = reader.readLine()) != null) {
+            if (eventComplete) {
+                // The previous line completed an event; emit it before buffering anything else.
+                pending.flushTo(eventConsumer);
+                eventComplete = false;
+            }
+            if (pending.hasText() && shouldFlush(pending.text, pending.lineCount, line)) {
+                // Buffered continuation lines that no longer fit are emitted on their own.
+                pending.flushTo(eventConsumer);
+            }
+            pending.add(line);
+            eventComplete = !isContinuationLine(line);
+        }
+
+        if (pending.hasText()) {
+            pending.flushTo(eventConsumer);
+        }
+    }
+
+    /**
+     * Determines whether a line is a continuation line under the pattern and negate settings.
+     * The pattern may match anywhere in the line ({@code find}), not only the whole line.
+     *
+     * @param line the line to classify
+     * @return with {@code negate=false}, whether the pattern matches; otherwise whether it does not
+     */
+    boolean isContinuationLine(final String line) {
+        final boolean matches = pattern.matcher(line).find();
+        return negate != matches;
+    }
+
+    /**
+     * Reports whether appending {@code nextLine} would break a limit: the buffer already holds
+     * {@code max_lines} lines, or buffer, separator and line together exceed {@code max_length}.
+     */
+    private boolean shouldFlush(final StringBuilder buffer, final int lineCount, final String nextLine) {
+        if (lineCount >= maxLines) {
+            LOG.debug("Flushing multiline event due to max_lines limit of {}", maxLines);
+            return true;
+        }
+        if (buffer.length() + lineSeparator.length() + nextLine.length() > maxLength) {
+            LOG.debug("Flushing multiline event due to max_length limit of {}", maxLength);
+            return true;
+        }
+        return false;
+    }
+
+    /** Wraps the joined lines in a log event and passes it to the consumer as a record. */
+    private void emitEvent(final String message, final Consumer<Record<Event>> eventConsumer) {
+        final Log event = eventFactory.eventBuilder(LogEventBuilder.class)
+                .withData(Collections.singletonMap(MESSAGE_FIELD_NAME, message))
+                .build();
+        eventConsumer.accept(new Record<>(event));
+    }
+
+    /**
+     * The lines of the event being assembled, joined with the line separator. Emptiness is
+     * judged by text length, so a buffer that holds only empty lines counts as empty.
+     */
+    private final class PendingEvent {
+        private final StringBuilder text = new StringBuilder();
+        private int lineCount;
+
+        boolean hasText() {
+            return text.length() > 0;
+        }
+
+        void add(final String line) {
+            if (hasText()) {
+                text.append(lineSeparator);
+            }
+            text.append(line);
+            lineCount++;
+        }
+
+        void flushTo(final Consumer<Record<Event>> eventConsumer) {
+            emitEvent(text.toString(), eventConsumer);
+            text.setLength(0);
+            lineCount = 0;
+        }
+    }
+}
diff --git a/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfig.java b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfig.java
new file mode 100644
index 0000000000..dc26290f3a
--- /dev/null
+++ b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfig.java
@@ -0,0 +1,147 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.dataprepper.plugins.codec.multiline;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import jakarta.validation.constraints.AssertTrue;
+import jakarta.validation.constraints.Min;
+import jakarta.validation.constraints.NotEmpty;
+import jakarta.validation.constraints.NotNull;
+
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+/**
+ * Configuration class for the multiline input codec.
+ *
+ * The multiline codec groups consecutive lines from an input stream into a single event
+ * based on a regex pattern. This is useful for log formats where a single logical event
+ * spans multiple lines (e.g., Java stack traces, multi-line application logs).
+ *
+ * Example configuration for Java stack traces:
+ *
+ * codec:
+ * multiline:
+ * match: "^\\s+(at |\\.\\.\\.|Caused by:)"
+ * negate: false
+ * what: previous
+ *
+ *
+ * Example configuration for timestamp-prefixed logs:
+ *
+ * codec:
+ * multiline:
+ * match: "^\\d{4}-\\d{2}-\\d{2}"
+ * negate: true
+ * what: previous
+ *
+ */
+public class MultilineInputCodecConfig {
+
+ static final int DEFAULT_MAX_LINES = 500;
+ static final int DEFAULT_MAX_LENGTH = 10000;
+ static final String DEFAULT_LINE_SEPARATOR = "\n";
+
+ @NotEmpty(message = "match must not be empty")
+ @JsonProperty("match")
+ private String match;
+
+ @NotNull(message = "negate must not be null")
+ @JsonProperty("negate")
+ private Boolean negate = false;
+
+ @NotNull(message = "what must not be null")
+ @JsonProperty("what")
+ private MultilineWhat what = MultilineWhat.PREVIOUS;
+
+ @Min(value = 1, message = "max_lines must be at least 1")
+ @JsonProperty("max_lines")
+ private int maxLines = DEFAULT_MAX_LINES;
+
+ @Min(value = 1, message = "max_length must be at least 1")
+ @JsonProperty("max_length")
+ private int maxLength = DEFAULT_MAX_LENGTH;
+
+ @NotNull(message = "line_separator must not be null")
+ @JsonProperty("line_separator")
+ private String lineSeparator = DEFAULT_LINE_SEPARATOR;
+
+ /**
+ * The regex pattern used to identify line boundaries.
+ *
+ * @return The regex pattern string.
+ */
+ public String getMatch() {
+ return match;
+ }
+
+ /**
+ * Whether to negate the pattern match.
+ * When false: lines matching the pattern are considered continuation lines.
+ * When true: lines NOT matching the pattern are considered continuation lines.
+ *
+ * @return true if the pattern should be negated.
+ */
+ public Boolean getNegate() {
+ return negate;
+ }
+
+ /**
+ * Defines whether unmatched (continuation) lines belong to the previous or next event.
+ *
+ * @return The multiline grouping direction.
+ */
+ public MultilineWhat getWhat() {
+ return what;
+ }
+
+ /**
+ * The maximum number of lines that can be combined into a single event.
+ * When this limit is reached, the accumulated lines are flushed as an event
+ * and a new accumulation begins.
+ *
+ * @return The maximum number of lines per event.
+ */
+ public int getMaxLines() {
+ return maxLines;
+ }
+
+ /**
+ * The maximum character length of a combined multiline event.
+ * When this limit is reached, the accumulated lines are flushed as an event.
+ *
+ * @return The maximum character length per event.
+ */
+ public int getMaxLength() {
+ return maxLength;
+ }
+
+ /**
+ * The separator string to use when joining multiple lines into a single event message.
+ *
+ * @return The line separator string.
+ */
+ public String getLineSeparator() {
+ return lineSeparator;
+ }
+
+ @AssertTrue(message = "match must be a valid regular expression")
+ boolean isValidPattern() {
+ if (match == null || match.isEmpty()) {
+ return false;
+ }
+ try {
+ Pattern.compile(match);
+ return true;
+ } catch (final PatternSyntaxException e) {
+ return false;
+ }
+ }
+}
diff --git a/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhat.java b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhat.java
new file mode 100644
index 0000000000..ab21b16ac5
--- /dev/null
+++ b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhat.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.dataprepper.plugins.codec.multiline;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonValue;
+
+import java.util.Arrays;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Selects whether continuation lines join the event before them or the event after them.
+ */
+public enum MultilineWhat {
+
+    /** Continuation lines are appended to the preceding event. */
+    PREVIOUS("previous"),
+
+    /** Continuation lines are prepended to the following event. */
+    NEXT("next");
+
+    private static final Map<String, MultilineWhat> OPTIONS_MAP = Arrays.stream(MultilineWhat.values())
+            .collect(Collectors.toMap(MultilineWhat::toString, value -> value));
+
+    private final String name;
+
+    MultilineWhat(final String name) {
+        this.name = name;
+    }
+
+    /**
+     * Resolves configured text to its constant ignoring case, or throws IllegalArgumentException.
+     */
+    @JsonCreator
+    public static MultilineWhat fromString(final String value) {
+        // Locale.ROOT: default-locale lowercasing maps "I" to a dotless i under Turkish rules.
+        final MultilineWhat result = OPTIONS_MAP.get(value.toLowerCase(java.util.Locale.ROOT));
+        if (result == null) {
+            throw new IllegalArgumentException("Invalid value for 'what': " + value + ". Valid values are: " + OPTIONS_MAP.keySet());
+        }
+        return result;
+    }
+
+    @JsonValue
+    @Override
+    public String toString() {
+        return name;
+    }
+}
diff --git a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineCodecsIT.java b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineCodecsIT.java
new file mode 100644
index 0000000000..6d2b970db5
--- /dev/null
+++ b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineCodecsIT.java
@@ -0,0 +1,432 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.dataprepper.plugins.codec.multiline;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.opensearch.dataprepper.event.TestEventFactory;
+import org.opensearch.dataprepper.model.event.Event;
+import org.opensearch.dataprepper.model.event.EventFactory;
+import org.opensearch.dataprepper.model.record.Record;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.function.Consumer;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.notNullValue;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.containsString;
+import static org.mockito.Mockito.lenient;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+
+@ExtendWith(MockitoExtension.class)
+public class MultilineCodecsIT {
+
+ @Mock
+ private MultilineInputCodecConfig config; // stubbed per test with match/negate/what
+
+ @Mock
+ private Consumer<Record<Event>> eventConsumer; // captures the records the codec emits
+
+ private final EventFactory eventFactory = TestEventFactory.getTestEventFactory();
+
+ @BeforeEach
+ void setUp() {
+ lenient().when(config.getLineSeparator()).thenReturn("\n"); // lines are joined with a newline
+ lenient().when(config.getMaxLength()).thenReturn(50000); // high enough that no test hits the length limit
+ lenient().when(config.getMaxLines()).thenReturn(500);
+ }
+
+ private MultilineInputCodec createObjectUnderTest() {
+ return new MultilineInputCodec(config, eventFactory); // mocked config, TestEventFactory-provided event factory
+ }
+
+ private InputStream toInputStream(final String content) {
+ return new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8)); // test input is always UTF-8 encoded
+ }
+
+ @Test
+ void parse_java_stack_trace_groups_exception_with_stack_frames() throws IOException {
+ lenient().when(config.getMatch()).thenReturn("^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}");
+ lenient().when(config.getNegate()).thenReturn(true);
+ lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+
+ final String input =
+ "2024-01-15 10:23:45.123 ERROR [main] com.example.UserService - Request failed\n" +
+ "java.lang.NullPointerException: null\n" +
+ "\tat com.example.UserService.getUser(UserService.java:42)\n" +
+ "\tat com.example.Controller.handle(Controller.java:28)\n" +
+ "Caused by: java.sql.SQLException: Connection refused\n" +
+ "\tat com.mysql.jdbc.Connection.connect(Connection.java:456)\n" +
+ "\t... 12 more\n" +
+ "2024-01-15 10:23:45.456 INFO [main] com.example.UserService - Retrying\n" +
+ "2024-01-15 10:23:46.789 WARN [worker-1] com.example.Cache - Cache miss\n";
+
+ createObjectUnderTest().parse(toInputStream(input), eventConsumer);
+
+ final ArgumentCaptor<Record<Event>> captor = ArgumentCaptor.forClass(Record.class);
+ verify(eventConsumer, times(3)).accept(captor.capture());
+
+ final List<Record<Event>> records = captor.getAllValues();
+
+ // Event 1: ERROR log + stack trace (7 lines grouped)
+ final String event1 = records.get(0).getData().get("message", String.class);
+ assertThat(event1, notNullValue());
+ assertThat(event1, containsString("NullPointerException"));
+ assertThat(event1, containsString("at com.example.UserService.getUser"));
+ assertThat(event1, containsString("Caused by: java.sql.SQLException"));
+ assertThat(event1, containsString("... 12 more"));
+
+ // Event 2: INFO single line
+ final String event2 = records.get(1).getData().get("message", String.class);
+ assertThat(event2, equalTo("2024-01-15 10:23:45.456 INFO [main] com.example.UserService - Retrying"));
+
+ // Event 3: WARN single line
+ final String event3 = records.get(2).getData().get("message", String.class);
+ assertThat(event3, equalTo("2024-01-15 10:23:46.789 WARN [worker-1] com.example.Cache - Cache miss"));
+ }
+
+ @Test
+ void parse_python_traceback_groups_traceback_with_error_line() throws IOException {
+ lenient().when(config.getMatch()).thenReturn("^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}");
+ lenient().when(config.getNegate()).thenReturn(true);
+ lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+
+ final String input =
+ "2024-03-20 08:15:00,123 INFO Starting application\n" +
+ "2024-03-20 08:15:02,789 ERROR Unhandled exception\n" +
+ "Traceback (most recent call last):\n" +
+ " File \"/app/worker.py\", line 45, in process\n" +
+ " result = transform(record)\n" +
+ "ValueError: invalid literal for int()\n" +
+ "2024-03-20 08:15:03,456 INFO Recovered\n";
+
+ createObjectUnderTest().parse(toInputStream(input), eventConsumer);
+
+ final ArgumentCaptor<Record<Event>> captor = ArgumentCaptor.forClass(Record.class);
+ verify(eventConsumer, times(3)).accept(captor.capture());
+
+ final List<Record<Event>> records = captor.getAllValues();
+
+ // Event 1: single INFO line
+ assertThat(records.get(0).getData().get("message", String.class),
+ equalTo("2024-03-20 08:15:00,123 INFO Starting application"));
+
+ // Event 2: ERROR + traceback (5 lines grouped)
+ final String event2 = records.get(1).getData().get("message", String.class);
+ assertThat(event2, containsString("ERROR Unhandled exception"));
+ assertThat(event2, containsString("Traceback (most recent call last):"));
+ assertThat(event2, containsString("File \"/app/worker.py\""));
+ assertThat(event2, containsString("ValueError: invalid literal"));
+
+ // Event 3: single INFO line
+ assertThat(records.get(2).getData().get("message", String.class),
+ equalTo("2024-03-20 08:15:03,456 INFO Recovered"));
+ }
+
+ @Test
+ void parse_xml_multiline_logs_groups_xml_body_with_header() throws IOException {
+ lenient().when(config.getMatch()).thenReturn("^\\[\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}");
+ lenient().when(config.getNegate()).thenReturn(true);
+ lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+
+ final String input =
+ "[2024-05-10 14:30:00.001] [INFO] Incoming request:\n" +
+ "<request>\n" +
+ " <id>value</id>\n" +
+ "</request>\n" +
+ "[2024-05-10 14:30:00.045] [INFO] Request processed\n";
+
+ createObjectUnderTest().parse(toInputStream(input), eventConsumer);
+
+ final ArgumentCaptor<Record<Event>> captor = ArgumentCaptor.forClass(Record.class);
+ verify(eventConsumer, times(2)).accept(captor.capture());
+
+ final List<Record<Event>> records = captor.getAllValues();
+
+ // Event 1: log line + XML body (4 lines grouped)
+ final String event1 = records.get(0).getData().get("message", String.class);
+ assertThat(event1, containsString("[INFO] Incoming request:"));
+ assertThat(event1, containsString("<request>"));
+ assertThat(event1, containsString("<id>value</id>"));
+ assertThat(event1, containsString("</request>"));
+
+ // Event 2: single line
+ assertThat(records.get(1).getData().get("message", String.class),
+ equalTo("[2024-05-10 14:30:00.045] [INFO] Request processed"));
+ }
+
+ @Test
+ void parse_sql_multiline_logs_groups_query_with_header() throws IOException {
+ lenient().when(config.getMatch()).thenReturn("^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}");
+ lenient().when(config.getNegate()).thenReturn(true);
+ lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+
+ final String input =
+ "2024-07-01 09:00:01 [Query] thread_id=145 exec_time=0.003s\n" +
+ "SELECT u.id, u.name\n" +
+ "FROM users u\n" +
+ "WHERE u.active = 1\n" +
+ "ORDER BY u.name;\n" +
+ "2024-07-01 09:00:02 [Query] thread_id=146 exec_time=0.001s\n" +
+ "SELECT COUNT(*) FROM sessions;\n";
+
+ createObjectUnderTest().parse(toInputStream(input), eventConsumer);
+
+ final ArgumentCaptor<Record<Event>> captor = ArgumentCaptor.forClass(Record.class);
+ verify(eventConsumer, times(2)).accept(captor.capture());
+
+ final List<Record<Event>> records = captor.getAllValues();
+
+ // Event 1: query header + multi-line SQL (5 lines grouped)
+ final String event1 = records.get(0).getData().get("message", String.class);
+ assertThat(event1, containsString("[Query] thread_id=145"));
+ assertThat(event1, containsString("SELECT u.id, u.name"));
+ assertThat(event1, containsString("FROM users u"));
+ assertThat(event1, containsString("WHERE u.active = 1"));
+ assertThat(event1, containsString("ORDER BY u.name;"));
+
+ // Event 2: query header + single-line SQL (2 lines grouped)
+ final String event2 = records.get(1).getData().get("message", String.class);
+ assertThat(event2, containsString("[Query] thread_id=146"));
+ assertThat(event2, containsString("SELECT COUNT(*) FROM sessions;"));
+ }
+
+ @Test
+ void parse_syslog_ise_multiline_groups_continuation_lines() throws IOException {
+ lenient().when(config.getMatch()).thenReturn("^<\\d+>[A-Z][a-z]{2}\\s+\\d+\\s+\\d{2}:\\d{2}:\\d{2}");
+ lenient().when(config.getNegate()).thenReturn(true);
+ lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+
+ final String input =
+ "<181>Jun 1 12:39:49 Infra-ISE CISE_Audit 0000000176 NOTICE Admin-Login: success\n" +
+ "<181>Jun 1 12:39:49 Infra-ISE CISE_Audit 0000000177 NOTICE OpenAPI: Response={\\\n" +
+ " \"version\" : \"1.0.0\",\\\n" +
+ " \"status\" : \"ok\"\\\n" +
+ "}, HttpCode=200\n" +
+ "<181>Jun 1 12:40:15 Infra-ISE CISE_Audit 0000000178 NOTICE Config-Change: added\n";
+
+ createObjectUnderTest().parse(toInputStream(input), eventConsumer);
+
+ final ArgumentCaptor<Record<Event>> captor = ArgumentCaptor.forClass(Record.class);
+ verify(eventConsumer, times(3)).accept(captor.capture());
+
+ final List<Record<Event>> records = captor.getAllValues();
+
+ // Event 1: single-line syslog
+ assertThat(records.get(0).getData().get("message", String.class),
+ containsString("Admin-Login: success"));
+
+ // Event 2: multiline syslog with JSON continuation (4 lines grouped)
+ final String event2 = records.get(1).getData().get("message", String.class);
+ assertThat(event2, containsString("OpenAPI: Response="));
+ assertThat(event2, containsString("\"version\" : \"1.0.0\""));
+ assertThat(event2, containsString("HttpCode=200"));
+
+ // Event 3: single-line syslog
+ assertThat(records.get(2).getData().get("message", String.class),
+ containsString("Config-Change: added"));
+ }
+
+    @Test
+    void parse_with_negate_false_groups_matching_lines_with_previous() throws IOException {
+        lenient().when(config.getMatch()).thenReturn("^\\s+(at |\\.\\.\\.|Caused by:)");
+        lenient().when(config.getNegate()).thenReturn(false);
+        lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+        // negate=false + PREVIOUS: lines that DO match the stack-frame pattern are appended to the previous event.
+        final String input =
+                "java.lang.RuntimeException: error\n" +
+                " at com.example.A.method(A.java:1)\n" +
+                " at com.example.B.method(B.java:2)\n" +
+                " Caused by: java.io.IOException\n" +
+                " at com.example.C.read(C.java:3)\n" +
+                " ... 5 more\n" +
+                "Application recovered\n";
+
+        createObjectUnderTest().parse(toInputStream(input), eventConsumer);
+        // Only two lines fail to match the pattern, so two records are expected.
+        final ArgumentCaptor<Record<Event>> captor = ArgumentCaptor.forClass(Record.class);
+        verify(eventConsumer, times(2)).accept(captor.capture());
+
+        final List<Record<Event>> records = captor.getAllValues();
+
+        // Event 1: exception + all matching stack frames (6 lines grouped)
+        final String event1 = records.get(0).getData().get("message", String.class);
+        assertThat(event1, containsString("RuntimeException: error"));
+        assertThat(event1, containsString("at com.example.A.method"));
+        assertThat(event1, containsString("Caused by: java.io.IOException"));
+        assertThat(event1, containsString("... 5 more"));
+
+        // Event 2: non-matching line on its own
+        assertThat(records.get(1).getData().get("message", String.class),
+                equalTo("Application recovered"));
+    }
+
+    @Test
+    void parse_with_next_mode_prepends_continuation_to_following_event() throws IOException {
+        lenient().when(config.getMatch()).thenReturn("^\\s");
+        lenient().when(config.getNegate()).thenReturn(false);
+        lenient().when(config.getWhat()).thenReturn(MultilineWhat.NEXT);
+        // NEXT mode: lines starting with whitespace are held back and emitted together with the following non-matching line.
+        final String input =
+                " context-line-1\n" +
+                " context-line-2\n" +
+                "MAIN EVENT A\n" +
+                " context-line-3\n" +
+                "MAIN EVENT B\n";
+
+        createObjectUnderTest().parse(toInputStream(input), eventConsumer);
+        // Two non-indented lines terminate two groups.
+        final ArgumentCaptor<Record<Event>> captor = ArgumentCaptor.forClass(Record.class);
+        verify(eventConsumer, times(2)).accept(captor.capture());
+
+        final List<Record<Event>> records = captor.getAllValues();
+
+        // Event 1: continuation lines + first non-continuation
+        final String event1 = records.get(0).getData().get("message", String.class);
+        assertThat(event1, containsString("context-line-1"));
+        assertThat(event1, containsString("context-line-2"));
+        assertThat(event1, containsString("MAIN EVENT A"));
+
+        // Event 2: continuation line + second non-continuation
+        final String event2 = records.get(1).getData().get("message", String.class);
+        assertThat(event2, containsString("context-line-3"));
+        assertThat(event2, containsString("MAIN EVENT B"));
+    }
+
+    @Test
+    void parse_respects_max_lines_limit() throws IOException {
+        lenient().when(config.getMatch()).thenReturn("^\\d{4}");
+        lenient().when(config.getNegate()).thenReturn(true);
+        lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+        lenient().when(config.getMaxLines()).thenReturn(3);
+        // One header followed by four continuation lines: more than the 3-line cap allows in a single event.
+        final String input =
+                "2024 start\n" +
+                " line 2\n" +
+                " line 3\n" +
+                " line 4\n" +
+                " line 5\n" +
+                "2024 next event\n";
+
+        createObjectUnderTest().parse(toInputStream(input), eventConsumer);
+        // The oversized group is split in two, and the trailing header forms a third record.
+        final ArgumentCaptor<Record<Event>> captor = ArgumentCaptor.forClass(Record.class);
+        verify(eventConsumer, times(3)).accept(captor.capture());
+
+        final List<Record<Event>> records = captor.getAllValues();
+
+        // Event 1: first 3 lines (hit max_lines)
+        final String event1 = records.get(0).getData().get("message", String.class);
+        assertThat(event1, equalTo("2024 start\n line 2\n line 3"));
+
+        // Event 2: overflow lines
+        final String event2 = records.get(1).getData().get("message", String.class);
+        assertThat(event2, equalTo(" line 4\n line 5"));
+
+        // Event 3: next event
+        assertThat(records.get(2).getData().get("message", String.class),
+                equalTo("2024 next event"));
+    }
+
+    @Test
+    void parse_respects_max_length_limit() throws IOException {
+        lenient().when(config.getMatch()).thenReturn("^\\d{4}");
+        lenient().when(config.getNegate()).thenReturn(true);
+        lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+        lenient().when(config.getMaxLength()).thenReturn(25);
+        // Header plus continuation would be longer than the 25-character cap, so they cannot share an event.
+        final String input =
+                "2024 start here\n" +
+                " long continuation line\n" +
+                "2024 next\n";
+
+        createObjectUnderTest().parse(toInputStream(input), eventConsumer);
+        // Each of the three input lines ends up as its own record.
+        final ArgumentCaptor<Record<Event>> captor = ArgumentCaptor.forClass(Record.class);
+        verify(eventConsumer, times(3)).accept(captor.capture());
+
+        final List<Record<Event>> records = captor.getAllValues();
+
+        // Event 1: flushed due to max_length before adding continuation
+        assertThat(records.get(0).getData().get("message", String.class),
+                equalTo("2024 start here"));
+
+        // Event 2: continuation line on its own
+        assertThat(records.get(1).getData().get("message", String.class),
+                equalTo(" long continuation line"));
+
+        // Event 3: next event
+        assertThat(records.get(2).getData().get("message", String.class),
+                equalTo("2024 next"));
+    }
+
+    @Test
+    void parse_empty_input_produces_no_events() throws IOException {
+        lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+        lenient().when(config.getNegate()).thenReturn(true);
+        lenient().when(config.getMatch()).thenReturn("^\\d{4}");
+        // An empty stream contains no lines, so there is nothing to group.
+        createObjectUnderTest().parse(toInputStream(""), eventConsumer);
+        // The consumer must never have been handed a record.
+        verify(eventConsumer, times(0)).accept(ArgumentCaptor.forClass(Record.class).capture());
+    }
+
+    @Test
+    void parse_all_lines_are_single_events_when_all_match_pattern() throws IOException {
+        lenient().when(config.getMatch()).thenReturn("^\\d{4}");
+        lenient().when(config.getNegate()).thenReturn(true);
+        lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+        // Every line starts with four digits, so every line opens a new event.
+        final String input =
+                "2024 event one\n" +
+                "2024 event two\n" +
+                "2024 event three\n";
+
+        createObjectUnderTest().parse(toInputStream(input), eventConsumer);
+        // One record per input line, in input order.
+        final ArgumentCaptor<Record<Event>> captor = ArgumentCaptor.forClass(Record.class);
+        verify(eventConsumer, times(3)).accept(captor.capture());
+
+        final List<Record<Event>> records = captor.getAllValues();
+        assertThat(records.get(0).getData().get("message", String.class), equalTo("2024 event one"));
+        assertThat(records.get(1).getData().get("message", String.class), equalTo("2024 event two"));
+        assertThat(records.get(2).getData().get("message", String.class), equalTo("2024 event three"));
+    }
+
+    @Test
+    void parse_all_lines_form_single_event_when_none_match_pattern() throws IOException {
+        lenient().when(config.getMatch()).thenReturn("^NEVER_MATCHES");
+        lenient().when(config.getNegate()).thenReturn(true);
+        lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+        // No line matches the start pattern, so every line is a continuation of the first one.
+        final String input =
+                "line one\n" +
+                "line two\n" +
+                "line three\n";
+
+        createObjectUnderTest().parse(toInputStream(input), eventConsumer);
+        // Everything collapses into a single record joined with newlines.
+        final ArgumentCaptor<Record<Event>> captor = ArgumentCaptor.forClass(Record.class);
+        verify(eventConsumer, times(1)).accept(captor.capture());
+
+        assertThat(captor.getValue().getData().get("message", String.class),
+                equalTo("line one\nline two\nline three"));
+    }
+}
diff --git a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfigTest.java b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfigTest.java
new file mode 100644
index 0000000000..6d74973abf
--- /dev/null
+++ b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfigTest.java
@@ -0,0 +1,94 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.dataprepper.plugins.codec.multiline;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import java.lang.reflect.Field;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.MatcherAssert.assertThat;
+
+class MultilineInputCodecConfigTest {
+    // Fields are populated reflectively through setField; these tests never call a setter on the config.
+    @Test
+    void defaults_are_correct() {
+        final MultilineInputCodecConfig objectUnderTest = new MultilineInputCodecConfig();
+        // A freshly constructed config must expose the documented defaults and no match pattern.
+        assertThat(objectUnderTest.getNegate(), equalTo(false));
+        assertThat(objectUnderTest.getWhat(), equalTo(MultilineWhat.PREVIOUS));
+        assertThat(objectUnderTest.getMaxLines(), equalTo(MultilineInputCodecConfig.DEFAULT_MAX_LINES));
+        assertThat(objectUnderTest.getMaxLength(), equalTo(MultilineInputCodecConfig.DEFAULT_MAX_LENGTH));
+        assertThat(objectUnderTest.getLineSeparator(), equalTo(MultilineInputCodecConfig.DEFAULT_LINE_SEPARATOR));
+        assertThat(objectUnderTest.getMatch(), equalTo(null));
+    }
+
+    @Test
+    void getMatch_returns_configured_value() throws Exception {
+        final MultilineInputCodecConfig objectUnderTest = new MultilineInputCodecConfig();
+        setField(objectUnderTest, "match", "^\\d{4}");
+        assertThat(objectUnderTest.getMatch(), equalTo("^\\d{4}"));
+    }
+
+    @Test
+    void isValidPattern_returns_true_for_valid_regex() throws Exception {
+        final MultilineInputCodecConfig objectUnderTest = new MultilineInputCodecConfig();
+        setField(objectUnderTest, "match", "^\\d{4}-\\d{2}-\\d{2}");
+        assertThat(objectUnderTest.isValidPattern(), equalTo(true));
+    }
+
+    @Test
+    void isValidPattern_returns_false_for_invalid_regex() throws Exception {
+        final MultilineInputCodecConfig objectUnderTest = new MultilineInputCodecConfig();
+        setField(objectUnderTest, "match", "[invalid(");
+        assertThat(objectUnderTest.isValidPattern(), equalTo(false));
+    }
+
+    @Test
+    void isValidPattern_returns_false_for_null_match() {
+        final MultilineInputCodecConfig objectUnderTest = new MultilineInputCodecConfig();
+        assertThat(objectUnderTest.isValidPattern(), equalTo(false));
+    }
+
+    @Test
+    void isValidPattern_returns_false_for_empty_match() throws Exception {
+        final MultilineInputCodecConfig objectUnderTest = new MultilineInputCodecConfig();
+        setField(objectUnderTest, "match", "");
+        assertThat(objectUnderTest.isValidPattern(), equalTo(false));
+    }
+
+    @ParameterizedTest
+    @ValueSource(ints = {1, 100, 1000})
+    void getMaxLines_returns_configured_value(final int maxLines) throws Exception {
+        final MultilineInputCodecConfig objectUnderTest = new MultilineInputCodecConfig();
+        setField(objectUnderTest, "maxLines", maxLines);
+        assertThat(objectUnderTest.getMaxLines(), equalTo(maxLines));
+    }
+
+    @ParameterizedTest
+    @ValueSource(ints = {1, 5000, 50000})
+    void getMaxLength_returns_configured_value(final int maxLength) throws Exception {
+        final MultilineInputCodecConfig objectUnderTest = new MultilineInputCodecConfig();
+        setField(objectUnderTest, "maxLength", maxLength);
+        assertThat(objectUnderTest.getMaxLength(), equalTo(maxLength));
+    }
+    // Writes a declared (possibly private) field by name, then switches accessibility back off.
+    private void setField(final Object object, final String fieldName, final Object value) throws Exception {
+        final Field declaredField = object.getClass().getDeclaredField(fieldName);
+        try {
+            declaredField.setAccessible(true);
+            declaredField.set(object, value);
+        } finally {
+            declaredField.setAccessible(false);
+        }
+    }
+}
diff --git a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecTest.java b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecTest.java
new file mode 100644
index 0000000000..5738a82da7
--- /dev/null
+++ b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecTest.java
@@ -0,0 +1,629 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.dataprepper.plugins.codec.multiline;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.opensearch.dataprepper.event.TestEventFactory;
+import org.opensearch.dataprepper.model.event.Event;
+import org.opensearch.dataprepper.model.event.EventFactory;
+import org.opensearch.dataprepper.model.record.Record;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.notNullValue;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.mockito.Mockito.when;
+
+@ExtendWith(MockitoExtension.class)
+class MultilineInputCodecTest {
+
+ @Mock
+ private MultilineInputCodecConfig config;
+
+ private final EventFactory eventFactory = TestEventFactory.getTestEventFactory();
+
+ private MultilineInputCodec createObjectUnderTest() {
+ return new MultilineInputCodec(config, eventFactory);
+ }
+
+ private InputStream toInputStream(final String content) {
+ return new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8));
+ }
+
+ private List> parseContent(final String content) throws IOException {
+ final List> events = new ArrayList<>();
+ createObjectUnderTest().parse(toInputStream(content), events::add);
+ return events;
+ }
+
+    @Test
+    void constructor_throws_if_config_is_null() {
+        assertThrows(NullPointerException.class, () -> new MultilineInputCodec(null, eventFactory));
+    }
+
+    @Test
+    void constructor_throws_if_eventFactory_is_null() {
+        assertThrows(NullPointerException.class, () -> new MultilineInputCodec(config, null));
+    }
+
+    @Test
+    void constructor_throws_if_match_pattern_is_invalid() {
+        when(config.getMatch()).thenReturn("[invalid(");
+        // The unbalanced bracket/paren is not a compilable regex, so construction itself must be rejected.
+        assertThrows(IllegalArgumentException.class, () -> createObjectUnderTest());
+    }
+
+    @Test
+    void parse_throws_if_inputStream_is_null() {
+        when(config.getMatch()).thenReturn("^\\S");
+        when(config.getNegate()).thenReturn(true);
+        when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+        when(config.getMaxLines()).thenReturn(500);
+        when(config.getMaxLength()).thenReturn(10000);
+        when(config.getLineSeparator()).thenReturn("\n");
+        // A valid codec is built first so the failure can only come from the null stream argument.
+        final MultilineInputCodec objectUnderTest = createObjectUnderTest();
+        assertThrows(NullPointerException.class, () -> objectUnderTest.parse(null, ignored -> {}));
+    }
+
+    @Test
+    void parse_throws_if_consumer_is_null() {
+        when(config.getMatch()).thenReturn("^\\S");
+        when(config.getNegate()).thenReturn(true);
+        when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+        when(config.getMaxLines()).thenReturn(500);
+        when(config.getMaxLength()).thenReturn(10000);
+        when(config.getLineSeparator()).thenReturn("\n");
+        // A valid codec and a valid stream are supplied so the failure can only come from the null consumer.
+        final MultilineInputCodec objectUnderTest = createObjectUnderTest();
+        assertThrows(NullPointerException.class, () -> objectUnderTest.parse(toInputStream("test"), null));
+    }
+
+ @Test
+ void parse_empty_input_produces_no_events() throws IOException {
+ when(config.getMatch()).thenReturn("^\\S");
+ when(config.getNegate()).thenReturn(true);
+ when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+ when(config.getMaxLines()).thenReturn(500);
+ when(config.getMaxLength()).thenReturn(10000);
+ when(config.getLineSeparator()).thenReturn("\n");
+
+ final List> events = parseContent("");
+ assertThat(events.size(), equalTo(0));
+ }
+
+ @Test
+ void parse_single_line_produces_one_event() throws IOException {
+ when(config.getMatch()).thenReturn("^\\d{4}-\\d{2}-\\d{2}");
+ when(config.getNegate()).thenReturn(true);
+ when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+ when(config.getMaxLines()).thenReturn(500);
+ when(config.getMaxLength()).thenReturn(10000);
+ when(config.getLineSeparator()).thenReturn("\n");
+
+ final List> events = parseContent("2024-01-01 INFO single line\n");
+ assertThat(events.size(), equalTo(1));
+ assertThat(events.get(0).getData().get("message", String.class), equalTo("2024-01-01 INFO single line"));
+ }
+
+    @Nested
+    class PreviousModeWithNegateTrue {
+        // negate=true + PREVIOUS: a line that does NOT start with a yyyy-MM-dd date is appended to the previous event.
+        @BeforeEach
+        void setUp() {
+            when(config.getMatch()).thenReturn("^\\d{4}-\\d{2}-\\d{2}");
+            when(config.getNegate()).thenReturn(true);
+            when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+            when(config.getMaxLines()).thenReturn(500);
+            when(config.getMaxLength()).thenReturn(10000);
+            when(config.getLineSeparator()).thenReturn("\n");
+        }
+
+        @Test
+        void groups_java_stack_trace_with_timestamp_start() throws IOException {
+            final String input = "2024-01-01 ERROR NullPointerException\n" +
+                    " at com.example.Service.method(Service.java:42)\n" +
+                    " at com.example.Main.run(Main.java:10)\n" +
+                    "2024-01-01 INFO Application recovered\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(2));
+            assertThat(events.get(0).getData().get("message", String.class),
+                    equalTo("2024-01-01 ERROR NullPointerException\n" +
+                            " at com.example.Service.method(Service.java:42)\n" +
+                            " at com.example.Main.run(Main.java:10)"));
+            assertThat(events.get(1).getData().get("message", String.class),
+                    equalTo("2024-01-01 INFO Application recovered"));
+        }
+
+        @Test
+        void multiple_single_line_events_each_matching_pattern() throws IOException {
+            final String input = "2024-01-01 INFO line one\n" +
+                    "2024-01-02 INFO line two\n" +
+                    "2024-01-03 INFO line three\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(3));
+            assertThat(events.get(0).getData().get("message", String.class), equalTo("2024-01-01 INFO line one"));
+            assertThat(events.get(1).getData().get("message", String.class), equalTo("2024-01-02 INFO line two"));
+            assertThat(events.get(2).getData().get("message", String.class), equalTo("2024-01-03 INFO line three"));
+        }
+
+        @Test
+        void continuation_lines_at_beginning_are_grouped_as_first_event() throws IOException {
+            final String input = " orphan continuation line 1\n" +
+                    " orphan continuation line 2\n" +
+                    "2024-01-01 INFO first real entry\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(2));
+            assertThat(events.get(0).getData().get("message", String.class),
+                    equalTo(" orphan continuation line 1\n orphan continuation line 2"));
+            assertThat(events.get(1).getData().get("message", String.class),
+                    equalTo("2024-01-01 INFO first real entry"));
+        }
+
+        @Test
+        void last_event_with_continuations_flushed_at_end_of_stream() throws IOException {
+            final String input = "2024-01-01 ERROR Exception occurred\n" +
+                    " at com.example.Foo.bar(Foo.java:1)\n" +
+                    " at com.example.Baz.run(Baz.java:2)\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(1));
+            assertThat(events.get(0).getData().get("message", String.class),
+                    equalTo("2024-01-01 ERROR Exception occurred\n" +
+                            " at com.example.Foo.bar(Foo.java:1)\n" +
+                            " at com.example.Baz.run(Baz.java:2)"));
+        }
+
+        @Test
+        void no_lines_match_pattern_produces_single_event() throws IOException {
+            final String input = " continuation line 1\n" +
+                    " continuation line 2\n" +
+                    " continuation line 3\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(1));
+            assertThat(events.get(0).getData().get("message", String.class),
+                    equalTo(" continuation line 1\n continuation line 2\n continuation line 3"));
+        }
+    }
+
+    @Nested
+    class PreviousModeWithNegateFalse {
+        // negate=false + PREVIOUS: indented "at", "..." and "Caused by:" lines are appended to the previous event.
+        @BeforeEach
+        void setUp() {
+            when(config.getMatch()).thenReturn("^\\s+(at |\\.\\.\\.|Caused by:)");
+            when(config.getNegate()).thenReturn(false);
+            when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+            when(config.getMaxLines()).thenReturn(500);
+            when(config.getMaxLength()).thenReturn(10000);
+            when(config.getLineSeparator()).thenReturn("\n");
+        }
+
+        @Test
+        void groups_stack_trace_lines_matching_pattern_with_previous() throws IOException {
+            final String input = "java.lang.NullPointerException: null\n" +
+                    " at com.example.Service.process(Service.java:42)\n" +
+                    " at com.example.Main.run(Main.java:10)\n" +
+                    "INFO: Recovery complete\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(2));
+            assertThat(events.get(0).getData().get("message", String.class),
+                    equalTo("java.lang.NullPointerException: null\n" +
+                            " at com.example.Service.process(Service.java:42)\n" +
+                            " at com.example.Main.run(Main.java:10)"));
+            assertThat(events.get(1).getData().get("message", String.class),
+                    equalTo("INFO: Recovery complete"));
+        }
+
+        @Test
+        void caused_by_is_grouped_with_previous() throws IOException {
+            final String input = "java.lang.RuntimeException: error\n" +
+                    " at com.example.A.method(A.java:1)\n" +
+                    " Caused by: java.io.IOException\n" +
+                    " at com.example.B.read(B.java:5)\n" +
+                    "Next log entry\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(2));
+            assertThat(events.get(0).getData().get("message", String.class),
+                    equalTo("java.lang.RuntimeException: error\n" +
+                            " at com.example.A.method(A.java:1)\n" +
+                            " Caused by: java.io.IOException\n" +
+                            " at com.example.B.read(B.java:5)"));
+        }
+    }
+
+    @Nested
+    class NextMode {
+        // negate=false + NEXT: lines starting with whitespace are joined onto the line that follows them.
+        @BeforeEach
+        void setUp() {
+            when(config.getMatch()).thenReturn("^\\s");
+            when(config.getNegate()).thenReturn(false);
+            when(config.getWhat()).thenReturn(MultilineWhat.NEXT);
+            when(config.getMaxLines()).thenReturn(500);
+            when(config.getMaxLength()).thenReturn(10000);
+            when(config.getLineSeparator()).thenReturn("\n");
+        }
+
+        @Test
+        void continuation_lines_prepended_to_next_event() throws IOException {
+            final String input = " header line 1\n" +
+                    " header line 2\n" +
+                    "MAIN LOG ENTRY\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(1));
+            assertThat(events.get(0).getData().get("message", String.class),
+                    equalTo(" header line 1\n header line 2\nMAIN LOG ENTRY"));
+        }
+
+        @Test
+        void multiple_groups_in_next_mode() throws IOException {
+            final String input = " context A\n" +
+                    "EVENT A\n" +
+                    " context B\n" +
+                    "EVENT B\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(2));
+            assertThat(events.get(0).getData().get("message", String.class),
+                    equalTo(" context A\nEVENT A"));
+            assertThat(events.get(1).getData().get("message", String.class),
+                    equalTo(" context B\nEVENT B"));
+        }
+
+        @Test
+        void trailing_continuation_lines_flushed_at_end_of_stream() throws IOException {
+            final String input = "EVENT A\n" +
+                    " trailing context 1\n" +
+                    " trailing context 2\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(2));
+            assertThat(events.get(0).getData().get("message", String.class),
+                    equalTo("EVENT A"));
+            assertThat(events.get(1).getData().get("message", String.class),
+                    equalTo(" trailing context 1\n trailing context 2"));
+        }
+
+        @Test
+        void no_continuation_lines_each_line_is_separate_event() throws IOException {
+            final String input = "EVENT A\n" +
+                    "EVENT B\n" +
+                    "EVENT C\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(3));
+            assertThat(events.get(0).getData().get("message", String.class), equalTo("EVENT A"));
+            assertThat(events.get(1).getData().get("message", String.class), equalTo("EVENT B"));
+            assertThat(events.get(2).getData().get("message", String.class), equalTo("EVENT C"));
+        }
+    }
+
+    @Nested
+    class NextModeMaxLinesLimit {
+        // NEXT mode with max_lines=3: pending continuation lines are flushed once three have been buffered.
+        @BeforeEach
+        void setUp() {
+            when(config.getMatch()).thenReturn("^\\d{4}");
+            when(config.getNegate()).thenReturn(true);
+            when(config.getWhat()).thenReturn(MultilineWhat.NEXT);
+            when(config.getMaxLines()).thenReturn(3);
+            when(config.getMaxLength()).thenReturn(10000);
+            when(config.getLineSeparator()).thenReturn("\n");
+        }
+
+        @Test
+        void flushes_continuation_lines_when_max_lines_exceeded_in_next_mode() throws IOException {
+            final String input = " ctx 1\n" +
+                    " ctx 2\n" +
+                    " ctx 3\n" +
+                    " ctx 4\n" +
+                    "2024 EVENT\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(2));
+            assertThat(events.get(0).getData().get("message", String.class),
+                    equalTo(" ctx 1\n ctx 2\n ctx 3"));
+            assertThat(events.get(1).getData().get("message", String.class),
+                    equalTo(" ctx 4\n2024 EVENT"));
+        }
+    }
+
+    @Nested
+    class NextModeWithNegateTrue {
+        // negate=true + NEXT: lines that do NOT start with "[" are joined onto the following "["-prefixed line.
+        @BeforeEach
+        void setUp() {
+            when(config.getMatch()).thenReturn("^\\[");
+            when(config.getNegate()).thenReturn(true);
+            when(config.getWhat()).thenReturn(MultilineWhat.NEXT);
+            when(config.getMaxLines()).thenReturn(500);
+            when(config.getMaxLength()).thenReturn(10000);
+            when(config.getLineSeparator()).thenReturn("\n");
+        }
+
+        @Test
+        void lines_not_matching_pattern_are_prepended_to_next_matching_line() throws IOException {
+            final String input = "preamble line 1\n" +
+                    "preamble line 2\n" +
+                    "[2024-01-01] Log entry\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(1));
+            assertThat(events.get(0).getData().get("message", String.class),
+                    equalTo("preamble line 1\npreamble line 2\n[2024-01-01] Log entry"));
+        }
+    }
+
+    @Nested
+    class MaxLinesLimit {
+        // PREVIOUS mode with max_lines=3: an event is flushed as soon as it holds three lines.
+        @BeforeEach
+        void setUp() {
+            when(config.getMatch()).thenReturn("^\\d{4}");
+            when(config.getNegate()).thenReturn(true);
+            when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+            when(config.getMaxLines()).thenReturn(3);
+            when(config.getMaxLength()).thenReturn(10000);
+            when(config.getLineSeparator()).thenReturn("\n");
+        }
+
+        @Test
+        void flushes_event_when_max_lines_exceeded() throws IOException {
+            final String input = "2024-01-01 ERROR start\n" +
+                    " line 2\n" +
+                    " line 3\n" +
+                    " line 4\n" +
+                    " line 5\n" +
+                    "2024-01-02 INFO next\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(3));
+            assertThat(events.get(0).getData().get("message", String.class),
+                    equalTo("2024-01-01 ERROR start\n line 2\n line 3"));
+            assertThat(events.get(1).getData().get("message", String.class),
+                    equalTo(" line 4\n line 5"));
+            assertThat(events.get(2).getData().get("message", String.class),
+                    equalTo("2024-01-02 INFO next"));
+        }
+    }
+
+    @Nested
+    class MaxLengthLimit {
+        // PREVIOUS mode with max_length=30: an event is flushed before a line that would push it past 30 characters.
+        @BeforeEach
+        void setUp() {
+            when(config.getMatch()).thenReturn("^\\d{4}");
+            when(config.getNegate()).thenReturn(true);
+            when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+            when(config.getMaxLines()).thenReturn(500);
+            when(config.getMaxLength()).thenReturn(30);
+            when(config.getLineSeparator()).thenReturn("\n");
+        }
+
+        @Test
+        void flushes_event_when_max_length_exceeded() throws IOException {
+            final String input = "2024 start line here\n" +
+                    " continuation is long\n" +
+                    "2024 next entry\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(3));
+            // First event is "2024 start line here" (20 chars).
+            // Appending the separator plus the continuation line would exceed the 30-char limit, so it flushes first.
+            assertThat(events.get(0).getData().get("message", String.class),
+                    equalTo("2024 start line here"));
+            assertThat(events.get(1).getData().get("message", String.class),
+                    equalTo(" continuation is long"));
+            assertThat(events.get(2).getData().get("message", String.class),
+                    equalTo("2024 next entry"));
+        }
+    }
+
+    @Nested
+    class CustomLineSeparator {
+        // Input lines are "\n"-terminated, but grouped lines must be re-joined with the configured "\r\n".
+        @BeforeEach
+        void setUp() {
+            when(config.getMatch()).thenReturn("^\\d{4}");
+            when(config.getNegate()).thenReturn(true);
+            when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+            when(config.getMaxLines()).thenReturn(500);
+            when(config.getMaxLength()).thenReturn(10000);
+            when(config.getLineSeparator()).thenReturn("\r\n");
+        }
+
+        @Test
+        void uses_custom_line_separator_when_joining() throws IOException {
+            final String input = "2024-01-01 ERROR start\n" +
+                    " continuation\n" +
+                    "2024-01-02 INFO next\n";
+
+            final List<Record<Event>> events = parseContent(input);
+
+            assertThat(events.size(), equalTo(2));
+            assertThat(events.get(0).getData().get("message", String.class),
+                    equalTo("2024-01-01 ERROR start\r\n continuation"));
+        }
+    }
+
+ @Nested
+ class RealWorldScenarios {
+
+ @Test
+ void python_traceback() throws IOException {
+ when(config.getMatch()).thenReturn("^Traceback|^\\s|^\\w+Error");
+ when(config.getNegate()).thenReturn(false);
+ when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+ when(config.getMaxLines()).thenReturn(500);
+ when(config.getMaxLength()).thenReturn(10000);
+ when(config.getLineSeparator()).thenReturn("\n");
+
+ final String input = "2024-01-01 INFO Starting application\n" +
+ "Traceback (most recent call last):\n" +
+ " File \"main.py\", line 10, in \n" +
+ " result = process()\n" +
+ " File \"service.py\", line 5, in process\n" +
+ " return 1/0\n" +
+ "ZeroDivisionError: division by zero\n" +
+ "2024-01-01 INFO Recovered\n";
+
+ final List> events = parseContent(input);
+
+ assertThat(events.size(), equalTo(2));
+ assertThat(events.get(0).getData().get("message", String.class),
+ equalTo("2024-01-01 INFO Starting application\n" +
+ "Traceback (most recent call last):\n" +
+ " File \"main.py\", line 10, in \n" +
+ " result = process()\n" +
+ " File \"service.py\", line 5, in process\n" +
+ " return 1/0\n" +
+ "ZeroDivisionError: division by zero"));
+ assertThat(events.get(1).getData().get("message", String.class),
+ equalTo("2024-01-01 INFO Recovered"));
+ }
+
+ @Test
+ void multiline_xml_in_logs() throws IOException {
+ when(config.getMatch()).thenReturn("^\\d{4}-\\d{2}-\\d{2}");
+ when(config.getNegate()).thenReturn(true);
+ when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+ when(config.getMaxLines()).thenReturn(500);
+ when(config.getMaxLength()).thenReturn(10000);
+ when(config.getLineSeparator()).thenReturn("\n");
+
+ final String input = "2024-01-01 Request body:\n" +
+ "\n" +
+ " value\n" +
+ "\n" +
+ "2024-01-01 Response sent\n";
+
+ final List> events = parseContent(input);
+
+ assertThat(events.size(), equalTo(2));
+ assertThat(events.get(0).getData().get("message", String.class),
+ equalTo("2024-01-01 Request body:\n\n value\n"));
+ assertThat(events.get(1).getData().get("message", String.class),
+ equalTo("2024-01-01 Response sent"));
+ }
+
+ @Test
+ void log4j_multiline_with_nested_exception() throws IOException {
+ when(config.getMatch()).thenReturn("^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}");
+ when(config.getNegate()).thenReturn(true);
+ when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+ when(config.getMaxLines()).thenReturn(500);
+ when(config.getMaxLength()).thenReturn(10000);
+ when(config.getLineSeparator()).thenReturn("\n");
+
+ final String input = "2024-01-01T12:00:00 ERROR Application failed\n" +
+ "java.lang.RuntimeException: Outer\n" +
+ "\tat com.example.A.run(A.java:10)\n" +
+ "Caused by: java.io.IOException: Inner\n" +
+ "\tat com.example.B.read(B.java:20)\n" +
+ "\t... 5 more\n" +
+ "2024-01-01T12:00:01 INFO Shutdown complete\n";
+
+ final List> events = parseContent(input);
+
+ assertThat(events.size(), equalTo(2));
+ assertThat(events.get(0).getData().get("message", String.class),
+ equalTo("2024-01-01T12:00:00 ERROR Application failed\n" +
+ "java.lang.RuntimeException: Outer\n" +
+ "\tat com.example.A.run(A.java:10)\n" +
+ "Caused by: java.io.IOException: Inner\n" +
+ "\tat com.example.B.read(B.java:20)\n" +
+ "\t... 5 more"));
+ }
+ }
+
+    @Nested
+    class IsContinuationLineTests {
+        // Exercises isContinuationLine directly for both negate settings, without parsing a stream.
+        @Test
+        void negate_false_matching_line_is_continuation() {
+            when(config.getMatch()).thenReturn("^\\s");
+            when(config.getNegate()).thenReturn(false);
+            when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+            when(config.getMaxLines()).thenReturn(500);
+            when(config.getMaxLength()).thenReturn(10000);
+            when(config.getLineSeparator()).thenReturn("\n");
+            // With negate=false, a line is a continuation exactly when it matches the pattern.
+            final MultilineInputCodec objectUnderTest = createObjectUnderTest();
+            assertThat(objectUnderTest.isContinuationLine(" indented"), equalTo(true));
+            assertThat(objectUnderTest.isContinuationLine("not indented"), equalTo(false));
+        }
+
+        @Test
+        void negate_true_non_matching_line_is_continuation() {
+            when(config.getMatch()).thenReturn("^\\d{4}");
+            when(config.getNegate()).thenReturn(true);
+            when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+            when(config.getMaxLines()).thenReturn(500);
+            when(config.getMaxLength()).thenReturn(10000);
+            when(config.getLineSeparator()).thenReturn("\n");
+            // With negate=true, a line is a continuation exactly when it does NOT match the pattern.
+            final MultilineInputCodec objectUnderTest = createObjectUnderTest();
+            assertThat(objectUnderTest.isContinuationLine(" no timestamp"), equalTo(true));
+            assertThat(objectUnderTest.isContinuationLine("2024 has timestamp"), equalTo(false));
+        }
+    }
+
+ @Test
+ void event_metadata_is_log_type() throws IOException {
+ when(config.getMatch()).thenReturn("^\\d{4}");
+ when(config.getNegate()).thenReturn(true);
+ when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+ when(config.getMaxLines()).thenReturn(500);
+ when(config.getMaxLength()).thenReturn(10000);
+ when(config.getLineSeparator()).thenReturn("\n");
+
+ final List> events = parseContent("2024-01-01 test\n");
+
+ assertThat(events.size(), equalTo(1));
+ assertThat(events.get(0).getData(), notNullValue());
+ assertThat(events.get(0).getData().getMetadata(), notNullValue());
+ assertThat(events.get(0).getData().getMetadata().getEventType(), equalTo("LOG"));
+ }
+}
diff --git a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhatTest.java b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhatTest.java
new file mode 100644
index 0000000000..9928024685
--- /dev/null
+++ b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhatTest.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.dataprepper.plugins.codec.multiline;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+class MultilineWhatTest {
+
+ @Test
+ void fromString_returns_PREVIOUS_for_previous() {
+ assertThat(MultilineWhat.fromString("previous"), equalTo(MultilineWhat.PREVIOUS));
+ }
+
+ @Test
+ void fromString_returns_NEXT_for_next() {
+ assertThat(MultilineWhat.fromString("next"), equalTo(MultilineWhat.NEXT));
+ }
+
+ @Test
+ void fromString_is_case_insensitive() {
+ assertThat(MultilineWhat.fromString("PREVIOUS"), equalTo(MultilineWhat.PREVIOUS));
+ assertThat(MultilineWhat.fromString("NEXT"), equalTo(MultilineWhat.NEXT));
+ assertThat(MultilineWhat.fromString("Previous"), equalTo(MultilineWhat.PREVIOUS));
+ }
+
+ @ParameterizedTest
+ @ValueSource(strings = {"invalid", "before", "after", ""})
+ void fromString_throws_for_invalid_value(final String value) {
+ assertThrows(IllegalArgumentException.class, () -> MultilineWhat.fromString(value));
+ }
+
+ @Test
+ void toString_returns_correct_values() {
+ assertThat(MultilineWhat.PREVIOUS.toString(), equalTo("previous"));
+ assertThat(MultilineWhat.NEXT.toString(), equalTo("next"));
+ }
+}
diff --git a/settings.gradle b/settings.gradle
index f6f07cc1b0..3409e170eb 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -169,6 +169,7 @@ include 'release:maven'
include 'e2e-test:peerforwarder'
include 'data-prepper-plugins:failures-common'
include 'data-prepper-plugins:newline-codecs'
+include 'data-prepper-plugins:multiline-codecs'
include 'data-prepper-plugins:avro-codecs'
include 'data-prepper-plugins:kafka-plugins'
include 'data-prepper-plugins:user-agent-processor'
From eb89a299d2e083194fbe5751d93c1269c072948a Mon Sep 17 00:00:00 2001
From: Manisha Yadav
Date: Mon, 15 Jun 2026 13:49:31 +0000
Subject: [PATCH 2/3] Address review comments: Implement config as per the
suggested new design
Signed-off-by: Manisha Yadav
---
.../multiline-codecs/README.md | 64 ++-
.../multiline-codecs/build.gradle | 9 -
.../codec/multiline/MultilineInputCodec.java | 190 +++++--
.../multiline/MultilineInputCodecConfig.java | 196 +++++---
.../codec/multiline/MultilineMode.java | 40 ++
.../codec/multiline/MultilineWhat.java | 57 ---
.../codec/multiline/MultilineCodecsIT.java | 279 +++--------
.../MultilineInputCodecConfigTest.java | 108 ++--
.../multiline/MultilineInputCodecTest.java | 462 ++++++------------
.../codec/multiline/MultilineWhatTest.java | 50 --
10 files changed, 627 insertions(+), 828 deletions(-)
create mode 100644 data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineMode.java
delete mode 100644 data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhat.java
delete mode 100644 data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhatTest.java
diff --git a/data-prepper-plugins/multiline-codecs/README.md b/data-prepper-plugins/multiline-codecs/README.md
index 62b61e547e..bae5add698 100644
--- a/data-prepper-plugins/multiline-codecs/README.md
+++ b/data-prepper-plugins/multiline-codecs/README.md
@@ -12,36 +12,41 @@ The multiline input codec can be configured with source plugins (e.g. S3 source,
- **Python tracebacks**: `Traceback` blocks spanning multiple lines
- **Timestamp-prefixed logs**: Logs where each entry starts with a timestamp and continuation lines don't
- **Multi-line JSON/XML in logs**: Structured data embedded across multiple lines within log entries
-- **Custom log formats**: Any format where a recognizable pattern marks the start of a new event
+- **Custom log formats**: Any format where a recognizable pattern marks the start or end of an event
## Configuration Options
+Exactly one of the four pattern fields must be specified:
+
| Option | Required | Type | Default | Description |
|---|---|---|---|---|
-| `match` | Yes | String (regex) | - | A regular expression pattern used to identify line boundaries |
-| `negate` | No | Boolean | `false` | When `false`, lines matching the pattern are continuation lines. When `true`, lines NOT matching the pattern are continuation lines |
-| `what` | No | String | `previous` | Whether continuation lines belong to the `previous` or `next` event |
+| `event_start_pattern` | One of four | String (regex) | - | A new event begins at each line matching this pattern |
+| `event_end_pattern` | One of four | String (regex) | - | An event ends at each line matching this pattern (inclusive) |
+| `continuation_line_start_pattern` | One of four | String (regex) | - | Lines matching this pattern are continuations of the previous event |
+| `continuation_line_end_pattern` | One of four | String (regex) | - | Lines matching this pattern are prepended to the next event |
+| `omit_matched_section` | No | Boolean | `false` | When `true`, the first portion of each matching line that matches the pattern is removed from the output; non-matching lines are left unchanged |
| `max_lines` | No | Integer | `500` | Maximum number of lines that can be combined into a single event |
-| `max_length` | No | Integer | `10000` | Maximum character length of a combined multiline event |
-| `line_separator` | No | String | `\n` | Separator string used when joining lines into a single event message |
+| `max_length` | No | Integer | `10000` | Maximum character length of a combined multiline event. Note: a single line exceeding this limit will still be emitted as a complete event without truncation |
+| `line_separator` | No | String | `\n` | Separator string used when joining lines into a single event message. Note: `BufferedReader.readLine()` strips original line endings, so the codec normalizes joined lines using this separator. Set to `""` for no separator |
+| `encoding` | No | String | `UTF-8` | Character encoding to use when reading the input stream |
## How It Works
-The codec reads lines from the input stream and uses the `match` regex to determine event boundaries:
+The codec reads lines from the input stream and uses the configured pattern to determine event boundaries:
-1. **`negate=true` + `what=previous`** (most common): A new event starts when a line matches the pattern. Lines that do NOT match are appended to the preceding event.
+1. **`event_start_pattern`** (most common): Each line matching the pattern starts a new event. All subsequent non-matching lines are appended to it.
-2. **`negate=false` + `what=previous`**: Lines that match the pattern are appended to the preceding event.
+2. **`event_end_pattern`**: Lines are accumulated until a line matches the pattern. The matching line is included in the current event, and the next line starts a new event.
-3. **`negate=true` + `what=next`**: Lines that do NOT match the pattern are prepended to the next matching line.
+3. **`continuation_line_start_pattern`**: Lines matching the pattern are continuations of the previous event. Non-matching lines start new events.
-4. **`negate=false` + `what=next`**: Lines that match the pattern are prepended to the next non-matching line.
+4. **`continuation_line_end_pattern`**: Lines matching the pattern are prepended to the next non-matching line's event.
## Examples
-### Java Stack Traces (timestamp-based grouping)
+### Java Stack Traces
-Each log entry starts with a timestamp. Lines without a timestamp are continuations of the previous entry.
+Each log entry starts with a timestamp. Lines without a timestamp (stack frames) are part of the previous entry.
```yaml
pipeline:
@@ -49,9 +54,7 @@ pipeline:
s3:
codec:
multiline:
- match: "^\\d{4}-\\d{2}-\\d{2}"
- negate: true
- what: previous
+ event_start_pattern: "^\\d{4}-\\d{2}-\\d{2}"
```
Input:
@@ -62,13 +65,11 @@ Input:
2024-01-01 12:00:01 INFO Application recovered
```
-Result: 2 events
-- Event 1: The ERROR line with its full stack trace grouped together
-- Event 2: The INFO line as a single event
+Result: 2 events (stack trace grouped with its ERROR line)
-### Java Stack Traces (pattern-based grouping)
+### Delimiter-Separated Entries
-Lines starting with whitespace followed by `at `, `...`, or `Caused by:` are continuations.
+Each log entry is terminated by a `---` line, which is included as the last line of that event.
```yaml
pipeline:
@@ -76,12 +77,12 @@ pipeline:
s3:
codec:
multiline:
- match: "^\\s+(at |\\.\\.\\.|Caused by:)"
- negate: false
- what: previous
+ event_end_pattern: "^---$"
```
-### Python Tracebacks
+### Stack Traces (continuation pattern)
+
+Lines starting with whitespace followed by `at `, `...`, or `Caused by:` are continuations.
```yaml
pipeline:
@@ -89,14 +90,12 @@ pipeline:
s3:
codec:
multiline:
- match: "^Traceback|^\\s|^\\w+Error"
- negate: false
- what: previous
+ continuation_line_start_pattern: "^\\s+(at |\\.\\.\\.|Caused by:)"
```
-### Log Entries with Preamble (next mode)
+### Omitting Timestamps from Output
-Lines starting with whitespace are prepended to the next non-indented line.
+Strip the timestamp from each event's first line:
```yaml
pipeline:
@@ -104,9 +103,8 @@ pipeline:
s3:
codec:
multiline:
- match: "^\\s"
- negate: false
- what: next
+ event_start_pattern: "^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}\\s+"
+ omit_matched_section: true
```
## Developer Guide
diff --git a/data-prepper-plugins/multiline-codecs/build.gradle b/data-prepper-plugins/multiline-codecs/build.gradle
index cf619d2062..ade99e2e54 100644
--- a/data-prepper-plugins/multiline-codecs/build.gradle
+++ b/data-prepper-plugins/multiline-codecs/build.gradle
@@ -7,18 +7,9 @@
* compatible open source license.
*/
-plugins {
- id 'java'
-}
-
dependencies {
implementation project(':data-prepper-api')
implementation 'com.fasterxml.jackson.core:jackson-annotations'
- implementation libs.parquet.common
testImplementation project(':data-prepper-plugins:common')
testImplementation project(':data-prepper-test:test-event')
}
-
-test {
- useJUnitPlatform()
-}
diff --git a/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodec.java b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodec.java
index 07eba72e35..e3e5e1dc9d 100644
--- a/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodec.java
+++ b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodec.java
@@ -24,9 +24,11 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.nio.charset.Charset;
import java.util.Collections;
import java.util.Objects;
import java.util.function.Consumer;
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
@@ -37,46 +39,57 @@
* such as Java stack traces, Python tracebacks, or any log format where entries begin with
* a recognizable pattern (e.g., a timestamp).
*
- * The codec supports two grouping modes via the {@code what} configuration:
+ * The codec supports four mutually exclusive pattern modes:
*
- * - {@code previous}: Continuation lines are appended to the preceding event.
- * - {@code next}: Continuation lines are prepended to the following event.
- *
- *
- * The {@code negate} option controls which lines are considered continuation lines:
- *
- * - {@code negate=false}: Lines matching the pattern are continuation lines.
- * - {@code negate=true}: Lines NOT matching the pattern are continuation lines.
+ * - {@code event_start_pattern}: A new event begins at each matching line.
+ * - {@code event_end_pattern}: An event ends at each matching line (inclusive).
+ * - {@code continuation_line_start_pattern}: Matching lines are continuations of the previous event.
+ * - {@code continuation_line_end_pattern}: Matching lines are prepended to the next event.
*
*/
@DataPrepperPlugin(name = "multiline", pluginType = InputCodec.class, pluginConfigurationType = MultilineInputCodecConfig.class)
public class MultilineInputCodec implements InputCodec {
private static final Logger LOG = LoggerFactory.getLogger(MultilineInputCodec.class);
- static final String MESSAGE_FIELD_NAME = "message";
+ private static final String MESSAGE_FIELD_NAME = "message";
private final Pattern pattern;
- private final boolean negate;
- private final MultilineWhat what;
+ private final MultilineMode mode;
+ private final boolean omitMatchedSection;
private final int maxLines;
private final int maxLength;
private final String lineSeparator;
+ private final Charset encoding;
private final EventFactory eventFactory;
@DataPrepperPluginConstructor
public MultilineInputCodec(final MultilineInputCodecConfig config, final EventFactory eventFactory) {
Objects.requireNonNull(config, "config must not be null");
this.eventFactory = Objects.requireNonNull(eventFactory, "eventFactory must not be null");
- try {
- this.pattern = Pattern.compile(config.getMatch());
- } catch (final Exception e) {
- throw new IllegalArgumentException("Invalid regex pattern for 'match': " + config.getMatch(), e);
+
+ this.pattern = config.getCompiledPattern();
+ if (this.pattern == null) {
+ throw new IllegalArgumentException("A valid pattern must be configured");
}
- this.negate = config.getNegate();
- this.what = config.getWhat();
+
+ this.mode = resolveMode(config);
+ this.omitMatchedSection = config.getOmitMatchedSection();
this.maxLines = config.getMaxLines();
this.maxLength = config.getMaxLength();
this.lineSeparator = config.getLineSeparator();
+ this.encoding = config.getEncoding();
+ }
+
+ private static MultilineMode resolveMode(final MultilineInputCodecConfig config) {
+ if (config.getEventStartPattern() != null) {
+ return MultilineMode.EVENT_START;
+ } else if (config.getEventEndPattern() != null) {
+ return MultilineMode.EVENT_END;
+ } else if (config.getContinuationLineStartPattern() != null) {
+ return MultilineMode.CONTINUATION_START;
+ } else {
+ return MultilineMode.CONTINUATION_END;
+ }
}
@Override
@@ -84,35 +97,109 @@ public void parse(final InputStream inputStream, final Consumer> e
Objects.requireNonNull(inputStream, "inputStream must not be null");
Objects.requireNonNull(eventConsumer, "eventConsumer must not be null");
- try (final BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
- if (what == MultilineWhat.PREVIOUS) {
- parsePreviousMode(reader, eventConsumer);
- } else {
- parseNextMode(reader, eventConsumer);
+ try (final BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, encoding))) {
+ switch (mode) {
+ case EVENT_START:
+ parseEventStartMode(reader, eventConsumer);
+ break;
+ case EVENT_END:
+ parseEventEndMode(reader, eventConsumer);
+ break;
+ case CONTINUATION_START:
+ parseContinuationStartMode(reader, eventConsumer);
+ break;
+ case CONTINUATION_END:
+ parseContinuationEndMode(reader, eventConsumer);
+ break;
+ default:
+ throw new IllegalStateException("Unknown multiline mode: " + mode);
+ }
+ }
+ }
+
+ /**
+ * EVENT_START mode: A new event begins at each line matching the pattern.
+ * Non-matching lines are continuations of the preceding event.
+ */
+ private void parseEventStartMode(final BufferedReader reader, final Consumer> eventConsumer) throws IOException {
+ final StringBuilder buffer = new StringBuilder();
+ int lineCount = 0;
+ String line;
+
+ while ((line = reader.readLine()) != null) {
+ final boolean matches = pattern.matcher(line).find();
+
+ if (matches || shouldFlush(buffer, lineCount, line)) {
+ if (buffer.length() > 0) {
+ emitEvent(buffer.toString(), eventConsumer);
+ buffer.setLength(0);
+ lineCount = 0;
+ }
}
+
+ if (buffer.length() > 0) {
+ buffer.append(lineSeparator);
+ }
+ buffer.append(processLine(line, matches));
+ lineCount++;
+ }
+
+ if (buffer.length() > 0) {
+ emitEvent(buffer.toString(), eventConsumer);
}
}
/**
- * In PREVIOUS mode, continuation lines are appended to the preceding event.
- * A new event boundary is detected when a line is NOT a continuation line
- * (i.e., it's a "start" line).
+ * EVENT_END mode: An event ends at each line matching the pattern (inclusive).
+ * The matching line is included in the current event, then a new event begins.
*/
- private void parsePreviousMode(final BufferedReader reader, final Consumer> eventConsumer) throws IOException {
+ private void parseEventEndMode(final BufferedReader reader, final Consumer> eventConsumer) throws IOException {
final StringBuilder buffer = new StringBuilder();
int lineCount = 0;
String line;
while ((line = reader.readLine()) != null) {
- final boolean isContinuation = isContinuationLine(line);
+ final boolean matches = pattern.matcher(line).find();
- if (!isContinuation && buffer.length() > 0) {
+ if (shouldFlush(buffer, lineCount, line)) {
+ if (buffer.length() > 0) {
+ emitEvent(buffer.toString(), eventConsumer);
+ buffer.setLength(0);
+ lineCount = 0;
+ }
+ }
+
+ if (buffer.length() > 0) {
+ buffer.append(lineSeparator);
+ }
+ buffer.append(processLine(line, matches));
+ lineCount++;
+
+ if (matches) {
emitEvent(buffer.toString(), eventConsumer);
buffer.setLength(0);
lineCount = 0;
}
+ }
- if (shouldFlush(buffer, lineCount, line)) {
+ if (buffer.length() > 0) {
+ emitEvent(buffer.toString(), eventConsumer);
+ }
+ }
+
+ /**
+ * CONTINUATION_START mode: Lines matching the pattern are continuations of the previous event.
+ * Non-matching lines start new events.
+ */
+ private void parseContinuationStartMode(final BufferedReader reader, final Consumer> eventConsumer) throws IOException {
+ final StringBuilder buffer = new StringBuilder();
+ int lineCount = 0;
+ String line;
+
+ while ((line = reader.readLine()) != null) {
+ final boolean matches = pattern.matcher(line).find();
+
+ if (!matches || shouldFlush(buffer, lineCount, line)) {
if (buffer.length() > 0) {
emitEvent(buffer.toString(), eventConsumer);
buffer.setLength(0);
@@ -123,7 +210,7 @@ private void parsePreviousMode(final BufferedReader reader, final Consumer 0) {
buffer.append(lineSeparator);
}
- buffer.append(line);
+ buffer.append(processLine(line, matches));
lineCount++;
}
@@ -133,42 +220,35 @@ private void parsePreviousMode(final BufferedReader reader, final Consumer> eventConsumer) throws IOException {
+ private void parseContinuationEndMode(final BufferedReader reader, final Consumer> eventConsumer) throws IOException {
final StringBuilder buffer = new StringBuilder();
int lineCount = 0;
boolean bufferHasNonContinuation = false;
String line;
while ((line = reader.readLine()) != null) {
- final boolean isContinuation = isContinuationLine(line);
+ final boolean matches = pattern.matcher(line).find();
- if (!isContinuation) {
+ if (!matches) {
if (bufferHasNonContinuation) {
- // The buffer already has a complete event (non-continuation at end).
- // Emit it and start fresh.
emitEvent(buffer.toString(), eventConsumer);
buffer.setLength(0);
lineCount = 0;
bufferHasNonContinuation = false;
}
- // Append this non-continuation line to the buffer (with any preceding continuations).
if (buffer.length() > 0) {
buffer.append(lineSeparator);
}
- buffer.append(line);
+ buffer.append(processLine(line, false));
lineCount++;
bufferHasNonContinuation = true;
continue;
}
- // This is a continuation line.
if (bufferHasNonContinuation) {
- // Buffer has a complete event ending with non-continuation.
- // Emit it, then start collecting continuations for the next event.
emitEvent(buffer.toString(), eventConsumer);
buffer.setLength(0);
lineCount = 0;
@@ -186,7 +266,7 @@ private void parseNextMode(final BufferedReader reader, final Consumer 0) {
buffer.append(lineSeparator);
}
- buffer.append(line);
+ buffer.append(processLine(line, matches));
lineCount++;
}
@@ -195,17 +275,19 @@ private void parseNextMode(final BufferedReader reader, final ConsumerWhen {@code negate=false}: a line matching the pattern IS a continuation line.
- * When {@code negate=true}: a line NOT matching the pattern IS a continuation line.
- */
- boolean isContinuationLine(final String line) {
- final boolean matches = pattern.matcher(line).find();
- return negate != matches;
+ private String processLine(final String line, final boolean matches) {
+ if (!omitMatchedSection || !matches) {
+ return line;
+ }
+ final Matcher matcher = pattern.matcher(line);
+ return matcher.replaceFirst("");
}
+ /**
+ * Determines if the buffer should be flushed before appending the next line.
+ * Note: if a single line exceeds max_length on its own, it will still be emitted
+ * as a complete event without truncation.
+ */
private boolean shouldFlush(final StringBuilder buffer, final int lineCount, final String nextLine) {
if (lineCount >= maxLines) {
LOG.debug("Flushing multiline event due to max_lines limit of {}", maxLines);
diff --git a/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfig.java b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfig.java
index dc26290f3a..9eb76ce4fd 100644
--- a/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfig.java
+++ b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfig.java
@@ -12,9 +12,12 @@
import com.fasterxml.jackson.annotation.JsonProperty;
import jakarta.validation.constraints.AssertTrue;
import jakarta.validation.constraints.Min;
-import jakarta.validation.constraints.NotEmpty;
import jakarta.validation.constraints.NotNull;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.StandardCharsets;
+import java.nio.charset.UnsupportedCharsetException;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
@@ -22,25 +25,19 @@
* Configuration class for the multiline input codec.
*
* The multiline codec groups consecutive lines from an input stream into a single event
- * based on a regex pattern. This is useful for log formats where a single logical event
- * spans multiple lines (e.g., Java stack traces, multi-line application logs).
+ * based on a regex pattern. Exactly one of the four pattern fields must be specified:
+ *
+ * - {@code event_start_pattern}: A new event begins at each line matching this pattern.
+ * - {@code event_end_pattern}: An event ends at each line matching this pattern (inclusive).
+ * - {@code continuation_line_start_pattern}: Lines matching this pattern are continuations of the previous event.
+ * - {@code continuation_line_end_pattern}: Lines matching this pattern are prepended to the next event.
+ *
*
* Example configuration for Java stack traces:
*
* codec:
* multiline:
- * match: "^\\s+(at |\\.\\.\\.|Caused by:)"
- * negate: false
- * what: previous
- *
- *
- * Example configuration for timestamp-prefixed logs:
- *
- * codec:
- * multiline:
- * match: "^\\d{4}-\\d{2}-\\d{2}"
- * negate: true
- * what: previous
+ * event_start_pattern: "^\\d{4}-\\d{2}-\\d{2}"
*
*/
public class MultilineInputCodecConfig {
@@ -49,17 +46,20 @@ public class MultilineInputCodecConfig {
static final int DEFAULT_MAX_LENGTH = 10000;
static final String DEFAULT_LINE_SEPARATOR = "\n";
- @NotEmpty(message = "match must not be empty")
- @JsonProperty("match")
- private String match;
+ @JsonProperty("event_start_pattern")
+ private String eventStartPattern;
+
+ @JsonProperty("event_end_pattern")
+ private String eventEndPattern;
- @NotNull(message = "negate must not be null")
- @JsonProperty("negate")
- private Boolean negate = false;
+ @JsonProperty("continuation_line_start_pattern")
+ private String continuationLineStartPattern;
- @NotNull(message = "what must not be null")
- @JsonProperty("what")
- private MultilineWhat what = MultilineWhat.PREVIOUS;
+ @JsonProperty("continuation_line_end_pattern")
+ private String continuationLineEndPattern;
+
+ @JsonProperty("omit_matched_section")
+ private boolean omitMatchedSection = false;
@Min(value = 1, message = "max_lines must be at least 1")
@JsonProperty("max_lines")
@@ -73,75 +73,139 @@ public class MultilineInputCodecConfig {
@JsonProperty("line_separator")
private String lineSeparator = DEFAULT_LINE_SEPARATOR;
- /**
- * The regex pattern used to identify line boundaries.
- *
- * @return The regex pattern string.
- */
- public String getMatch() {
- return match;
+ @JsonProperty("encoding")
+ private String encoding = StandardCharsets.UTF_8.name();
+
+ private Pattern compiledPattern;
+ private Charset encodingCharset;
+
+ public String getEventStartPattern() {
+ return eventStartPattern;
}
- /**
- * Whether to negate the pattern match.
- * When false: lines matching the pattern are considered continuation lines.
- * When true: lines NOT matching the pattern are considered continuation lines.
- *
- * @return true if the pattern should be negated.
- */
- public Boolean getNegate() {
- return negate;
+ public String getEventEndPattern() {
+ return eventEndPattern;
}
- /**
- * Defines whether unmatched (continuation) lines belong to the previous or next event.
- *
- * @return The multiline grouping direction.
- */
- public MultilineWhat getWhat() {
- return what;
+ public String getContinuationLineStartPattern() {
+ return continuationLineStartPattern;
+ }
+
+ public String getContinuationLineEndPattern() {
+ return continuationLineEndPattern;
+ }
+
+ public boolean getOmitMatchedSection() {
+ return omitMatchedSection;
}
- /**
- * The maximum number of lines that can be combined into a single event.
- * When this limit is reached, the accumulated lines are flushed as an event
- * and a new accumulation begins.
- *
- * @return The maximum number of lines per event.
- */
public int getMaxLines() {
return maxLines;
}
+ public int getMaxLength() {
+ return maxLength;
+ }
+
+ public String getLineSeparator() {
+ return lineSeparator;
+ }
+
/**
- * The maximum character length of a combined multiline event.
- * When this limit is reached, the accumulated lines are flushed as an event.
+ * Returns the validated Charset. The encoding is resolved by {@code isValidEncoding()} during
+ * bean validation and stored; this returns {@code null} if that validation has not run.
*
- * @return The maximum character length per event.
+ * @return The validated Charset.
*/
- public int getMaxLength() {
- return maxLength;
+ public Charset getEncoding() {
+ return encodingCharset;
}
/**
- * The separator string to use when joining multiple lines into a single event message.
+ * Returns the compiled regex pattern. The pattern is compiled by {@code isValidPattern()} during
+ * validation and reused; this returns {@code null} if that validation has not run.
*
- * @return The line separator string.
+ * @return The compiled Pattern.
*/
- public String getLineSeparator() {
- return lineSeparator;
+ public Pattern getCompiledPattern() {
+ return compiledPattern;
+ }
+
+ @AssertTrue(message = "Exactly one pattern field must be specified: event_start_pattern, event_end_pattern, " +
+ "continuation_line_start_pattern, or continuation_line_end_pattern")
+ boolean isExactlyOnePatternSpecified() {
+ int count = 0;
+ if (eventStartPattern != null) count++;
+ if (eventEndPattern != null) count++;
+ if (continuationLineStartPattern != null) count++;
+ if (continuationLineEndPattern != null) count++;
+ return count == 1;
}
- @AssertTrue(message = "match must be a valid regular expression")
+ @AssertTrue(message = "The specified pattern must be a valid regular expression")
boolean isValidPattern() {
- if (match == null || match.isEmpty()) {
+ final String patternString = getConfiguredPatternString();
+ if (patternString == null || patternString.isEmpty()) {
return false;
}
try {
- Pattern.compile(match);
+ compiledPattern = Pattern.compile(patternString);
return true;
} catch (final PatternSyntaxException e) {
return false;
}
}
+
+ @AssertTrue(message = "The specified encoding must be a valid charset")
+ boolean isValidEncoding() {
+ if (encoding == null || encoding.isEmpty()) {
+ return false;
+ }
+ try {
+ encodingCharset = Charset.forName(encoding);
+ return true;
+ } catch (final IllegalCharsetNameException | UnsupportedCharsetException e) {
+ return false;
+ }
+ }
+
+ String getConfiguredPatternString() {
+ if (eventStartPattern != null) return eventStartPattern;
+ if (eventEndPattern != null) return eventEndPattern;
+ if (continuationLineStartPattern != null) return continuationLineStartPattern;
+ if (continuationLineEndPattern != null) return continuationLineEndPattern;
+ return null;
+ }
+
+ static Builder builder() {
+ return new Builder();
+ }
+
+ static class Builder {
+ private final MultilineInputCodecConfig config = new MultilineInputCodecConfig();
+
+ Builder withEventStartPattern(final String pattern) {
+ config.eventStartPattern = pattern;
+ return this;
+ }
+
+ Builder withEventEndPattern(final String pattern) {
+ config.eventEndPattern = pattern;
+ return this;
+ }
+
+ Builder withContinuationLineStartPattern(final String pattern) {
+ config.continuationLineStartPattern = pattern;
+ return this;
+ }
+
+ Builder withContinuationLineEndPattern(final String pattern) {
+ config.continuationLineEndPattern = pattern;
+ return this;
+ }
+
+ MultilineInputCodecConfig build() {
+ return config;
+ }
+ }
}
diff --git a/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineMode.java b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineMode.java
new file mode 100644
index 0000000000..fafa8cd0b0
--- /dev/null
+++ b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineMode.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.dataprepper.plugins.codec.multiline;
+
+/**
+ * Internal representation of the multiline grouping mode, determined from the configuration.
+ */
+enum MultilineMode {
+
+ /**
+ * Selected by {@code event_start_pattern}: a new event starts at each line matching the pattern.
+ * Non-matching lines are continuations of the preceding event.
+ */
+ EVENT_START,
+
+ /**
+ * Selected by {@code event_end_pattern}: an event ends at each line matching the pattern (inclusive).
+ * The next line begins a new event.
+ */
+ EVENT_END,
+
+ /**
+ * Selected by {@code continuation_line_start_pattern}: lines matching the pattern are continuations
+ * of the previous event. Non-matching lines start new events.
+ */
+ CONTINUATION_START,
+
+ /**
+ * Selected by {@code continuation_line_end_pattern}: lines matching the pattern are prepended to
+ * the next event. Non-matching lines complete the event.
+ */
+ CONTINUATION_END
+}
diff --git a/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhat.java b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhat.java
deleted file mode 100644
index ab21b16ac5..0000000000
--- a/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhat.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright OpenSearch Contributors
- * SPDX-License-Identifier: Apache-2.0
- *
- * The OpenSearch Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-package org.opensearch.dataprepper.plugins.codec.multiline;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonValue;
-
-import java.util.Arrays;
-import java.util.Map;
-import java.util.stream.Collectors;
-
-/**
- * Defines whether unmatched lines should be grouped with the previous or next matching line.
- */
-public enum MultilineWhat {
-
- /**
- * Unmatched lines are appended to the previous matching line's event.
- */
- PREVIOUS("previous"),
-
- /**
- * Unmatched lines are prepended to the next matching line's event.
- */
- NEXT("next");
-
- private static final Map OPTIONS_MAP = Arrays.stream(MultilineWhat.values())
- .collect(Collectors.toMap(MultilineWhat::toString, value -> value));
-
- private final String name;
-
- MultilineWhat(final String name) {
- this.name = name;
- }
-
- @JsonCreator
- public static MultilineWhat fromString(final String value) {
- final MultilineWhat result = OPTIONS_MAP.get(value.toLowerCase());
- if (result == null) {
- throw new IllegalArgumentException("Invalid value for 'what': " + value + ". Valid values are: " + OPTIONS_MAP.keySet());
- }
- return result;
- }
-
- @JsonValue
- @Override
- public String toString() {
- return name;
- }
-}
diff --git a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineCodecsIT.java b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineCodecsIT.java
index 6d2b970db5..f81b4fb424 100644
--- a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineCodecsIT.java
+++ b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineCodecsIT.java
@@ -26,9 +26,9 @@
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.function.Consumer;
+import java.util.regex.Pattern;
import static org.hamcrest.CoreMatchers.equalTo;
-import static org.hamcrest.CoreMatchers.notNullValue;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsString;
import static org.mockito.Mockito.lenient;
@@ -51,6 +51,8 @@ void setUp() {
lenient().when(config.getMaxLines()).thenReturn(500);
lenient().when(config.getMaxLength()).thenReturn(50000);
lenient().when(config.getLineSeparator()).thenReturn("\n");
+ lenient().when(config.getOmitMatchedSection()).thenReturn(false);
+ lenient().when(config.getEncoding()).thenReturn(StandardCharsets.UTF_8);
}
private MultilineInputCodec createObjectUnderTest() {
@@ -62,10 +64,9 @@ private InputStream toInputStream(final String content) {
}
@Test
- void parse_java_stack_trace_groups_exception_with_stack_frames() throws IOException {
- lenient().when(config.getMatch()).thenReturn("^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}");
- lenient().when(config.getNegate()).thenReturn(true);
- lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+ void parse_java_stack_trace_with_event_start_pattern() throws IOException {
+ lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}"));
+ lenient().when(config.getEventStartPattern()).thenReturn("^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}");
final String input =
"2024-01-15 10:23:45.123 ERROR [main] com.example.UserService - Request failed\n" +
@@ -74,46 +75,32 @@ void parse_java_stack_trace_groups_exception_with_stack_frames() throws IOExcept
"\tat com.example.Controller.handle(Controller.java:28)\n" +
"Caused by: java.sql.SQLException: Connection refused\n" +
"\tat com.mysql.jdbc.Connection.connect(Connection.java:456)\n" +
- "\t... 12 more\n" +
- "2024-01-15 10:23:45.456 INFO [main] com.example.UserService - Retrying\n" +
- "2024-01-15 10:23:46.789 WARN [worker-1] com.example.Cache - Cache miss\n";
+ "2024-01-15 10:23:45.456 INFO [main] com.example.UserService - Retrying\n";
createObjectUnderTest().parse(toInputStream(input), eventConsumer);
final ArgumentCaptor> captor = ArgumentCaptor.forClass(Record.class);
- verify(eventConsumer, times(3)).accept(captor.capture());
+ verify(eventConsumer, times(2)).accept(captor.capture());
final List> records = captor.getAllValues();
-
- // Event 1: ERROR log + stack trace (7 lines grouped)
final String event1 = records.get(0).getData().get("message", String.class);
- assertThat(event1, notNullValue());
assertThat(event1, containsString("NullPointerException"));
assertThat(event1, containsString("at com.example.UserService.getUser"));
assertThat(event1, containsString("Caused by: java.sql.SQLException"));
- assertThat(event1, containsString("... 12 more"));
-
- // Event 2: INFO single line
- final String event2 = records.get(1).getData().get("message", String.class);
- assertThat(event2, equalTo("2024-01-15 10:23:45.456 INFO [main] com.example.UserService - Retrying"));
-
- // Event 3: WARN single line
- final String event3 = records.get(2).getData().get("message", String.class);
- assertThat(event3, equalTo("2024-01-15 10:23:46.789 WARN [worker-1] com.example.Cache - Cache miss"));
+ assertThat(records.get(1).getData().get("message", String.class),
+ equalTo("2024-01-15 10:23:45.456 INFO [main] com.example.UserService - Retrying"));
}
@Test
- void parse_python_traceback_groups_traceback_with_error_line() throws IOException {
- lenient().when(config.getMatch()).thenReturn("^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}");
- lenient().when(config.getNegate()).thenReturn(true);
- lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+ void parse_python_traceback_with_event_start_pattern() throws IOException {
+ lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}"));
+ lenient().when(config.getEventStartPattern()).thenReturn("^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}");
final String input =
"2024-03-20 08:15:00,123 INFO Starting application\n" +
"2024-03-20 08:15:02,789 ERROR Unhandled exception\n" +
"Traceback (most recent call last):\n" +
" File \"/app/worker.py\", line 45, in process\n" +
- " result = transform(record)\n" +
"ValueError: invalid literal for int()\n" +
"2024-03-20 08:15:03,456 INFO Recovered\n";
@@ -123,28 +110,19 @@ void parse_python_traceback_groups_traceback_with_error_line() throws IOExceptio
verify(eventConsumer, times(3)).accept(captor.capture());
final List> records = captor.getAllValues();
-
- // Event 1: single INFO line
assertThat(records.get(0).getData().get("message", String.class),
equalTo("2024-03-20 08:15:00,123 INFO Starting application"));
-
- // Event 2: ERROR + traceback (5 lines grouped)
final String event2 = records.get(1).getData().get("message", String.class);
- assertThat(event2, containsString("ERROR Unhandled exception"));
- assertThat(event2, containsString("Traceback (most recent call last):"));
- assertThat(event2, containsString("File \"/app/worker.py\""));
- assertThat(event2, containsString("ValueError: invalid literal"));
-
- // Event 3: single INFO line
+ assertThat(event2, containsString("Traceback"));
+ assertThat(event2, containsString("ValueError"));
assertThat(records.get(2).getData().get("message", String.class),
equalTo("2024-03-20 08:15:03,456 INFO Recovered"));
}
@Test
- void parse_xml_multiline_logs_groups_xml_body_with_header() throws IOException {
- lenient().when(config.getMatch()).thenReturn("^\\[\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}");
- lenient().when(config.getNegate()).thenReturn(true);
- lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+ void parse_xml_multiline_with_event_start_pattern() throws IOException {
+ lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\[\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}"));
+ lenient().when(config.getEventStartPattern()).thenReturn("^\\[\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}");
final String input =
"[2024-05-10 14:30:00.001] [INFO] Incoming request:\n" +
@@ -159,68 +137,24 @@ void parse_xml_multiline_logs_groups_xml_body_with_header() throws IOException {
verify(eventConsumer, times(2)).accept(captor.capture());
final List> records = captor.getAllValues();
-
- // Event 1: log line + XML body (4 lines grouped)
final String event1 = records.get(0).getData().get("message", String.class);
- assertThat(event1, containsString("[INFO] Incoming request:"));
assertThat(event1, containsString(""));
- assertThat(event1, containsString("value"));
assertThat(event1, containsString(""));
-
- // Event 2: single line
assertThat(records.get(1).getData().get("message", String.class),
equalTo("[2024-05-10 14:30:00.045] [INFO] Request processed"));
}
@Test
- void parse_sql_multiline_logs_groups_query_with_header() throws IOException {
- lenient().when(config.getMatch()).thenReturn("^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}");
- lenient().when(config.getNegate()).thenReturn(true);
- lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
-
- final String input =
- "2024-07-01 09:00:01 [Query] thread_id=145 exec_time=0.003s\n" +
- "SELECT u.id, u.name\n" +
- "FROM users u\n" +
- "WHERE u.active = 1\n" +
- "ORDER BY u.name;\n" +
- "2024-07-01 09:00:02 [Query] thread_id=146 exec_time=0.001s\n" +
- "SELECT COUNT(*) FROM sessions;\n";
-
- createObjectUnderTest().parse(toInputStream(input), eventConsumer);
-
- final ArgumentCaptor> captor = ArgumentCaptor.forClass(Record.class);
- verify(eventConsumer, times(2)).accept(captor.capture());
-
- final List> records = captor.getAllValues();
-
- // Event 1: query header + multi-line SQL (5 lines grouped)
- final String event1 = records.get(0).getData().get("message", String.class);
- assertThat(event1, containsString("[Query] thread_id=145"));
- assertThat(event1, containsString("SELECT u.id, u.name"));
- assertThat(event1, containsString("FROM users u"));
- assertThat(event1, containsString("WHERE u.active = 1"));
- assertThat(event1, containsString("ORDER BY u.name;"));
-
- // Event 2: query header + single-line SQL (2 lines grouped)
- final String event2 = records.get(1).getData().get("message", String.class);
- assertThat(event2, containsString("[Query] thread_id=146"));
- assertThat(event2, containsString("SELECT COUNT(*) FROM sessions;"));
- }
-
- @Test
- void parse_syslog_ise_multiline_groups_continuation_lines() throws IOException {
- lenient().when(config.getMatch()).thenReturn("^<\\d+>[A-Z][a-z]{2}\\s+\\d+\\s+\\d{2}:\\d{2}:\\d{2}");
- lenient().when(config.getNegate()).thenReturn(true);
- lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+ void parse_syslog_ise_with_event_start_pattern() throws IOException {
+ lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^<\\d+>[A-Z][a-z]{2}\\s+\\d+\\s+\\d{2}:\\d{2}:\\d{2}"));
+ lenient().when(config.getEventStartPattern()).thenReturn("^<\\d+>[A-Z][a-z]{2}\\s+\\d+\\s+\\d{2}:\\d{2}:\\d{2}");
final String input =
- "<181>Jun 1 12:39:49 Infra-ISE CISE_Audit 0000000176 NOTICE Admin-Login: success\n" +
- "<181>Jun 1 12:39:49 Infra-ISE CISE_Audit 0000000177 NOTICE OpenAPI: Response={\\\n" +
- " \"version\" : \"1.0.0\",\\\n" +
- " \"status\" : \"ok\"\\\n" +
+ "<181>Jun 1 12:39:49 Infra-ISE Audit NOTICE Admin-Login: success\n" +
+ "<181>Jun 1 12:39:49 Infra-ISE Audit NOTICE OpenAPI: Response={\n" +
+ " \"version\" : \"1.0.0\"\n" +
"}, HttpCode=200\n" +
- "<181>Jun 1 12:40:15 Infra-ISE CISE_Audit 0000000178 NOTICE Config-Change: added\n";
+ "<181>Jun 1 12:40:15 Infra-ISE Audit NOTICE Config-Change: added\n";
createObjectUnderTest().parse(toInputStream(input), eventConsumer);
@@ -228,35 +162,26 @@ void parse_syslog_ise_multiline_groups_continuation_lines() throws IOException {
verify(eventConsumer, times(3)).accept(captor.capture());
final List> records = captor.getAllValues();
-
- // Event 1: single-line syslog
assertThat(records.get(0).getData().get("message", String.class),
containsString("Admin-Login: success"));
-
- // Event 2: multiline syslog with JSON continuation (4 lines grouped)
final String event2 = records.get(1).getData().get("message", String.class);
assertThat(event2, containsString("OpenAPI: Response="));
assertThat(event2, containsString("\"version\" : \"1.0.0\""));
assertThat(event2, containsString("HttpCode=200"));
-
- // Event 3: single-line syslog
assertThat(records.get(2).getData().get("message", String.class),
containsString("Config-Change: added"));
}
@Test
- void parse_with_negate_false_groups_matching_lines_with_previous() throws IOException {
- lenient().when(config.getMatch()).thenReturn("^\\s+(at |\\.\\.\\.|Caused by:)");
- lenient().when(config.getNegate()).thenReturn(false);
- lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+ void parse_with_continuation_line_start_pattern() throws IOException {
+ lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\s+(at |\\.\\.\\.|Caused by:)"));
+ lenient().when(config.getContinuationLineStartPattern()).thenReturn("^\\s+(at |\\.\\.\\.|Caused by:)");
final String input =
"java.lang.RuntimeException: error\n" +
" at com.example.A.method(A.java:1)\n" +
- " at com.example.B.method(B.java:2)\n" +
" Caused by: java.io.IOException\n" +
" at com.example.C.read(C.java:3)\n" +
- " ... 5 more\n" +
"Application recovered\n";
createObjectUnderTest().parse(toInputStream(input), eventConsumer);
@@ -265,31 +190,25 @@ void parse_with_negate_false_groups_matching_lines_with_previous() throws IOExce
verify(eventConsumer, times(2)).accept(captor.capture());
final List> records = captor.getAllValues();
-
- // Event 1: exception + all matching stack frames (6 lines grouped)
final String event1 = records.get(0).getData().get("message", String.class);
assertThat(event1, containsString("RuntimeException: error"));
assertThat(event1, containsString("at com.example.A.method"));
assertThat(event1, containsString("Caused by: java.io.IOException"));
- assertThat(event1, containsString("... 5 more"));
-
- // Event 2: non-matching line on its own
assertThat(records.get(1).getData().get("message", String.class),
equalTo("Application recovered"));
}
@Test
- void parse_with_next_mode_prepends_continuation_to_following_event() throws IOException {
- lenient().when(config.getMatch()).thenReturn("^\\s");
- lenient().when(config.getNegate()).thenReturn(false);
- lenient().when(config.getWhat()).thenReturn(MultilineWhat.NEXT);
+ void parse_with_event_end_pattern() throws IOException {
+ lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^---$"));
+ lenient().when(config.getEventEndPattern()).thenReturn("^---$");
final String input =
- " context-line-1\n" +
- " context-line-2\n" +
- "MAIN EVENT A\n" +
- " context-line-3\n" +
- "MAIN EVENT B\n";
+ "entry 1 line 1\n" +
+ "entry 1 line 2\n" +
+ "---\n" +
+ "entry 2 line 1\n" +
+ "---\n";
createObjectUnderTest().parse(toInputStream(input), eventConsumer);
@@ -297,136 +216,68 @@ void parse_with_next_mode_prepends_continuation_to_following_event() throws IOEx
verify(eventConsumer, times(2)).accept(captor.capture());
final List> records = captor.getAllValues();
-
- // Event 1: continuation lines + first non-continuation
- final String event1 = records.get(0).getData().get("message", String.class);
- assertThat(event1, containsString("context-line-1"));
- assertThat(event1, containsString("context-line-2"));
- assertThat(event1, containsString("MAIN EVENT A"));
-
- // Event 2: continuation line + second non-continuation
- final String event2 = records.get(1).getData().get("message", String.class);
- assertThat(event2, containsString("context-line-3"));
- assertThat(event2, containsString("MAIN EVENT B"));
+ assertThat(records.get(0).getData().get("message", String.class),
+ equalTo("entry 1 line 1\nentry 1 line 2\n---"));
+ assertThat(records.get(1).getData().get("message", String.class),
+ equalTo("entry 2 line 1\n---"));
}
@Test
- void parse_respects_max_lines_limit() throws IOException {
- lenient().when(config.getMatch()).thenReturn("^\\d{4}");
- lenient().when(config.getNegate()).thenReturn(true);
- lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
- lenient().when(config.getMaxLines()).thenReturn(3);
+ void parse_with_continuation_end_pattern() throws IOException {
+ lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\s"));
+ lenient().when(config.getContinuationLineEndPattern()).thenReturn("^\\s");
final String input =
- "2024 start\n" +
- " line 2\n" +
- " line 3\n" +
- " line 4\n" +
- " line 5\n" +
- "2024 next event\n";
+ " context-line-1\n" +
+ " context-line-2\n" +
+ "MAIN EVENT A\n" +
+ " context-line-3\n" +
+ "MAIN EVENT B\n";
createObjectUnderTest().parse(toInputStream(input), eventConsumer);
final ArgumentCaptor> captor = ArgumentCaptor.forClass(Record.class);
- verify(eventConsumer, times(3)).accept(captor.capture());
+ verify(eventConsumer, times(2)).accept(captor.capture());
final List> records = captor.getAllValues();
-
- // Event 1: first 3 lines (hit max_lines)
final String event1 = records.get(0).getData().get("message", String.class);
- assertThat(event1, equalTo("2024 start\n line 2\n line 3"));
-
- // Event 2: overflow lines
+ assertThat(event1, containsString("context-line-1"));
+ assertThat(event1, containsString("MAIN EVENT A"));
final String event2 = records.get(1).getData().get("message", String.class);
- assertThat(event2, equalTo(" line 4\n line 5"));
-
- // Event 3: next event
- assertThat(records.get(2).getData().get("message", String.class),
- equalTo("2024 next event"));
+ assertThat(event2, containsString("context-line-3"));
+ assertThat(event2, containsString("MAIN EVENT B"));
}
@Test
- void parse_respects_max_length_limit() throws IOException {
- lenient().when(config.getMatch()).thenReturn("^\\d{4}");
- lenient().when(config.getNegate()).thenReturn(true);
- lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
- lenient().when(config.getMaxLength()).thenReturn(25);
+ void parse_with_omit_matched_section() throws IOException {
+ lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\d{4}-\\d{2}-\\d{2}\\s+"));
+ lenient().when(config.getEventStartPattern()).thenReturn("^\\d{4}-\\d{2}-\\d{2}\\s+");
+ lenient().when(config.getOmitMatchedSection()).thenReturn(true);
final String input =
- "2024 start here\n" +
- " long continuation line\n" +
- "2024 next\n";
+ "2024-01-01 ERROR something bad\n" +
+ " stack trace\n" +
+ "2024-01-02 INFO recovered\n";
createObjectUnderTest().parse(toInputStream(input), eventConsumer);
final ArgumentCaptor> captor = ArgumentCaptor.forClass(Record.class);
- verify(eventConsumer, times(3)).accept(captor.capture());
+ verify(eventConsumer, times(2)).accept(captor.capture());
final List> records = captor.getAllValues();
-
- // Event 1: flushed due to max_length before adding continuation
assertThat(records.get(0).getData().get("message", String.class),
- equalTo("2024 start here"));
-
- // Event 2: continuation line on its own
+ equalTo("ERROR something bad\n stack trace"));
assertThat(records.get(1).getData().get("message", String.class),
- equalTo(" long continuation line"));
-
- // Event 3: next event
- assertThat(records.get(2).getData().get("message", String.class),
- equalTo("2024 next"));
+ equalTo("INFO recovered"));
}
@Test
void parse_empty_input_produces_no_events() throws IOException {
- lenient().when(config.getMatch()).thenReturn("^\\d{4}");
- lenient().when(config.getNegate()).thenReturn(true);
- lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+ lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\d{4}"));
+ lenient().when(config.getEventStartPattern()).thenReturn("^\\d{4}");
createObjectUnderTest().parse(toInputStream(""), eventConsumer);
verify(eventConsumer, times(0)).accept(ArgumentCaptor.forClass(Record.class).capture());
}
-
- @Test
- void parse_all_lines_are_single_events_when_all_match_pattern() throws IOException {
- lenient().when(config.getMatch()).thenReturn("^\\d{4}");
- lenient().when(config.getNegate()).thenReturn(true);
- lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
-
- final String input =
- "2024 event one\n" +
- "2024 event two\n" +
- "2024 event three\n";
-
- createObjectUnderTest().parse(toInputStream(input), eventConsumer);
-
- final ArgumentCaptor> captor = ArgumentCaptor.forClass(Record.class);
- verify(eventConsumer, times(3)).accept(captor.capture());
-
- final List> records = captor.getAllValues();
- assertThat(records.get(0).getData().get("message", String.class), equalTo("2024 event one"));
- assertThat(records.get(1).getData().get("message", String.class), equalTo("2024 event two"));
- assertThat(records.get(2).getData().get("message", String.class), equalTo("2024 event three"));
- }
-
- @Test
- void parse_all_lines_form_single_event_when_none_match_pattern() throws IOException {
- lenient().when(config.getMatch()).thenReturn("^NEVER_MATCHES");
- lenient().when(config.getNegate()).thenReturn(true);
- lenient().when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
-
- final String input =
- "line one\n" +
- "line two\n" +
- "line three\n";
-
- createObjectUnderTest().parse(toInputStream(input), eventConsumer);
-
- final ArgumentCaptor> captor = ArgumentCaptor.forClass(Record.class);
- verify(eventConsumer, times(1)).accept(captor.capture());
-
- assertThat(captor.getValue().getData().get("message", String.class),
- equalTo("line one\nline two\nline three"));
- }
}
diff --git a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfigTest.java b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfigTest.java
index 6d74973abf..74c5f97b9a 100644
--- a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfigTest.java
+++ b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfigTest.java
@@ -10,12 +10,9 @@
package org.opensearch.dataprepper.plugins.codec.multiline;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.ValueSource;
-
-import java.lang.reflect.Field;
import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.nullValue;
import static org.hamcrest.MatcherAssert.assertThat;
class MultilineInputCodecConfigTest {
@@ -24,71 +21,102 @@ class MultilineInputCodecConfigTest {
void defaults_are_correct() {
final MultilineInputCodecConfig config = new MultilineInputCodecConfig();
- assertThat(config.getNegate(), equalTo(false));
- assertThat(config.getWhat(), equalTo(MultilineWhat.PREVIOUS));
+ assertThat(config.getEventStartPattern(), nullValue());
+ assertThat(config.getEventEndPattern(), nullValue());
+ assertThat(config.getContinuationLineStartPattern(), nullValue());
+ assertThat(config.getContinuationLineEndPattern(), nullValue());
+ assertThat(config.getOmitMatchedSection(), equalTo(false));
assertThat(config.getMaxLines(), equalTo(MultilineInputCodecConfig.DEFAULT_MAX_LINES));
assertThat(config.getMaxLength(), equalTo(MultilineInputCodecConfig.DEFAULT_MAX_LENGTH));
assertThat(config.getLineSeparator(), equalTo(MultilineInputCodecConfig.DEFAULT_LINE_SEPARATOR));
- assertThat(config.getMatch(), equalTo(null));
+ assertThat(config.getConfiguredPatternString(), nullValue());
}
@Test
- void getMatch_returns_configured_value() throws Exception {
- final MultilineInputCodecConfig config = new MultilineInputCodecConfig();
- setField(config, "match", "^\\d{4}");
- assertThat(config.getMatch(), equalTo("^\\d{4}"));
+ void isExactlyOnePatternSpecified_returns_true_for_event_start_pattern() {
+ final MultilineInputCodecConfig config = MultilineInputCodecConfig.builder()
+ .withEventStartPattern("^\\d{4}")
+ .build();
+ assertThat(config.isExactlyOnePatternSpecified(), equalTo(true));
}
@Test
- void isValidPattern_returns_true_for_valid_regex() throws Exception {
- final MultilineInputCodecConfig config = new MultilineInputCodecConfig();
- setField(config, "match", "^\\d{4}-\\d{2}-\\d{2}");
- assertThat(config.isValidPattern(), equalTo(true));
+ void isExactlyOnePatternSpecified_returns_true_for_event_end_pattern() {
+ final MultilineInputCodecConfig config = MultilineInputCodecConfig.builder()
+ .withEventEndPattern("^---$")
+ .build();
+ assertThat(config.isExactlyOnePatternSpecified(), equalTo(true));
}
@Test
- void isValidPattern_returns_false_for_invalid_regex() throws Exception {
- final MultilineInputCodecConfig config = new MultilineInputCodecConfig();
- setField(config, "match", "[invalid(");
- assertThat(config.isValidPattern(), equalTo(false));
+ void isExactlyOnePatternSpecified_returns_true_for_continuation_line_start_pattern() {
+ final MultilineInputCodecConfig config = MultilineInputCodecConfig.builder()
+ .withContinuationLineStartPattern("^\\s")
+ .build();
+ assertThat(config.isExactlyOnePatternSpecified(), equalTo(true));
}
@Test
- void isValidPattern_returns_false_for_null_match() {
+ void isExactlyOnePatternSpecified_returns_true_for_continuation_line_end_pattern() {
+ final MultilineInputCodecConfig config = MultilineInputCodecConfig.builder()
+ .withContinuationLineEndPattern("^\\s")
+ .build();
+ assertThat(config.isExactlyOnePatternSpecified(), equalTo(true));
+ }
+
+ @Test
+ void isExactlyOnePatternSpecified_returns_false_when_none_specified() { // lower bound: a config from the no-arg constructor has no pattern set, so the "exactly one" check must fail
final MultilineInputCodecConfig config = new MultilineInputCodecConfig();
+ assertThat(config.isExactlyOnePatternSpecified(), equalTo(false));
+ }
+
+ @Test
+ void isExactlyOnePatternSpecified_returns_false_when_two_specified() { // upper bound: setting more than one pattern option must also fail the "exactly one" check
+ final MultilineInputCodecConfig config = MultilineInputCodecConfig.builder()
+ .withEventStartPattern("^\\d{4}")
+ .withEventEndPattern("^---$")
+ .build();
+ assertThat(config.isExactlyOnePatternSpecified(), equalTo(false));
+ }
+
+ @Test
+ void isValidPattern_returns_true_for_valid_regex() { // a syntactically valid regex supplied through the event-start option is accepted
+ final MultilineInputCodecConfig config = MultilineInputCodecConfig.builder()
+ .withEventStartPattern("^\\d{4}-\\d{2}-\\d{2}")
+ .build();
+ assertThat(config.isValidPattern(), equalTo(true));
+ }
+
+ @Test
+ void isValidPattern_returns_false_for_invalid_regex() { // the config is expected to report an uncompilable regex as invalid rather than throw from this check
+ final MultilineInputCodecConfig config = MultilineInputCodecConfig.builder()
+ .withEventStartPattern("[invalid(") // the '[' is never closed, so java.util.regex cannot compile this
+ .build();
assertThat(config.isValidPattern(), equalTo(false));
}
@Test
- void isValidPattern_returns_false_for_empty_match() throws Exception {
+ void isValidPattern_returns_false_when_no_pattern_configured() {
final MultilineInputCodecConfig config = new MultilineInputCodecConfig();
- setField(config, "match", "");
assertThat(config.isValidPattern(), equalTo(false));
}
- @ParameterizedTest
- @ValueSource(ints = {1, 100, 1000})
- void getMaxLines_returns_configured_value(final int maxLines) throws Exception {
+ @Test
+ void getConfiguredPatternString_returns_null_when_none_specified() {
final MultilineInputCodecConfig config = new MultilineInputCodecConfig();
- setField(config, "maxLines", maxLines);
- assertThat(config.getMaxLines(), equalTo(maxLines));
+ assertThat(config.getConfiguredPatternString(), nullValue());
}
- @ParameterizedTest
- @ValueSource(ints = {1, 5000, 50000})
- void getMaxLength_returns_configured_value(final int maxLength) throws Exception {
+ @Test
+ void isValidEncoding_returns_true_for_default_utf8() {
final MultilineInputCodecConfig config = new MultilineInputCodecConfig();
- setField(config, "maxLength", maxLength);
- assertThat(config.getMaxLength(), equalTo(maxLength));
+ assertThat(config.isValidEncoding(), equalTo(true));
}
- private void setField(final Object object, final String fieldName, final Object value) throws Exception {
- final Field field = object.getClass().getDeclaredField(fieldName);
- try {
- field.setAccessible(true);
- field.set(object, value);
- } finally {
- field.setAccessible(false);
- }
+ @Test
+ void isValidEncoding_returns_true_for_valid_charset() { // NOTE(review): no charset is configured here, so this repeats the default path already covered by isValidEncoding_returns_true_for_default_utf8; set a non-default charset or rename the test
+ final MultilineInputCodecConfig config = new MultilineInputCodecConfig();
+ assertThat(config.isValidEncoding(), equalTo(true));
+ assertThat(config.getEncoding().name(), equalTo("UTF-8")); // only the default encoding's name is checked
}
}
diff --git a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecTest.java b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecTest.java
index 5738a82da7..a7f66c8306 100644
--- a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecTest.java
+++ b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecTest.java
@@ -26,6 +26,7 @@
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
+import java.util.regex.Pattern;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.notNullValue;
@@ -66,80 +67,39 @@ void constructor_throws_if_eventFactory_is_null() {
}
@Test
- void constructor_throws_if_match_pattern_is_invalid() {
- when(config.getMatch()).thenReturn("[invalid(");
+ void constructor_throws_if_no_pattern_configured() {
+ when(config.getCompiledPattern()).thenReturn(null);
assertThrows(IllegalArgumentException.class, this::createObjectUnderTest);
}
@Test
- void parse_throws_if_inputStream_is_null() {
- when(config.getMatch()).thenReturn("^\\S");
- when(config.getNegate()).thenReturn(true);
- when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
- when(config.getMaxLines()).thenReturn(500);
- when(config.getMaxLength()).thenReturn(10000);
- when(config.getLineSeparator()).thenReturn("\n");
-
- final MultilineInputCodec codec = createObjectUnderTest();
- assertThrows(NullPointerException.class, () -> codec.parse(null, events -> {}));
- }
-
- @Test
- void parse_throws_if_consumer_is_null() {
- when(config.getMatch()).thenReturn("^\\S");
- when(config.getNegate()).thenReturn(true);
- when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
- when(config.getMaxLines()).thenReturn(500);
- when(config.getMaxLength()).thenReturn(10000);
- when(config.getLineSeparator()).thenReturn("\n");
-
- final MultilineInputCodec codec = createObjectUnderTest();
- assertThrows(NullPointerException.class, () -> codec.parse(toInputStream("test"), null));
- }
-
- @Test
- void parse_empty_input_produces_no_events() throws IOException {
- when(config.getMatch()).thenReturn("^\\S");
- when(config.getNegate()).thenReturn(true);
- when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
- when(config.getMaxLines()).thenReturn(500);
- when(config.getMaxLength()).thenReturn(10000);
- when(config.getLineSeparator()).thenReturn("\n");
+ void constructor_throws_if_pattern_is_invalid() {
+ when(config.getCompiledPattern()).thenReturn(null);
- final List> events = parseContent("");
- assertThat(events.size(), equalTo(0));
+ assertThrows(IllegalArgumentException.class, this::createObjectUnderTest);
}
- @Test
- void parse_single_line_produces_one_event() throws IOException {
- when(config.getMatch()).thenReturn("^\\d{4}-\\d{2}-\\d{2}");
- when(config.getNegate()).thenReturn(true);
- when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+ private void setupConfig(final String patternStr) {
+ when(config.getCompiledPattern()).thenReturn(Pattern.compile(patternStr));
when(config.getMaxLines()).thenReturn(500);
when(config.getMaxLength()).thenReturn(10000);
when(config.getLineSeparator()).thenReturn("\n");
-
- final List> events = parseContent("2024-01-01 INFO single line\n");
- assertThat(events.size(), equalTo(1));
- assertThat(events.get(0).getData().get("message", String.class), equalTo("2024-01-01 INFO single line"));
+ when(config.getOmitMatchedSection()).thenReturn(false);
+ when(config.getEncoding()).thenReturn(StandardCharsets.UTF_8);
}
@Nested
- class PreviousModeWithNegateTrue {
+ class EventStartMode {
@BeforeEach
void setUp() {
- when(config.getMatch()).thenReturn("^\\d{4}-\\d{2}-\\d{2}");
- when(config.getNegate()).thenReturn(true);
- when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
- when(config.getMaxLines()).thenReturn(500);
- when(config.getMaxLength()).thenReturn(10000);
- when(config.getLineSeparator()).thenReturn("\n");
+ setupConfig("^\\d{4}-\\d{2}-\\d{2}");
+ when(config.getEventStartPattern()).thenReturn("^\\d{4}-\\d{2}-\\d{2}");
}
@Test
- void groups_java_stack_trace_with_timestamp_start() throws IOException {
+ void groups_stack_trace_with_timestamp_start() throws IOException {
final String input = "2024-01-01 ERROR NullPointerException\n" +
" at com.example.Service.method(Service.java:42)\n" +
" at com.example.Main.run(Main.java:10)\n" +
@@ -157,7 +117,7 @@ void groups_java_stack_trace_with_timestamp_start() throws IOException {
}
@Test
- void multiple_single_line_events_each_matching_pattern() throws IOException {
+ void multiple_single_line_events() throws IOException {
final String input = "2024-01-01 INFO line one\n" +
"2024-01-02 INFO line two\n" +
"2024-01-03 INFO line three\n";
@@ -171,64 +131,115 @@ void multiple_single_line_events_each_matching_pattern() throws IOException {
}
@Test
- void continuation_lines_at_beginning_are_grouped_as_first_event() throws IOException {
- final String input = " orphan continuation line 1\n" +
- " orphan continuation line 2\n" +
- "2024-01-01 INFO first real entry\n";
+ void continuation_lines_at_beginning_grouped_as_first_event() throws IOException {
+ final String input = " orphan line 1\n" +
+ " orphan line 2\n" +
+ "2024-01-01 INFO first entry\n";
final List> events = parseContent(input);
assertThat(events.size(), equalTo(2));
assertThat(events.get(0).getData().get("message", String.class),
- equalTo(" orphan continuation line 1\n orphan continuation line 2"));
+ equalTo(" orphan line 1\n orphan line 2"));
assertThat(events.get(1).getData().get("message", String.class),
- equalTo("2024-01-01 INFO first real entry"));
+ equalTo("2024-01-01 INFO first entry"));
}
@Test
- void last_event_with_continuations_flushed_at_end_of_stream() throws IOException {
- final String input = "2024-01-01 ERROR Exception occurred\n" +
- " at com.example.Foo.bar(Foo.java:1)\n" +
- " at com.example.Baz.run(Baz.java:2)\n";
+ void last_event_flushed_at_end_of_stream() throws IOException {
+ final String input = "2024-01-01 ERROR Exception\n" +
+ " at com.example.Foo.bar(Foo.java:1)\n";
final List> events = parseContent(input);
assertThat(events.size(), equalTo(1));
assertThat(events.get(0).getData().get("message", String.class),
- equalTo("2024-01-01 ERROR Exception occurred\n" +
- " at com.example.Foo.bar(Foo.java:1)\n" +
- " at com.example.Baz.run(Baz.java:2)"));
+ equalTo("2024-01-01 ERROR Exception\n at com.example.Foo.bar(Foo.java:1)"));
+ }
+
+ @Test
+ void empty_input_produces_no_events() throws IOException {
+ final List> events = parseContent("");
+ assertThat(events.size(), equalTo(0));
}
@Test
- void no_lines_match_pattern_produces_single_event() throws IOException {
- final String input = " continuation line 1\n" +
- " continuation line 2\n" +
- " continuation line 3\n";
+ void no_lines_match_produces_single_event() throws IOException {
+ final String input = " line 1\n line 2\n line 3\n";
final List> events = parseContent(input);
assertThat(events.size(), equalTo(1));
assertThat(events.get(0).getData().get("message", String.class),
- equalTo(" continuation line 1\n continuation line 2\n continuation line 3"));
+ equalTo(" line 1\n line 2\n line 3"));
}
}
@Nested
- class PreviousModeWithNegateFalse {
+ class EventEndMode {
@BeforeEach
void setUp() {
- when(config.getMatch()).thenReturn("^\\s+(at |\\.\\.\\.|Caused by:)");
- when(config.getNegate()).thenReturn(false);
- when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
- when(config.getMaxLines()).thenReturn(500);
- when(config.getMaxLength()).thenReturn(10000);
- when(config.getLineSeparator()).thenReturn("\n");
+ setupConfig("^---$");
+ when(config.getEventEndPattern()).thenReturn("^---$");
}
@Test
- void groups_stack_trace_lines_matching_pattern_with_previous() throws IOException {
+ void groups_lines_until_separator() throws IOException {
+ final String input = "line 1\n" +
+ "line 2\n" +
+ "---\n" +
+ "line 3\n" +
+ "line 4\n" +
+ "---\n";
+
+ final List> events = parseContent(input);
+
+ assertThat(events.size(), equalTo(2));
+ assertThat(events.get(0).getData().get("message", String.class),
+ equalTo("line 1\nline 2\n---"));
+ assertThat(events.get(1).getData().get("message", String.class),
+ equalTo("line 3\nline 4\n---"));
+ }
+
+ @Test
+ void trailing_lines_without_end_marker_flushed() throws IOException {
+ final String input = "line 1\n" +
+ "---\n" +
+ "line 2\n" +
+ "line 3\n";
+
+ final List> events = parseContent(input);
+
+ assertThat(events.size(), equalTo(2));
+ assertThat(events.get(0).getData().get("message", String.class),
+ equalTo("line 1\n---"));
+ assertThat(events.get(1).getData().get("message", String.class),
+ equalTo("line 2\nline 3"));
+ }
+
+ @Test
+ void single_line_matching_end_pattern() throws IOException {
+ final String input = "---\n";
+
+ final List> events = parseContent(input);
+
+ assertThat(events.size(), equalTo(1));
+ assertThat(events.get(0).getData().get("message", String.class), equalTo("---"));
+ }
+ }
+
+ @Nested
+ class ContinuationStartMode {
+
+ @BeforeEach
+ void setUp() {
+ setupConfig("^\\s+(at |\\.\\.\\.|Caused by:)");
+ when(config.getContinuationLineStartPattern()).thenReturn("^\\s+(at |\\.\\.\\.|Caused by:)");
+ }
+
+ @Test
+ void groups_stack_trace_lines_with_previous() throws IOException {
final String input = "java.lang.NullPointerException: null\n" +
" at com.example.Service.process(Service.java:42)\n" +
" at com.example.Main.run(Main.java:10)\n" +
@@ -246,7 +257,7 @@ void groups_stack_trace_lines_matching_pattern_with_previous() throws IOExceptio
}
@Test
- void caused_by_is_grouped_with_previous() throws IOException {
+ void caused_by_grouped_with_previous() throws IOException {
final String input = "java.lang.RuntimeException: error\n" +
" at com.example.A.method(A.java:1)\n" +
" Caused by: java.io.IOException\n" +
@@ -265,16 +276,11 @@ void caused_by_is_grouped_with_previous() throws IOException {
}
@Nested
- class NextMode {
+ class ContinuationEndMode {
@BeforeEach
void setUp() {
- when(config.getMatch()).thenReturn("^\\s");
- when(config.getNegate()).thenReturn(false);
- when(config.getWhat()).thenReturn(MultilineWhat.NEXT);
- when(config.getMaxLines()).thenReturn(500);
- when(config.getMaxLength()).thenReturn(10000);
- when(config.getLineSeparator()).thenReturn("\n");
+ setupConfig("^\\s");
}
@Test
@@ -291,7 +297,7 @@ void continuation_lines_prepended_to_next_event() throws IOException {
}
@Test
- void multiple_groups_in_next_mode() throws IOException {
+ void multiple_groups() throws IOException {
final String input = " context A\n" +
"EVENT A\n" +
" context B\n" +
@@ -307,25 +313,22 @@ void multiple_groups_in_next_mode() throws IOException {
}
@Test
- void trailing_continuation_lines_flushed_at_end_of_stream() throws IOException {
+ void trailing_continuation_lines_flushed() throws IOException {
final String input = "EVENT A\n" +
- " trailing context 1\n" +
- " trailing context 2\n";
+ " trailing 1\n" +
+ " trailing 2\n";
final List> events = parseContent(input);
assertThat(events.size(), equalTo(2));
- assertThat(events.get(0).getData().get("message", String.class),
- equalTo("EVENT A"));
+ assertThat(events.get(0).getData().get("message", String.class), equalTo("EVENT A"));
assertThat(events.get(1).getData().get("message", String.class),
- equalTo(" trailing context 1\n trailing context 2"));
+ equalTo(" trailing 1\n trailing 2"));
}
@Test
- void no_continuation_lines_each_line_is_separate_event() throws IOException {
- final String input = "EVENT A\n" +
- "EVENT B\n" +
- "EVENT C\n";
+ void no_continuation_lines_each_is_separate_event() throws IOException {
+ final String input = "EVENT A\nEVENT B\nEVENT C\n";
final List> events = parseContent(input);
@@ -337,60 +340,69 @@ void no_continuation_lines_each_line_is_separate_event() throws IOException {
}
@Nested
- class NextModeMaxLinesLimit {
+ class OmitMatchedSection {
- @BeforeEach
- void setUp() {
- when(config.getMatch()).thenReturn("^\\d{4}");
- when(config.getNegate()).thenReturn(true);
- when(config.getWhat()).thenReturn(MultilineWhat.NEXT);
- when(config.getMaxLines()).thenReturn(3);
+ @Test
+ void event_start_pattern_omits_matched_section() throws IOException {
+ when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\d{4}-\\d{2}-\\d{2}\\s+"));
+ when(config.getEventStartPattern()).thenReturn("^\\d{4}-\\d{2}-\\d{2}\\s+");
+ when(config.getMaxLines()).thenReturn(500);
when(config.getMaxLength()).thenReturn(10000);
when(config.getLineSeparator()).thenReturn("\n");
- }
+ when(config.getOmitMatchedSection()).thenReturn(true);
+ when(config.getEncoding()).thenReturn(StandardCharsets.UTF_8);
- @Test
- void flushes_continuation_lines_when_max_lines_exceeded_in_next_mode() throws IOException {
- final String input = " ctx 1\n" +
- " ctx 2\n" +
- " ctx 3\n" +
- " ctx 4\n" +
- "2024 EVENT\n";
+ final String input = "2024-01-01 ERROR something\n" +
+ " stack trace line\n" +
+ "2024-01-02 INFO recovered\n";
final List> events = parseContent(input);
assertThat(events.size(), equalTo(2));
assertThat(events.get(0).getData().get("message", String.class),
- equalTo(" ctx 1\n ctx 2\n ctx 3"));
+ equalTo("ERROR something\n stack trace line"));
assertThat(events.get(1).getData().get("message", String.class),
- equalTo(" ctx 4\n2024 EVENT"));
+ equalTo("INFO recovered"));
}
- }
-
- @Nested
- class NextModeWithNegateTrue {
- @BeforeEach
- void setUp() {
- when(config.getMatch()).thenReturn("^\\[");
- when(config.getNegate()).thenReturn(true);
- when(config.getWhat()).thenReturn(MultilineWhat.NEXT);
+ @Test
+ void event_end_pattern_omits_matched_section() throws IOException {
+ when(config.getCompiledPattern()).thenReturn(Pattern.compile("^---$"));
+ when(config.getEventEndPattern()).thenReturn("^---$");
when(config.getMaxLines()).thenReturn(500);
when(config.getMaxLength()).thenReturn(10000);
when(config.getLineSeparator()).thenReturn("\n");
+ when(config.getOmitMatchedSection()).thenReturn(true);
+ when(config.getEncoding()).thenReturn(StandardCharsets.UTF_8);
+
+ final String input = "line 1\nline 2\n---\nline 3\n---\n";
+
+ final List> events = parseContent(input);
+
+ assertThat(events.size(), equalTo(2));
+ assertThat(events.get(0).getData().get("message", String.class),
+ equalTo("line 1\nline 2\n"));
+ assertThat(events.get(1).getData().get("message", String.class),
+ equalTo("line 3\n"));
}
@Test
- void lines_not_matching_pattern_are_prepended_to_next_matching_line() throws IOException {
- final String input = "preamble line 1\n" +
- "preamble line 2\n" +
- "[2024-01-01] Log entry\n";
+ void omit_false_preserves_matched_section() throws IOException {
+ when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\d{4}-\\d{2}-\\d{2}\\s+"));
+ when(config.getEventStartPattern()).thenReturn("^\\d{4}-\\d{2}-\\d{2}\\s+");
+ when(config.getMaxLines()).thenReturn(500);
+ when(config.getMaxLength()).thenReturn(10000);
+ when(config.getLineSeparator()).thenReturn("\n");
+ when(config.getOmitMatchedSection()).thenReturn(false);
+ when(config.getEncoding()).thenReturn(StandardCharsets.UTF_8);
+
+ final String input = "2024-01-01 ERROR something\n";
final List> events = parseContent(input);
assertThat(events.size(), equalTo(1));
assertThat(events.get(0).getData().get("message", String.class),
- equalTo("preamble line 1\npreamble line 2\n[2024-01-01] Log entry"));
+ equalTo("2024-01-01 ERROR something"));
}
}
@@ -399,32 +411,28 @@ class MaxLinesLimit {
@BeforeEach
void setUp() {
- when(config.getMatch()).thenReturn("^\\d{4}");
- when(config.getNegate()).thenReturn(true);
- when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+ when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\d{4}"));
+ when(config.getEventStartPattern()).thenReturn("^\\d{4}");
when(config.getMaxLines()).thenReturn(3);
when(config.getMaxLength()).thenReturn(10000);
when(config.getLineSeparator()).thenReturn("\n");
+ when(config.getOmitMatchedSection()).thenReturn(false);
+ when(config.getEncoding()).thenReturn(StandardCharsets.UTF_8);
}
@Test
void flushes_event_when_max_lines_exceeded() throws IOException {
- final String input = "2024-01-01 ERROR start\n" +
- " line 2\n" +
- " line 3\n" +
- " line 4\n" +
- " line 5\n" +
- "2024-01-02 INFO next\n";
+ final String input = "2024 start\n line 2\n line 3\n line 4\n line 5\n2024 next\n";
final List> events = parseContent(input);
assertThat(events.size(), equalTo(3));
assertThat(events.get(0).getData().get("message", String.class),
- equalTo("2024-01-01 ERROR start\n line 2\n line 3"));
+ equalTo("2024 start\n line 2\n line 3"));
assertThat(events.get(1).getData().get("message", String.class),
equalTo(" line 4\n line 5"));
assertThat(events.get(2).getData().get("message", String.class),
- equalTo("2024-01-02 INFO next"));
+ equalTo("2024 next"));
}
}
@@ -433,25 +441,22 @@ class MaxLengthLimit {
@BeforeEach
void setUp() {
- when(config.getMatch()).thenReturn("^\\d{4}");
- when(config.getNegate()).thenReturn(true);
- when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
+ when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\d{4}"));
+ when(config.getEventStartPattern()).thenReturn("^\\d{4}");
when(config.getMaxLines()).thenReturn(500);
when(config.getMaxLength()).thenReturn(30);
when(config.getLineSeparator()).thenReturn("\n");
+ when(config.getOmitMatchedSection()).thenReturn(false);
+ when(config.getEncoding()).thenReturn(StandardCharsets.UTF_8);
}
@Test
void flushes_event_when_max_length_exceeded() throws IOException {
- final String input = "2024 start line here\n" +
- " continuation is long\n" +
- "2024 next entry\n";
+ final String input = "2024 start line here\n continuation is long\n2024 next entry\n";
final List> events = parseContent(input);
assertThat(events.size(), equalTo(3));
- // First event is "2024 start line here" (20 chars)
- // Adding "\n continuation is long" would be 20+1+22=43 > 30, so it flushes
assertThat(events.get(0).getData().get("message", String.class),
equalTo("2024 start line here"));
assertThat(events.get(1).getData().get("message", String.class),
@@ -461,163 +466,10 @@ void flushes_event_when_max_length_exceeded() throws IOException {
}
}
- @Nested
- class CustomLineSeparator {
-
- @BeforeEach
- void setUp() {
- when(config.getMatch()).thenReturn("^\\d{4}");
- when(config.getNegate()).thenReturn(true);
- when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
- when(config.getMaxLines()).thenReturn(500);
- when(config.getMaxLength()).thenReturn(10000);
- when(config.getLineSeparator()).thenReturn("\r\n");
- }
-
- @Test
- void uses_custom_line_separator_when_joining() throws IOException {
- final String input = "2024-01-01 ERROR start\n" +
- " continuation\n" +
- "2024-01-02 INFO next\n";
-
- final List> events = parseContent(input);
-
- assertThat(events.size(), equalTo(2));
- assertThat(events.get(0).getData().get("message", String.class),
- equalTo("2024-01-01 ERROR start\r\n continuation"));
- }
- }
-
- @Nested
- class RealWorldScenarios {
-
- @Test
- void python_traceback() throws IOException {
- when(config.getMatch()).thenReturn("^Traceback|^\\s|^\\w+Error");
- when(config.getNegate()).thenReturn(false);
- when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
- when(config.getMaxLines()).thenReturn(500);
- when(config.getMaxLength()).thenReturn(10000);
- when(config.getLineSeparator()).thenReturn("\n");
-
- final String input = "2024-01-01 INFO Starting application\n" +
- "Traceback (most recent call last):\n" +
- " File \"main.py\", line 10, in \n" +
- " result = process()\n" +
- " File \"service.py\", line 5, in process\n" +
- " return 1/0\n" +
- "ZeroDivisionError: division by zero\n" +
- "2024-01-01 INFO Recovered\n";
-
- final List> events = parseContent(input);
-
- assertThat(events.size(), equalTo(2));
- assertThat(events.get(0).getData().get("message", String.class),
- equalTo("2024-01-01 INFO Starting application\n" +
- "Traceback (most recent call last):\n" +
- " File \"main.py\", line 10, in \n" +
- " result = process()\n" +
- " File \"service.py\", line 5, in process\n" +
- " return 1/0\n" +
- "ZeroDivisionError: division by zero"));
- assertThat(events.get(1).getData().get("message", String.class),
- equalTo("2024-01-01 INFO Recovered"));
- }
-
- @Test
- void multiline_xml_in_logs() throws IOException {
- when(config.getMatch()).thenReturn("^\\d{4}-\\d{2}-\\d{2}");
- when(config.getNegate()).thenReturn(true);
- when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
- when(config.getMaxLines()).thenReturn(500);
- when(config.getMaxLength()).thenReturn(10000);
- when(config.getLineSeparator()).thenReturn("\n");
-
- final String input = "2024-01-01 Request body:\n" +
- "\n" +
- " value\n" +
- "\n" +
- "2024-01-01 Response sent\n";
-
- final List> events = parseContent(input);
-
- assertThat(events.size(), equalTo(2));
- assertThat(events.get(0).getData().get("message", String.class),
- equalTo("2024-01-01 Request body:\n\n value\n"));
- assertThat(events.get(1).getData().get("message", String.class),
- equalTo("2024-01-01 Response sent"));
- }
-
- @Test
- void log4j_multiline_with_nested_exception() throws IOException {
- when(config.getMatch()).thenReturn("^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}");
- when(config.getNegate()).thenReturn(true);
- when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
- when(config.getMaxLines()).thenReturn(500);
- when(config.getMaxLength()).thenReturn(10000);
- when(config.getLineSeparator()).thenReturn("\n");
-
- final String input = "2024-01-01T12:00:00 ERROR Application failed\n" +
- "java.lang.RuntimeException: Outer\n" +
- "\tat com.example.A.run(A.java:10)\n" +
- "Caused by: java.io.IOException: Inner\n" +
- "\tat com.example.B.read(B.java:20)\n" +
- "\t... 5 more\n" +
- "2024-01-01T12:00:01 INFO Shutdown complete\n";
-
- final List> events = parseContent(input);
-
- assertThat(events.size(), equalTo(2));
- assertThat(events.get(0).getData().get("message", String.class),
- equalTo("2024-01-01T12:00:00 ERROR Application failed\n" +
- "java.lang.RuntimeException: Outer\n" +
- "\tat com.example.A.run(A.java:10)\n" +
- "Caused by: java.io.IOException: Inner\n" +
- "\tat com.example.B.read(B.java:20)\n" +
- "\t... 5 more"));
- }
- }
-
- @Nested
- class IsContinuationLineTests {
-
- @Test
- void negate_false_matching_line_is_continuation() {
- when(config.getMatch()).thenReturn("^\\s");
- when(config.getNegate()).thenReturn(false);
- when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
- when(config.getMaxLines()).thenReturn(500);
- when(config.getMaxLength()).thenReturn(10000);
- when(config.getLineSeparator()).thenReturn("\n");
-
- final MultilineInputCodec codec = createObjectUnderTest();
- assertThat(codec.isContinuationLine(" indented"), equalTo(true));
- assertThat(codec.isContinuationLine("not indented"), equalTo(false));
- }
-
- @Test
- void negate_true_non_matching_line_is_continuation() {
- when(config.getMatch()).thenReturn("^\\d{4}");
- when(config.getNegate()).thenReturn(true);
- when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
- when(config.getMaxLines()).thenReturn(500);
- when(config.getMaxLength()).thenReturn(10000);
- when(config.getLineSeparator()).thenReturn("\n");
-
- final MultilineInputCodec codec = createObjectUnderTest();
- assertThat(codec.isContinuationLine(" no timestamp"), equalTo(true));
- assertThat(codec.isContinuationLine("2024 has timestamp"), equalTo(false));
- }
- }
-
@Test
void event_metadata_is_log_type() throws IOException {
- when(config.getMatch()).thenReturn("^\\d{4}");
- when(config.getNegate()).thenReturn(true);
- when(config.getWhat()).thenReturn(MultilineWhat.PREVIOUS);
- when(config.getMaxLines()).thenReturn(500);
- when(config.getMaxLength()).thenReturn(10000);
- when(config.getLineSeparator()).thenReturn("\n");
+ setupConfig("^\\d{4}");
+ when(config.getEventStartPattern()).thenReturn("^\\d{4}");
final List> events = parseContent("2024-01-01 test\n");
diff --git a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhatTest.java b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhatTest.java
deleted file mode 100644
index 9928024685..0000000000
--- a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineWhatTest.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright OpenSearch Contributors
- * SPDX-License-Identifier: Apache-2.0
- *
- * The OpenSearch Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-package org.opensearch.dataprepper.plugins.codec.multiline;
-
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.ValueSource;
-
-import static org.hamcrest.CoreMatchers.equalTo;
-import static org.hamcrest.MatcherAssert.assertThat;
-import static org.junit.jupiter.api.Assertions.assertThrows;
-
-class MultilineWhatTest {
-
- @Test
- void fromString_returns_PREVIOUS_for_previous() {
- assertThat(MultilineWhat.fromString("previous"), equalTo(MultilineWhat.PREVIOUS));
- }
-
- @Test
- void fromString_returns_NEXT_for_next() {
- assertThat(MultilineWhat.fromString("next"), equalTo(MultilineWhat.NEXT));
- }
-
- @Test
- void fromString_is_case_insensitive() {
- assertThat(MultilineWhat.fromString("PREVIOUS"), equalTo(MultilineWhat.PREVIOUS));
- assertThat(MultilineWhat.fromString("NEXT"), equalTo(MultilineWhat.NEXT));
- assertThat(MultilineWhat.fromString("Previous"), equalTo(MultilineWhat.PREVIOUS));
- }
-
- @ParameterizedTest
- @ValueSource(strings = {"invalid", "before", "after", ""})
- void fromString_throws_for_invalid_value(final String value) {
- assertThrows(IllegalArgumentException.class, () -> MultilineWhat.fromString(value));
- }
-
- @Test
- void toString_returns_correct_values() {
- assertThat(MultilineWhat.PREVIOUS.toString(), equalTo("previous"));
- assertThat(MultilineWhat.NEXT.toString(), equalTo("next"));
- }
-}
From 71da7e028afef54c91a8ebf62668f39753fb90dd Mon Sep 17 00:00:00 2001
From: Manisha Yadav
Date: Wed, 17 Jun 2026 09:50:28 +0000
Subject: [PATCH 3/3] Address review comments on new design implementation
Signed-off-by: Manisha Yadav
---
.../multiline-codecs/build.gradle | 1 +
.../codec/multiline/MultilineInputCodec.java | 188 +++----------
.../multiline/MultilineInputCodecConfig.java | 18 +-
.../codec/multiline/MultilineCodecsIT.java | 246 ++++--------------
.../multiline/MultilineInputCodecTest.java | 11 +-
.../continuation-line-end-pattern.yaml | 15 ++
.../continuation-line-start-pattern.yaml | 15 ++
.../codec/multiline/event-end-pattern.yaml | 15 ++
.../codec/multiline/event-start-pattern.yaml | 15 ++
.../codec/multiline/omit-matched-section.yaml | 16 ++
10 files changed, 172 insertions(+), 368 deletions(-)
create mode 100644 data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/continuation-line-end-pattern.yaml
create mode 100644 data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/continuation-line-start-pattern.yaml
create mode 100644 data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/event-end-pattern.yaml
create mode 100644 data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/event-start-pattern.yaml
create mode 100644 data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/omit-matched-section.yaml
diff --git a/data-prepper-plugins/multiline-codecs/build.gradle b/data-prepper-plugins/multiline-codecs/build.gradle
index ade99e2e54..65a8a97804 100644
--- a/data-prepper-plugins/multiline-codecs/build.gradle
+++ b/data-prepper-plugins/multiline-codecs/build.gradle
@@ -12,4 +12,5 @@ dependencies {
implementation 'com.fasterxml.jackson.core:jackson-annotations'
testImplementation project(':data-prepper-plugins:common')
testImplementation project(':data-prepper-test:test-event')
+ testImplementation project(':data-prepper-test:plugin-test-framework')
}
diff --git a/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodec.java b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodec.java
index e3e5e1dc9d..b345a2e1dd 100644
--- a/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodec.java
+++ b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodec.java
@@ -54,7 +54,8 @@ public class MultilineInputCodec implements InputCodec {
private static final String MESSAGE_FIELD_NAME = "message";
private final Pattern pattern;
- private final MultilineMode mode;
+ private final boolean boundaryOnMatch;
+ private final boolean flushAfter;
private final boolean omitMatchedSection;
private final int maxLines;
private final int maxLength;
@@ -72,7 +73,9 @@ public MultilineInputCodec(final MultilineInputCodecConfig config, final EventFa
throw new IllegalArgumentException("A valid pattern must be configured");
}
- this.mode = resolveMode(config);
+ final MultilineMode mode = resolveMode(config);
+ this.boundaryOnMatch = (mode == MultilineMode.EVENT_START || mode == MultilineMode.EVENT_END);
+ this.flushAfter = (mode == MultilineMode.EVENT_END || mode == MultilineMode.CONTINUATION_END);
this.omitMatchedSection = config.getOmitMatchedSection();
this.maxLines = config.getMaxLines();
this.maxLength = config.getMaxLength();
@@ -97,190 +100,59 @@ public void parse(final InputStream inputStream, final Consumer> e
Objects.requireNonNull(inputStream, "inputStream must not be null");
Objects.requireNonNull(eventConsumer, "eventConsumer must not be null");
- try (final BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, encoding))) {
- switch (mode) {
- case EVENT_START:
- parseEventStartMode(reader, eventConsumer);
- break;
- case EVENT_END:
- parseEventEndMode(reader, eventConsumer);
- break;
- case CONTINUATION_START:
- parseContinuationStartMode(reader, eventConsumer);
- break;
- case CONTINUATION_END:
- parseContinuationEndMode(reader, eventConsumer);
- break;
- default:
- throw new IllegalStateException("Unknown multiline mode: " + mode);
- }
- }
+ final BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, encoding));
+ parseLines(reader, eventConsumer);
}
- /**
- * EVENT_START mode: A new event begins at each line matching the pattern.
- * Non-matching lines are continuations of the preceding event.
- */
- private void parseEventStartMode(final BufferedReader reader, final Consumer> eventConsumer) throws IOException {
+ private void parseLines(final BufferedReader reader, final Consumer> eventConsumer) throws IOException {
final StringBuilder buffer = new StringBuilder();
int lineCount = 0;
String line;
while ((line = reader.readLine()) != null) {
final boolean matches = pattern.matcher(line).find();
+ final boolean isBoundary = (boundaryOnMatch == matches);
- if (matches || shouldFlush(buffer, lineCount, line)) {
- if (buffer.length() > 0) {
- emitEvent(buffer.toString(), eventConsumer);
- buffer.setLength(0);
- lineCount = 0;
- }
- }
-
- if (buffer.length() > 0) {
- buffer.append(lineSeparator);
- }
- buffer.append(processLine(line, matches));
- lineCount++;
- }
-
- if (buffer.length() > 0) {
- emitEvent(buffer.toString(), eventConsumer);
- }
- }
-
- /**
- * EVENT_END mode: An event ends at each line matching the pattern (inclusive).
- * The matching line is included in the current event, then a new event begins.
- */
- private void parseEventEndMode(final BufferedReader reader, final Consumer> eventConsumer) throws IOException {
- final StringBuilder buffer = new StringBuilder();
- int lineCount = 0;
- String line;
-
- while ((line = reader.readLine()) != null) {
- final boolean matches = pattern.matcher(line).find();
-
- if (shouldFlush(buffer, lineCount, line)) {
- if (buffer.length() > 0) {
- emitEvent(buffer.toString(), eventConsumer);
- buffer.setLength(0);
- lineCount = 0;
- }
+ if ((!flushAfter && isBoundary) || shouldFlush(buffer, lineCount, line)) {
+ flushIfNonEmpty(buffer, eventConsumer);
+ lineCount = 0;
}
- if (buffer.length() > 0) {
- buffer.append(lineSeparator);
- }
- buffer.append(processLine(line, matches));
+ appendLineToBuffer(buffer, processLine(line, matches));
lineCount++;
- if (matches) {
- emitEvent(buffer.toString(), eventConsumer);
- buffer.setLength(0);
+ if (flushAfter && isBoundary) {
+ flushIfNonEmpty(buffer, eventConsumer);
lineCount = 0;
}
}
- if (buffer.length() > 0) {
- emitEvent(buffer.toString(), eventConsumer);
- }
+ flushIfNonEmpty(buffer, eventConsumer);
}
- /**
- * CONTINUATION_START mode: Lines matching the pattern are continuations of the previous event.
- * Non-matching lines start new events.
- */
- private void parseContinuationStartMode(final BufferedReader reader, final Consumer> eventConsumer) throws IOException {
- final StringBuilder buffer = new StringBuilder();
- int lineCount = 0;
- String line;
-
- while ((line = reader.readLine()) != null) {
- final boolean matches = pattern.matcher(line).find();
-
- if (!matches || shouldFlush(buffer, lineCount, line)) {
- if (buffer.length() > 0) {
- emitEvent(buffer.toString(), eventConsumer);
- buffer.setLength(0);
- lineCount = 0;
- }
- }
-
- if (buffer.length() > 0) {
- buffer.append(lineSeparator);
- }
- buffer.append(processLine(line, matches));
- lineCount++;
- }
-
- if (buffer.length() > 0) {
- emitEvent(buffer.toString(), eventConsumer);
+ private String processLine(final String line, final boolean matches) {
+ if (!omitMatchedSection || !matches) {
+ return line;
}
+ final Matcher matcher = pattern.matcher(line);
+ return matcher.replaceFirst("");
}
- /**
- * CONTINUATION_END mode: Lines matching the pattern are prepended to the next event.
- * Non-matching lines complete the current event.
- */
- private void parseContinuationEndMode(final BufferedReader reader, final Consumer> eventConsumer) throws IOException {
- final StringBuilder buffer = new StringBuilder();
- int lineCount = 0;
- boolean bufferHasNonContinuation = false;
- String line;
-
- while ((line = reader.readLine()) != null) {
- final boolean matches = pattern.matcher(line).find();
-
- if (!matches) {
- if (bufferHasNonContinuation) {
- emitEvent(buffer.toString(), eventConsumer);
- buffer.setLength(0);
- lineCount = 0;
- bufferHasNonContinuation = false;
- }
- if (buffer.length() > 0) {
- buffer.append(lineSeparator);
- }
- buffer.append(processLine(line, false));
- lineCount++;
- bufferHasNonContinuation = true;
- continue;
- }
-
- if (bufferHasNonContinuation) {
- emitEvent(buffer.toString(), eventConsumer);
- buffer.setLength(0);
- lineCount = 0;
- bufferHasNonContinuation = false;
- }
-
- if (shouldFlush(buffer, lineCount, line)) {
- if (buffer.length() > 0) {
- emitEvent(buffer.toString(), eventConsumer);
- buffer.setLength(0);
- lineCount = 0;
- }
- }
-
- if (buffer.length() > 0) {
- buffer.append(lineSeparator);
- }
- buffer.append(processLine(line, matches));
- lineCount++;
+ private void appendLineToBuffer(final StringBuilder buffer, final String processedLine) {
+ if (processedLine.isEmpty()) {
+ return;
}
-
if (buffer.length() > 0) {
- emitEvent(buffer.toString(), eventConsumer);
+ buffer.append(lineSeparator);
}
+ buffer.append(processedLine);
}
- private String processLine(final String line, final boolean matches) {
- if (!omitMatchedSection || !matches) {
- return line;
+ private void flushIfNonEmpty(final StringBuilder buffer, final Consumer> eventConsumer) {
+ if (buffer.length() > 0) {
+ emitEvent(buffer.toString(), eventConsumer);
+ buffer.setLength(0);
}
- final Matcher matcher = pattern.matcher(line);
- return matcher.replaceFirst("");
}
/**
diff --git a/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfig.java b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfig.java
index 9eb76ce4fd..4bea8356e7 100644
--- a/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfig.java
+++ b/data-prepper-plugins/multiline-codecs/src/main/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecConfig.java
@@ -77,7 +77,6 @@ public class MultilineInputCodecConfig {
private String encoding = StandardCharsets.UTF_8.name();
private Pattern compiledPattern;
- private Charset encodingCharset;
public String getEventStartPattern() {
return eventStartPattern;
@@ -112,22 +111,23 @@ public String getLineSeparator() {
}
/**
- * Returns the validated Charset. The encoding is validated once during
- * bean validation and stored to avoid repeated parsing.
+ * Returns the Charset for the configured encoding name, resolved via Charset.forName on each call.
*
- * @return The validated Charset.
+ * @return The Charset.
*/
public Charset getEncoding() {
- return encodingCharset;
+ return Charset.forName(encoding);
}
/**
- * Returns the compiled regex pattern. The pattern is compiled once during validation
- * and reused to avoid duplicate compilation.
+ * Returns the regex pattern, compiled lazily on first access and cached for reuse.
*
* @return The compiled Pattern.
*/
public Pattern getCompiledPattern() {
+ if (compiledPattern == null) {
+ compiledPattern = Pattern.compile(getConfiguredPatternString());
+ }
return compiledPattern;
}
@@ -149,7 +149,7 @@ boolean isValidPattern() {
return false;
}
try {
- compiledPattern = Pattern.compile(patternString);
+ Pattern.compile(patternString);
return true;
} catch (final PatternSyntaxException e) {
return false;
@@ -162,7 +162,7 @@ boolean isValidEncoding() {
return false;
}
try {
- encodingCharset = Charset.forName(encoding);
+ Charset.forName(encoding);
return true;
} catch (final IllegalCharsetNameException | UnsupportedCharsetException e) {
return false;
diff --git a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineCodecsIT.java b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineCodecsIT.java
index f81b4fb424..ad67ec24df 100644
--- a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineCodecsIT.java
+++ b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineCodecsIT.java
@@ -9,64 +9,36 @@
package org.opensearch.dataprepper.plugins.codec.multiline;
-import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.extension.ExtendWith;
-import org.mockito.ArgumentCaptor;
-import org.mockito.Mock;
-import org.mockito.junit.jupiter.MockitoExtension;
-import org.opensearch.dataprepper.event.TestEventFactory;
+import org.opensearch.dataprepper.model.codec.InputCodec;
import org.opensearch.dataprepper.model.event.Event;
-import org.opensearch.dataprepper.model.event.EventFactory;
import org.opensearch.dataprepper.model.record.Record;
+import org.opensearch.dataprepper.test.plugins.DataPrepperPluginTest;
+import org.opensearch.dataprepper.test.plugins.PluginConfigurationFile;
+import org.opensearch.dataprepper.test.plugins.junit.BaseDataPrepperPluginStandardTestSuite;
import java.io.ByteArrayInputStream;
import java.io.IOException;
-import java.io.InputStream;
import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
import java.util.List;
-import java.util.function.Consumer;
-import java.util.regex.Pattern;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsString;
-import static org.mockito.Mockito.lenient;
-import static org.mockito.Mockito.times;
-import static org.mockito.Mockito.verify;
-@ExtendWith(MockitoExtension.class)
-public class MultilineCodecsIT {
+@DataPrepperPluginTest(pluginName = "multiline", pluginType = InputCodec.class)
+public class MultilineCodecsIT extends BaseDataPrepperPluginStandardTestSuite {
- @Mock
- private MultilineInputCodecConfig config;
-
- @Mock
- private Consumer> eventConsumer;
-
- private final EventFactory eventFactory = TestEventFactory.getTestEventFactory();
-
- @BeforeEach
- void setUp() {
- lenient().when(config.getMaxLines()).thenReturn(500);
- lenient().when(config.getMaxLength()).thenReturn(50000);
- lenient().when(config.getLineSeparator()).thenReturn("\n");
- lenient().when(config.getOmitMatchedSection()).thenReturn(false);
- lenient().when(config.getEncoding()).thenReturn(StandardCharsets.UTF_8);
- }
-
- private MultilineInputCodec createObjectUnderTest() {
- return new MultilineInputCodec(config, eventFactory);
- }
-
- private InputStream toInputStream(final String content) {
- return new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8));
+ private List<Record<Event>> parseContent(final InputCodec codec, final String content) throws IOException {
+ final List<Record<Event>> events = new ArrayList<>();
+ codec.parse(new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8)), events::add);
+ return events;
}
@Test
- void parse_java_stack_trace_with_event_start_pattern() throws IOException {
- lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}"));
- lenient().when(config.getEventStartPattern()).thenReturn("^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}");
+ void parse_java_stack_trace_with_event_start_pattern(
+ @PluginConfigurationFile("event-start-pattern.yaml") final InputCodec codec) throws IOException {
final String input =
"2024-01-15 10:23:45.123 ERROR [main] com.example.UserService - Request failed\n" +
@@ -77,105 +49,40 @@ void parse_java_stack_trace_with_event_start_pattern() throws IOException {
"\tat com.mysql.jdbc.Connection.connect(Connection.java:456)\n" +
"2024-01-15 10:23:45.456 INFO [main] com.example.UserService - Retrying\n";
- createObjectUnderTest().parse(toInputStream(input), eventConsumer);
+ final List<Record<Event>> events = parseContent(codec, input);
- final ArgumentCaptor> captor = ArgumentCaptor.forClass(Record.class);
- verify(eventConsumer, times(2)).accept(captor.capture());
-
- final List> records = captor.getAllValues();
- final String event1 = records.get(0).getData().get("message", String.class);
+ assertThat(events.size(), equalTo(2));
+ final String event1 = events.get(0).getData().get("message", String.class);
assertThat(event1, containsString("NullPointerException"));
assertThat(event1, containsString("at com.example.UserService.getUser"));
assertThat(event1, containsString("Caused by: java.sql.SQLException"));
- assertThat(records.get(1).getData().get("message", String.class),
+ assertThat(events.get(1).getData().get("message", String.class),
equalTo("2024-01-15 10:23:45.456 INFO [main] com.example.UserService - Retrying"));
}
@Test
- void parse_python_traceback_with_event_start_pattern() throws IOException {
- lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}"));
- lenient().when(config.getEventStartPattern()).thenReturn("^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}");
+ void parse_with_event_end_pattern(
+ @PluginConfigurationFile("event-end-pattern.yaml") final InputCodec codec) throws IOException {
final String input =
- "2024-03-20 08:15:00,123 INFO Starting application\n" +
- "2024-03-20 08:15:02,789 ERROR Unhandled exception\n" +
- "Traceback (most recent call last):\n" +
- " File \"/app/worker.py\", line 45, in process\n" +
- "ValueError: invalid literal for int()\n" +
- "2024-03-20 08:15:03,456 INFO Recovered\n";
-
- createObjectUnderTest().parse(toInputStream(input), eventConsumer);
-
- final ArgumentCaptor> captor = ArgumentCaptor.forClass(Record.class);
- verify(eventConsumer, times(3)).accept(captor.capture());
-
- final List> records = captor.getAllValues();
- assertThat(records.get(0).getData().get("message", String.class),
- equalTo("2024-03-20 08:15:00,123 INFO Starting application"));
- final String event2 = records.get(1).getData().get("message", String.class);
- assertThat(event2, containsString("Traceback"));
- assertThat(event2, containsString("ValueError"));
- assertThat(records.get(2).getData().get("message", String.class),
- equalTo("2024-03-20 08:15:03,456 INFO Recovered"));
- }
-
- @Test
- void parse_xml_multiline_with_event_start_pattern() throws IOException {
- lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\[\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}"));
- lenient().when(config.getEventStartPattern()).thenReturn("^\\[\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}");
-
- final String input =
- "[2024-05-10 14:30:00.001] [INFO] Incoming request:\n" +
- "\n" +
- " value\n" +
- "\n" +
- "[2024-05-10 14:30:00.045] [INFO] Request processed\n";
-
- createObjectUnderTest().parse(toInputStream(input), eventConsumer);
-
- final ArgumentCaptor> captor = ArgumentCaptor.forClass(Record.class);
- verify(eventConsumer, times(2)).accept(captor.capture());
-
- final List> records = captor.getAllValues();
- final String event1 = records.get(0).getData().get("message", String.class);
- assertThat(event1, containsString(""));
- assertThat(event1, containsString(""));
- assertThat(records.get(1).getData().get("message", String.class),
- equalTo("[2024-05-10 14:30:00.045] [INFO] Request processed"));
- }
-
- @Test
- void parse_syslog_ise_with_event_start_pattern() throws IOException {
- lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^<\\d+>[A-Z][a-z]{2}\\s+\\d+\\s+\\d{2}:\\d{2}:\\d{2}"));
- lenient().when(config.getEventStartPattern()).thenReturn("^<\\d+>[A-Z][a-z]{2}\\s+\\d+\\s+\\d{2}:\\d{2}:\\d{2}");
-
- final String input =
- "<181>Jun 1 12:39:49 Infra-ISE Audit NOTICE Admin-Login: success\n" +
- "<181>Jun 1 12:39:49 Infra-ISE Audit NOTICE OpenAPI: Response={\n" +
- " \"version\" : \"1.0.0\"\n" +
- "}, HttpCode=200\n" +
- "<181>Jun 1 12:40:15 Infra-ISE Audit NOTICE Config-Change: added\n";
-
- createObjectUnderTest().parse(toInputStream(input), eventConsumer);
+ "entry 1 line 1\n" +
+ "entry 1 line 2\n" +
+ "---\n" +
+ "entry 2 line 1\n" +
+ "---\n";
- final ArgumentCaptor> captor = ArgumentCaptor.forClass(Record.class);
- verify(eventConsumer, times(3)).accept(captor.capture());
+ final List<Record<Event>> events = parseContent(codec, input);
- final List> records = captor.getAllValues();
- assertThat(records.get(0).getData().get("message", String.class),
- containsString("Admin-Login: success"));
- final String event2 = records.get(1).getData().get("message", String.class);
- assertThat(event2, containsString("OpenAPI: Response="));
- assertThat(event2, containsString("\"version\" : \"1.0.0\""));
- assertThat(event2, containsString("HttpCode=200"));
- assertThat(records.get(2).getData().get("message", String.class),
- containsString("Config-Change: added"));
+ assertThat(events.size(), equalTo(2));
+ assertThat(events.get(0).getData().get("message", String.class),
+ equalTo("entry 1 line 1\nentry 1 line 2\n---"));
+ assertThat(events.get(1).getData().get("message", String.class),
+ equalTo("entry 2 line 1\n---"));
}
@Test
- void parse_with_continuation_line_start_pattern() throws IOException {
- lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\s+(at |\\.\\.\\.|Caused by:)"));
- lenient().when(config.getContinuationLineStartPattern()).thenReturn("^\\s+(at |\\.\\.\\.|Caused by:)");
+ void parse_with_continuation_line_start_pattern(
+ @PluginConfigurationFile("continuation-line-start-pattern.yaml") final InputCodec codec) throws IOException {
final String input =
"java.lang.RuntimeException: error\n" +
@@ -184,48 +91,38 @@ void parse_with_continuation_line_start_pattern() throws IOException {
" at com.example.C.read(C.java:3)\n" +
"Application recovered\n";
- createObjectUnderTest().parse(toInputStream(input), eventConsumer);
-
- final ArgumentCaptor> captor = ArgumentCaptor.forClass(Record.class);
- verify(eventConsumer, times(2)).accept(captor.capture());
+ final List<Record<Event>> events = parseContent(codec, input);
- final List> records = captor.getAllValues();
- final String event1 = records.get(0).getData().get("message", String.class);
+ assertThat(events.size(), equalTo(2));
+ final String event1 = events.get(0).getData().get("message", String.class);
assertThat(event1, containsString("RuntimeException: error"));
assertThat(event1, containsString("at com.example.A.method"));
assertThat(event1, containsString("Caused by: java.io.IOException"));
- assertThat(records.get(1).getData().get("message", String.class),
+ assertThat(events.get(1).getData().get("message", String.class),
equalTo("Application recovered"));
}
@Test
- void parse_with_event_end_pattern() throws IOException {
- lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^---$"));
- lenient().when(config.getEventEndPattern()).thenReturn("^---$");
+ void parse_with_omit_matched_section(
+ @PluginConfigurationFile("omit-matched-section.yaml") final InputCodec codec) throws IOException {
final String input =
- "entry 1 line 1\n" +
- "entry 1 line 2\n" +
- "---\n" +
- "entry 2 line 1\n" +
- "---\n";
-
- createObjectUnderTest().parse(toInputStream(input), eventConsumer);
+ "2024-01-01 ERROR something bad\n" +
+ " stack trace\n" +
+ "2024-01-02 INFO recovered\n";
- final ArgumentCaptor> captor = ArgumentCaptor.forClass(Record.class);
- verify(eventConsumer, times(2)).accept(captor.capture());
+ final List<Record<Event>> events = parseContent(codec, input);
- final List> records = captor.getAllValues();
- assertThat(records.get(0).getData().get("message", String.class),
- equalTo("entry 1 line 1\nentry 1 line 2\n---"));
- assertThat(records.get(1).getData().get("message", String.class),
- equalTo("entry 2 line 1\n---"));
+ assertThat(events.size(), equalTo(2));
+ assertThat(events.get(0).getData().get("message", String.class),
+ equalTo("ERROR something bad\n stack trace"));
+ assertThat(events.get(1).getData().get("message", String.class),
+ equalTo("INFO recovered"));
}
@Test
- void parse_with_continuation_end_pattern() throws IOException {
- lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\s"));
- lenient().when(config.getContinuationLineEndPattern()).thenReturn("^\\s");
+ void parse_with_continuation_line_end_pattern(
+ @PluginConfigurationFile("continuation-line-end-pattern.yaml") final InputCodec codec) throws IOException {
final String input =
" context-line-1\n" +
@@ -234,50 +131,15 @@ void parse_with_continuation_end_pattern() throws IOException {
" context-line-3\n" +
"MAIN EVENT B\n";
- createObjectUnderTest().parse(toInputStream(input), eventConsumer);
-
- final ArgumentCaptor> captor = ArgumentCaptor.forClass(Record.class);
- verify(eventConsumer, times(2)).accept(captor.capture());
+ final List<Record<Event>> events = parseContent(codec, input);
- final List> records = captor.getAllValues();
- final String event1 = records.get(0).getData().get("message", String.class);
+ assertThat(events.size(), equalTo(2));
+ final String event1 = events.get(0).getData().get("message", String.class);
assertThat(event1, containsString("context-line-1"));
+ assertThat(event1, containsString("context-line-2"));
assertThat(event1, containsString("MAIN EVENT A"));
- final String event2 = records.get(1).getData().get("message", String.class);
+ final String event2 = events.get(1).getData().get("message", String.class);
assertThat(event2, containsString("context-line-3"));
assertThat(event2, containsString("MAIN EVENT B"));
}
-
- @Test
- void parse_with_omit_matched_section() throws IOException {
- lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\d{4}-\\d{2}-\\d{2}\\s+"));
- lenient().when(config.getEventStartPattern()).thenReturn("^\\d{4}-\\d{2}-\\d{2}\\s+");
- lenient().when(config.getOmitMatchedSection()).thenReturn(true);
-
- final String input =
- "2024-01-01 ERROR something bad\n" +
- " stack trace\n" +
- "2024-01-02 INFO recovered\n";
-
- createObjectUnderTest().parse(toInputStream(input), eventConsumer);
-
- final ArgumentCaptor> captor = ArgumentCaptor.forClass(Record.class);
- verify(eventConsumer, times(2)).accept(captor.capture());
-
- final List> records = captor.getAllValues();
- assertThat(records.get(0).getData().get("message", String.class),
- equalTo("ERROR something bad\n stack trace"));
- assertThat(records.get(1).getData().get("message", String.class),
- equalTo("INFO recovered"));
- }
-
- @Test
- void parse_empty_input_produces_no_events() throws IOException {
- lenient().when(config.getCompiledPattern()).thenReturn(Pattern.compile("^\\d{4}"));
- lenient().when(config.getEventStartPattern()).thenReturn("^\\d{4}");
-
- createObjectUnderTest().parse(toInputStream(""), eventConsumer);
-
- verify(eventConsumer, times(0)).accept(ArgumentCaptor.forClass(Record.class).capture());
- }
}
diff --git a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecTest.java b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecTest.java
index a7f66c8306..ea8dc07d4d 100644
--- a/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecTest.java
+++ b/data-prepper-plugins/multiline-codecs/src/test/java/org/opensearch/dataprepper/plugins/codec/multiline/MultilineInputCodecTest.java
@@ -73,13 +73,6 @@ void constructor_throws_if_no_pattern_configured() {
assertThrows(IllegalArgumentException.class, this::createObjectUnderTest);
}
- @Test
- void constructor_throws_if_pattern_is_invalid() {
- when(config.getCompiledPattern()).thenReturn(null);
-
- assertThrows(IllegalArgumentException.class, this::createObjectUnderTest);
- }
-
private void setupConfig(final String patternStr) {
when(config.getCompiledPattern()).thenReturn(Pattern.compile(patternStr));
when(config.getMaxLines()).thenReturn(500);
@@ -381,9 +374,9 @@ void event_end_pattern_omits_matched_section() throws IOException {
assertThat(events.size(), equalTo(2));
assertThat(events.get(0).getData().get("message", String.class),
- equalTo("line 1\nline 2\n"));
+ equalTo("line 1\nline 2"));
assertThat(events.get(1).getData().get("message", String.class),
- equalTo("line 3\n"));
+ equalTo("line 3"));
}
@Test
diff --git a/data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/continuation-line-end-pattern.yaml b/data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/continuation-line-end-pattern.yaml
new file mode 100644
index 0000000000..2beb48c08a
--- /dev/null
+++ b/data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/continuation-line-end-pattern.yaml
@@ -0,0 +1,15 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+
+test-pipeline:
+ source:
+ unused:
+ processor:
+ - multiline:
+ continuation_line_end_pattern: "^\\s"
+ sink:
+ - unused:
diff --git a/data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/continuation-line-start-pattern.yaml b/data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/continuation-line-start-pattern.yaml
new file mode 100644
index 0000000000..7fbb62d7cc
--- /dev/null
+++ b/data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/continuation-line-start-pattern.yaml
@@ -0,0 +1,15 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+
+test-pipeline:
+ source:
+ unused:
+ processor:
+ - multiline:
+ continuation_line_start_pattern: "^\\s+(at |\\.\\.\\.|Caused by:)"
+ sink:
+ - unused:
diff --git a/data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/event-end-pattern.yaml b/data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/event-end-pattern.yaml
new file mode 100644
index 0000000000..06b9577b18
--- /dev/null
+++ b/data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/event-end-pattern.yaml
@@ -0,0 +1,15 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+
+test-pipeline:
+ source:
+ unused:
+ processor:
+ - multiline:
+ event_end_pattern: "^---$"
+ sink:
+ - unused:
diff --git a/data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/event-start-pattern.yaml b/data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/event-start-pattern.yaml
new file mode 100644
index 0000000000..c95b3b7be9
--- /dev/null
+++ b/data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/event-start-pattern.yaml
@@ -0,0 +1,15 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+
+test-pipeline:
+ source:
+ unused:
+ processor:
+ - multiline:
+ event_start_pattern: "^\\d{4}-\\d{2}-\\d{2}\\s+\\d{2}:\\d{2}:\\d{2}"
+ sink:
+ - unused:
diff --git a/data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/omit-matched-section.yaml b/data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/omit-matched-section.yaml
new file mode 100644
index 0000000000..ec7b990b13
--- /dev/null
+++ b/data-prepper-plugins/multiline-codecs/src/test/resources/org/opensearch/dataprepper/plugins/codec/multiline/omit-matched-section.yaml
@@ -0,0 +1,16 @@
+# Copyright OpenSearch Contributors
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+
+test-pipeline:
+ source:
+ unused:
+ processor:
+ - multiline:
+ event_start_pattern: "^\\d{4}-\\d{2}-\\d{2}\\s+"
+ omit_matched_section: true
+ sink:
+ - unused: