diff --git a/data-prepper-core/build.gradle b/data-prepper-core/build.gradle index db2bd9b013..169c9ff2fa 100644 --- a/data-prepper-core/build.gradle +++ b/data-prepper-core/build.gradle @@ -39,6 +39,7 @@ dependencies { implementation project(':data-prepper-plugin-framework') testImplementation project(':data-prepper-plugin-framework').sourceSets.test.output testImplementation project(':data-prepper-plugins:common').sourceSets.test.output + testImplementation project(':data-prepper-plugins:file-source') implementation 'com.fasterxml.jackson.core:jackson-databind' implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml' implementation libs.reflections.core diff --git a/data-prepper-plugins/common/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileFormat.java b/data-prepper-plugins/common/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileFormat.java deleted file mode 100644 index 01201edf7a..0000000000 --- a/data-prepper-plugins/common/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileFormat.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.dataprepper.plugins.source.file; - -import java.util.Arrays; -import java.util.Map; -import java.util.function.Function; -import java.util.stream.Collectors; - -/** - * An enumm to represent the file formats supported in Data Prepper's file source. 
- * @since 1.2 - */ -public enum FileFormat { - - PLAIN("plain"), - JSON("json"); - - private static final Map NAMES_MAP = Arrays.stream(FileFormat.values()) - .collect(Collectors.toMap(FileFormat::toString, Function.identity())); - - private final String name; - - FileFormat(final String name) { - this.name = name; - } - - public String toString() { - return this.name; - } - - public static FileFormat getByName(final String name) { - return NAMES_MAP.get(name.toLowerCase()); - } -} \ No newline at end of file diff --git a/data-prepper-plugins/common/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileSource.java b/data-prepper-plugins/common/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileSource.java deleted file mode 100644 index 9698144097..0000000000 --- a/data-prepper-plugins/common/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileSource.java +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.dataprepper.plugins.source.file; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.opensearch.dataprepper.metrics.PluginMetrics; -import org.opensearch.dataprepper.model.annotations.DataPrepperPlugin; -import org.opensearch.dataprepper.model.annotations.DataPrepperPluginConstructor; -import org.opensearch.dataprepper.model.buffer.Buffer; -import org.opensearch.dataprepper.model.codec.DecompressionEngine; -import org.opensearch.dataprepper.model.codec.InputCodec; -import org.opensearch.dataprepper.model.configuration.PluginModel; -import org.opensearch.dataprepper.model.configuration.PluginSetting; -import org.opensearch.dataprepper.model.event.EventBuilder; -import org.opensearch.dataprepper.model.event.EventFactory; -import org.opensearch.dataprepper.model.plugin.PluginFactory; -import 
org.opensearch.dataprepper.model.record.Record; -import org.opensearch.dataprepper.model.source.Source; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.TimeoutException; - -import static com.google.common.base.Preconditions.checkNotNull; -import static java.lang.String.format; -import static org.opensearch.dataprepper.logging.DataPrepperMarkers.SENSITIVE; - -@DataPrepperPlugin(name = "file", pluginType = Source.class, pluginConfigurationType = FileSourceConfig.class) -public class FileSource implements Source> { - static final String MESSAGE_KEY = "message"; - private static final Logger LOG = LoggerFactory.getLogger(FileSource.class); - private static final TypeReference> MAP_TYPE_REFERENCE = new TypeReference<>() { }; - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - private static final long STOP_WAIT_MILLIS = 200; - private final FileSourceConfig fileSourceConfig; - private final FileStrategy fileStrategy; - private final EventFactory eventFactory; - private final DecompressionEngine decompressionEngine; - - private Thread readThread; - - private boolean isStopRequested; - private final int writeTimeout; - - @DataPrepperPluginConstructor - public FileSource( - final FileSourceConfig fileSourceConfig, final PluginMetrics pluginMetrics, final PluginFactory pluginFactory, - final EventFactory eventFactory) { - this.eventFactory = eventFactory; - fileSourceConfig.validate(); - this.fileSourceConfig = fileSourceConfig; - this.isStopRequested = false; - this.writeTimeout = FileSourceConfig.DEFAULT_TIMEOUT; - this.decompressionEngine = fileSourceConfig.getCompression().getDecompressionEngine(); - - 
if(fileSourceConfig.getCodec() != null) { - fileStrategy = new CodecFileStrategy(pluginFactory); - } else { - fileStrategy = new ClassicFileStrategy(); - } - } - - - @Override - public void start(final Buffer> buffer) { - checkNotNull(buffer, "Buffer cannot be null for file source to start"); - - LOG.info("Starting file source with {} path.", fileSourceConfig.getFilePathToRead()); - - readThread = new Thread(() -> { - fileStrategy.start(buffer); - LOG.info("Completed reading file."); - }, "file-source"); - readThread.setDaemon(false); - readThread.start(); - } - - @Override - public void stop() { - isStopRequested = true; - - try { - readThread.join(STOP_WAIT_MILLIS); - } catch (final InterruptedException e) { - readThread.interrupt(); - } - } - - private interface FileStrategy { - void start(final Buffer> buffer); - } - - private class ClassicFileStrategy implements FileStrategy { - @Override - public void start(Buffer> buffer) { - Path filePath = Paths.get(fileSourceConfig.getFilePathToRead()); - try (BufferedReader reader = new BufferedReader(new InputStreamReader(decompressionEngine.createInputStream(Files.newInputStream(filePath)), StandardCharsets.UTF_8))) { - String line; - while ((line = reader.readLine()) != null && !isStopRequested) { - writeLineAsEventOrString(line, buffer); - } - } catch (IOException | TimeoutException | IllegalArgumentException ex) { - LOG.error("Error processing the input file path [{}]", fileSourceConfig.getFilePathToRead(), ex); - throw new RuntimeException(format("Error processing the input file %s", - fileSourceConfig.getFilePathToRead()), ex); - } - } - - private Record getEventRecordFromLine(final String line) { - Map structuredLine = new HashMap<>(); - - switch(fileSourceConfig.getFormat()) { - case JSON: - structuredLine = parseJson(line); - break; - case PLAIN: - structuredLine.put(MESSAGE_KEY, line); - break; - } - - return new Record<>( - eventFactory.eventBuilder(EventBuilder.class) - 
.withEventType(fileSourceConfig.getRecordType()) - .withData(structuredLine) - .build()); - } - - private Map parseJson(final String jsonString) { - try { - return OBJECT_MAPPER.readValue(jsonString, MAP_TYPE_REFERENCE); - } catch (JsonProcessingException e) { - LOG.error(SENSITIVE, "Unable to parse json data [{}], assuming plain text", jsonString, e); - final Map plainMap = new HashMap<>(); - plainMap.put(MESSAGE_KEY, jsonString); - return plainMap; - } - } - - // Temporary function to support both trace and log ingestion pipelines. - // TODO: This function should be removed with the completion of: https://github.com/opensearch-project/data-prepper/issues/546 - private void writeLineAsEventOrString(final String line, final Buffer> buffer) throws TimeoutException, IllegalArgumentException { - if (fileSourceConfig.getRecordType().equals(FileSourceConfig.EVENT_TYPE)) { - buffer.write(getEventRecordFromLine(line), writeTimeout); - } else if (fileSourceConfig.getRecordType().equals(FileSourceConfig.DEFAULT_TYPE)) { - buffer.write(new Record<>(line), writeTimeout); - } - } - } - - - private class CodecFileStrategy implements FileStrategy { - - private final InputCodec codec; - - CodecFileStrategy(final PluginFactory pluginFactory) { - final PluginModel codecConfiguration = fileSourceConfig.getCodec(); - final PluginSetting codecPluginSettings = new PluginSetting(codecConfiguration.getPluginName(), codecConfiguration.getPluginSettings()); - codec = pluginFactory.loadPlugin(InputCodec.class, codecPluginSettings); - } - - @Override - public void start(final Buffer> buffer) { - Path filePath = Paths.get(fileSourceConfig.getFilePathToRead()); - try(InputStream is = decompressionEngine.createInputStream(Files.newInputStream(filePath))) { - codec.parse(is, eventRecord -> { - try { - buffer.write((Record) eventRecord, writeTimeout); - } catch (TimeoutException e) { - throw new RuntimeException(e); - } - }); - } catch (final IOException e) { - throw new RuntimeException(e); - } 
- - } - } - -} diff --git a/data-prepper-plugins/common/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileSourceConfig.java b/data-prepper-plugins/common/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileSourceConfig.java deleted file mode 100644 index 9eb8dd961d..0000000000 --- a/data-prepper-plugins/common/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileSourceConfig.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.dataprepper.plugins.source.file; - -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; -import jakarta.validation.constraints.AssertTrue; -import org.opensearch.dataprepper.model.configuration.PluginModel; -import org.opensearch.dataprepper.plugins.codec.CompressionOption; - -import java.util.Objects; - -public class FileSourceConfig { - static final String ATTRIBUTE_PATH = "path"; - static final String ATTRIBUTE_TYPE = "record_type"; - static final String ATTRIBUTE_FORMAT = "format"; - static final int DEFAULT_TIMEOUT = 5_000; - static final String DEFAULT_TYPE = "string"; - static final String DEFAULT_FORMAT = "plain"; - static final String EVENT_TYPE = "event"; - - - @JsonProperty(ATTRIBUTE_PATH) - private String filePathToRead; - - @JsonProperty(ATTRIBUTE_FORMAT) - private String format = DEFAULT_FORMAT; - - @JsonProperty(ATTRIBUTE_TYPE) - private String recordType = DEFAULT_TYPE; - - @JsonProperty("codec") - private PluginModel codec; - - @JsonProperty("compression") - private CompressionOption compression = CompressionOption.NONE; - - public String getFilePathToRead() { - return filePathToRead; - } - - @JsonIgnore - public FileFormat getFormat() { - return FileFormat.getByName(format); - } - - public String getRecordType() { - return recordType; - } - - public PluginModel getCodec() { - return codec; - 
} - - public CompressionOption getCompression() { - return compression; - } - - void validate() { - Objects.requireNonNull(filePathToRead, "File path is required"); - Preconditions.checkArgument(recordType.equals(EVENT_TYPE) || recordType.equals(DEFAULT_TYPE), "Invalid type: must be either [event] or [string]"); - Preconditions.checkArgument(format.equals(DEFAULT_FORMAT) || format.equals("json"), "Invalid file format. Options are [json] and [plain]"); - } - - @AssertTrue(message = "The file source requires recordType to be event when using a codec.") - boolean codeRequiresRecordTypeEvent() { - return codec == null || recordType.equals(EVENT_TYPE); - } -} diff --git a/data-prepper-plugins/common/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileSourceConfigTest.java b/data-prepper-plugins/common/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileSourceConfigTest.java deleted file mode 100644 index 9208c52b66..0000000000 --- a/data-prepper-plugins/common/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileSourceConfigTest.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 - */ - -package org.opensearch.dataprepper.plugins.source.file; - -import com.fasterxml.jackson.databind.ObjectMapper; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.CsvSource; -import org.junit.jupiter.params.provider.ValueSource; -import org.opensearch.dataprepper.model.configuration.PluginModel; - -import java.util.Collections; -import java.util.Map; - -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.equalTo; - -class FileSourceConfigTest { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @ParameterizedTest - @ValueSource(strings = {FileSourceConfig.EVENT_TYPE, FileSourceConfig.DEFAULT_FORMAT}) - void codeRequiresRecordTypeEvent_returns_true_if_no_codec(final String 
recordType) { - final Map fileConfigMap = Map.of(FileSourceConfig.ATTRIBUTE_TYPE, recordType); - final FileSourceConfig objectUnderTest = OBJECT_MAPPER.convertValue(fileConfigMap, FileSourceConfig.class); - - assertThat(objectUnderTest.codeRequiresRecordTypeEvent(), equalTo(true)); - } - - @ParameterizedTest - @CsvSource({ - FileSourceConfig.EVENT_TYPE + ",true", - FileSourceConfig.DEFAULT_FORMAT + ",false" - }) - void codeRequiresRecordTypeEvent_returns_expected_value_when_there_is_a_codec(final String recordType, final boolean expected) { - final Map fileConfigMap = Map.of( - FileSourceConfig.ATTRIBUTE_TYPE, recordType, - "codec", new PluginModel("fake_codec", Collections.emptyMap()) - ); - final FileSourceConfig objectUnderTest = OBJECT_MAPPER.convertValue(fileConfigMap, FileSourceConfig.class); - - assertThat(objectUnderTest.codeRequiresRecordTypeEvent(), equalTo(expected)); - } -} \ No newline at end of file diff --git a/data-prepper-plugins/file-source/build.gradle b/data-prepper-plugins/file-source/build.gradle new file mode 100644 index 0000000000..23d1d154ad --- /dev/null +++ b/data-prepper-plugins/file-source/build.gradle @@ -0,0 +1,61 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ *
+ */
+
+dependencies {
+    implementation project(':data-prepper-api')
+    implementation project(':data-prepper-plugins:common')
+    implementation 'com.fasterxml.jackson.core:jackson-databind'
+    implementation 'io.micrometer:micrometer-core'
+    testImplementation project(':data-prepper-plugins:blocking-buffer')
+    testImplementation project(':data-prepper-test:test-event') // presumably the home of TestEventFactory used by FileSourceTailIT - confirm
+    testImplementation 'org.awaitility:awaitility' // polling assertions in the integration test
+    testImplementation 'com.fasterxml.jackson.datatype:jackson-datatype-jsr310' // JavaTimeModule is imported by the integration test
+}
+
+test {
+    maxHeapSize = '512m' // heap limit for the unit-test JVM
+}
+
+sourceSets {
+    integrationTest { // separate source set rooted at src/integrationTest/java
+        java {
+            compileClasspath = main.output + test.output + compileClasspath // main and test classes are visible to integration tests
+            runtimeClasspath = main.output + test.output + runtimeClasspath
+            srcDir file('src/integrationTest/java')
+        }
+    }
+}
+
+configurations {
+    integrationTestImplementation.extendsFrom testImplementation // integration tests reuse every test dependency
+    integrationTestRuntimeOnly.extendsFrom testRuntimeOnly
+}
+
+task integrationTest(type: Test) { // Test task that runs the integrationTest source set on the JUnit Platform
+    group = 'verification'
+    testClassesDirs = sourceSets.integrationTest.output.classesDirs
+    classpath = sourceSets.integrationTest.runtimeClasspath
+    useJUnitPlatform()
+
+    filter {
+        includeTestsMatching '*IT' // only tests matching the IT suffix pattern are selected
+    }
+}
+
+jacocoTestCoverageVerification {
+    dependsOn jacocoTestReport // the report task runs before verification
+    violationRules {
+        rule {
+            limit {
+                minimum = 1.0 // NOTE(review): presumably requires full coverage for this module - confirm that is intended
+            }
+        }
+    }
+}
diff --git a/data-prepper-plugins/file-source/src/integrationTest/java/org/opensearch/dataprepper/plugins/source/file/FileSourceTailIT.java b/data-prepper-plugins/file-source/src/integrationTest/java/org/opensearch/dataprepper/plugins/source/file/FileSourceTailIT.java
new file mode 100644
index 0000000000..1293acc502
--- /dev/null
+++ b/data-prepper-plugins/file-source/src/integrationTest/java/org/opensearch/dataprepper/plugins/source/file/FileSourceTailIT.java
@@ -0,0 +1,919 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * 
compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; +import org.opensearch.dataprepper.metrics.PluginMetrics; +import org.opensearch.dataprepper.model.acknowledgements.AcknowledgementSet; +import org.opensearch.dataprepper.model.acknowledgements.AcknowledgementSetManager; +import org.opensearch.dataprepper.model.buffer.Buffer; +import org.opensearch.dataprepper.model.event.Event; +import org.opensearch.dataprepper.model.event.EventFactory; +import org.opensearch.dataprepper.model.plugin.PluginFactory; +import org.opensearch.dataprepper.model.record.Record; +import org.opensearch.dataprepper.event.TestEventFactory; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; +import java.util.stream.Collectors; + +import static org.awaitility.Awaitility.await; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.hasSize; +import static 
org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyCollection;
+import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.mock;
+
+@ExtendWith(MockitoExtension.class)
+@MockitoSettings(strictness = Strictness.LENIENT)
+class FileSourceTailIT { // integration tests that start a FileSource against files in a temp directory and inspect what reaches the buffer
+
+    private static final Duration WAIT_TIMEOUT = Duration.ofSeconds(15); // upper bound for most Awaitility polls in this class
+    private static final long ROTATION_DETECTION_DELAY_MS = 2000; // fixed sleep after truncating or renaming a file, also used before a nothing-was-read check
+    private static final long SHORT_DELAY_MS = 1000; // fixed sleep after stop() and before a nothing-was-read check
+    private static final long CLOSE_INACTIVE_WAIT_MS = 5000; // fixed sleep in the close-inactive test, which configures PT2S
+    private static final long DELETION_DETECTION_DELAY_MS = 3000; // fixed sleep after deleting the tailed file
+    private static final long ACK_RETRY_INTERVAL_MS = 500; // NOTE(review): not referenced in the visible part of this class - confirm it is used further down
+
+    @TempDir
+    Path tempDir; // per-test directory supplied by JUnit; all log and checkpoint files are created under it
+
+    @Mock
+    private PluginFactory pluginFactory;
+
+    @Mock
+    private AcknowledgementSetManager acknowledgementSetManager; // NOTE(review): the acknowledgement tests visible below build their own manager mock - confirm this field is used
+
+    private PluginMetrics pluginMetrics;
+    private EventFactory eventFactory;
+    private Buffer> buffer;
+    private List> capturedRecords; // synchronized list filled by the buffer stubs, read by the test thread
+    private FileSource fileSource;
+
+    @BeforeEach
+    @SuppressWarnings("unchecked")
+    void setUp() throws Exception { // builds the metrics, the event factory and a mocked buffer that records every write
+        pluginMetrics = PluginMetrics.fromNames("file", "test-pipeline");
+        eventFactory = TestEventFactory.getTestEventFactory();
+        capturedRecords = Collections.synchronizedList(new ArrayList<>());
+
+        buffer = (Buffer>) mock(Buffer.class);
+        doAnswer(invocation -> { // single-record write: append the record to capturedRecords
+            Record record = invocation.getArgument(0);
+            capturedRecords.add(record);
+            return null;
+        }).when(buffer).write(any(Record.class), anyInt());
+        doAnswer(invocation -> { // batch write: append all records, so either write path is captured
+            Collection> records = invocation.getArgument(0);
+            capturedRecords.addAll(records);
+            return null;
+        }).when(buffer).writeAll(anyCollection(), anyInt());
+    }
+
+    @AfterEach
+    void tearDown() { // stops the source if one is still referenced; a test that stops it itself sets the field to null first
+        if (fileSource != null) {
+            fileSource.stop();
+        }
+    }
+
+    @Test // three lines written before start must arrive, in file order, when the start position is beginning
+    void tail_mode_reads_existing_lines_with_start_position_beginning() throws Exception {
+        final Path logFile = tempDir.resolve("app.log");
+        Files.write(logFile, List.of("line one", "line two", "line three"));
+
+        final FileSourceConfig config = createTailConfig(
+                logFile.toString(), null, "beginning");
+
+        fileSource = createSource(config);
+        fileSource.start(buffer);
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() -> {
+            assertThat(capturedRecords, hasSize(3));
+            assertThat(eventMessage(capturedRecords.get(0)), equalTo("line one"));
+            assertThat(eventMessage(capturedRecords.get(1)), equalTo("line two"));
+            assertThat(eventMessage(capturedRecords.get(2)), equalTo("line three"));
+        });
+    }
+
+    @Test // with start position end, content present before start is not emitted; only the later append is
+    void tail_mode_with_start_position_end_skips_existing_content() throws Exception {
+        final Path logFile = tempDir.resolve("existing.log");
+        Files.write(logFile, List.of("old line 1", "old line 2"));
+
+        final FileSourceConfig config = createTailConfig(
+                logFile.toString(), null, "end");
+
+        fileSource = createSource(config);
+        fileSource.start(buffer);
+
+        Thread.sleep(ROTATION_DETECTION_DELAY_MS); // fixed wait, then check that the old lines produced no records
+        assertThat(capturedRecords, hasSize(0));
+
+        appendLine(logFile, "new line after start");
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() -> {
+            assertThat(capturedRecords, hasSize(1));
+            assertThat(eventMessage(capturedRecords.get(0)), equalTo("new line after start"));
+        });
+    }
+
+    @Test // after the initial line is read, two appended lines must follow in order
+    void tail_mode_follows_appended_lines() throws Exception {
+        final Path logFile = tempDir.resolve("append.log");
+        Files.write(logFile, "initial\n".getBytes(StandardCharsets.UTF_8));
+
+        final FileSourceConfig config = createTailConfig(
+                logFile.toString(), null, "beginning");
+
+        fileSource = createSource(config);
+        fileSource.start(buffer);
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() ->
+                assertThat(capturedRecords, hasSize(1)));
+
+        appendLine(logFile, "appended line 1");
+        appendLine(logFile, "appended line 2");
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() -> {
+            assertThat(capturedRecords, hasSize(3));
+            assertThat(eventMessage(capturedRecords.get(1)), equalTo("appended line 1"));
+            assertThat(eventMessage(capturedRecords.get(2)), 
equalTo("appended line 2"));
+        });
+    }
+
+    @Test // two files match the log glob and one txt file does not, so exactly two records are expected
+    void tail_mode_discovers_files_via_glob_pattern() throws Exception {
+        Files.write(tempDir.resolve("server1.log"), List.of("from server 1"));
+        Files.write(tempDir.resolve("server2.log"), List.of("from server 2"));
+        Files.write(tempDir.resolve("server.txt"), List.of("should not be read"));
+
+        final String globPattern = tempDir.resolve("*.log").toString();
+        final FileSourceConfig config = createTailConfig(
+                null, List.of(globPattern), "beginning");
+
+        fileSource = createSource(config);
+        fileSource.start(buffer);
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() ->
+                assertThat(capturedRecords, hasSize(2))); // record order across the two files is not asserted
+    }
+
+    @Test // both files match the include glob; the one matching the exclude pattern must not be read
+    void tail_mode_excludes_paths_matching_exclude_pattern() throws Exception {
+        Files.write(tempDir.resolve("app.log"), List.of("app log line"));
+        Files.write(tempDir.resolve("debug.log"), List.of("debug log line"));
+
+        final String globPattern = tempDir.resolve("*.log").toString();
+        final String excludePattern = tempDir.resolve("debug*").toString();
+        final FileSourceConfig config = createTailConfigWithExclude(
+                globPattern, excludePattern, "beginning");
+
+        fileSource = createSource(config);
+        fileSource.start(buffer);
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() -> {
+            assertThat(capturedRecords, hasSize(1));
+            assertThat(eventMessage(capturedRecords.get(0)), equalTo("app log line"));
+        });
+    }
+
+    @Test // the file is emptied in place and then receives a shorter line; that line must still be read
+    void tail_mode_detects_copytruncate_rotation() throws Exception {
+        final Path logFile = tempDir.resolve("rotating.log");
+        Files.write(logFile, "this is a long line before truncation happens here\n".getBytes(StandardCharsets.UTF_8)); // presumably long on purpose so the file ends up shorter than the offset already read - confirm
+
+        final FileSourceConfig config = createTailConfig(
+                logFile.toString(), null, "beginning");
+
+        fileSource = createSource(config);
+        fileSource.start(buffer);
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() ->
+                assertThat(capturedRecords, hasSize(1)));
+
+        Files.write(logFile, new byte[0], StandardOpenOption.TRUNCATE_EXISTING); // same path, content replaced by zero bytes
+        
Thread.sleep(ROTATION_DETECTION_DELAY_MS); + appendLine(logFile, "short"); + + await().atMost(Duration.ofSeconds(15)).untilAsserted(() -> { + assertThat(capturedRecords, hasSize(greaterThanOrEqualTo(2))); + assertThat(eventMessage(capturedRecords.get(capturedRecords.size() - 1)), + equalTo("short")); + }); + } + + @Test + void tail_mode_detects_create_rename_rotation() throws Exception { + final Path logFile = tempDir.resolve("app.log"); + Files.write(logFile, "before rotation\n".getBytes(StandardCharsets.UTF_8)); + + final String globPattern = tempDir.resolve("app.log").toString(); + final FileSourceConfig config = createTailConfig( + globPattern, null, "beginning"); + + fileSource = createSource(config); + fileSource.start(buffer); + + await().atMost(WAIT_TIMEOUT).untilAsserted(() -> + assertThat(capturedRecords, hasSize(1))); + + Files.move(logFile, tempDir.resolve("app.log.1")); + Thread.sleep(ROTATION_DETECTION_DELAY_MS); + Files.write(logFile, "after rotation\n".getBytes(StandardCharsets.UTF_8)); + + await().atMost(Duration.ofSeconds(20)).untilAsserted(() -> { + assertThat(capturedRecords, hasSize(greaterThanOrEqualTo(2))); + assertThat(eventMessage(capturedRecords.get(capturedRecords.size() - 1)), + equalTo("after rotation")); + }); + } + + @Test + void tail_mode_includes_file_metadata() throws Exception { + final Path logFile = tempDir.resolve("meta.log"); + Files.write(logFile, List.of("metadata test")); + + final FileSourceConfig config = createTailConfig( + logFile.toString(), null, "beginning"); + + fileSource = createSource(config); + fileSource.start(buffer); + + await().atMost(WAIT_TIMEOUT).untilAsserted(() -> { + assertThat(capturedRecords, hasSize(1)); + final Event event = (Event) capturedRecords.get(0).getData(); + @SuppressWarnings("unchecked") + final Map fileMetadata = (Map) event.get("file", Object.class); + assertThat(fileMetadata.get("path"), + equalTo(logFile.toAbsolutePath().toString())); + assertThat(fileMetadata.get("name"), 
equalTo("meta.log"));
+        });
+    }
+
+    @Test // a second source started with the same checkpoint file must emit only the line appended while stopped
+    void tail_mode_resumes_from_checkpoint_after_restart() throws Exception {
+        final Path logFile = tempDir.resolve("checkpoint.log");
+        final Path checkpointFile = tempDir.resolve("checkpoint.json");
+        Files.write(logFile, List.of("line 1", "line 2", "line 3"));
+
+        final FileSourceConfig config = createTailConfigWithCheckpoint(
+                logFile.toString(), "beginning", checkpointFile.toString());
+
+        fileSource = createSource(config);
+        fileSource.start(buffer);
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() ->
+                assertThat(capturedRecords, hasSize(3)));
+
+        fileSource.stop();
+        fileSource = null; // keeps tearDown from stopping the first source again if the restart below fails
+        Thread.sleep(SHORT_DELAY_MS);
+
+        capturedRecords.clear(); // from here on only records from the restarted source are counted
+        appendLine(logFile, "line 4");
+
+        final FileSourceConfig config2 = createTailConfigWithCheckpoint(
+                logFile.toString(), "beginning", checkpointFile.toString());
+        fileSource = createSource(config2);
+        fileSource.start(buffer);
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() -> {
+            assertThat(capturedRecords, hasSize(1)); // start position is beginning again, so a size of 1 means the first three lines were not replayed
+            assertThat(eventMessage(capturedRecords.get(0)), equalTo("line 4"));
+        });
+    }
+
+    @Test // a file created after start that matches the glob must be picked up
+    void tail_mode_discovers_new_file_created_after_start() throws Exception {
+        final String globPattern = tempDir.resolve("*.log").toString();
+        final FileSourceConfig config = createTailConfig(
+                null, List.of(globPattern), "beginning");
+
+        fileSource = createSource(config);
+        fileSource.start(buffer);
+
+        Thread.sleep(SHORT_DELAY_MS);
+        assertThat(capturedRecords, hasSize(0)); // no matching file exists yet
+
+        Files.write(tempDir.resolve("new-file.log"), List.of("discovered after start"));
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() -> {
+            assertThat(capturedRecords, hasSize(1));
+            assertThat(eventMessage(capturedRecords.get(0)), equalTo("discovered after start"));
+        });
+    }
+
+    @Test // the first write calls time out; all three lines must still reach the buffer
+    void tail_mode_handles_back_pressure_with_retry() throws Exception {
+        final Path logFile = tempDir.resolve("backpressure.log");
+        Files.write(logFile, "line1\nline2\nline3\n".getBytes(StandardCharsets.UTF_8));
+
+        final AtomicInteger writeAttempts = new AtomicInteger(0);
+        final int failFirstN = 3;
+
+        @SuppressWarnings("unchecked")
+        final Buffer> slowBuffer = (Buffer>) mock(Buffer.class); // only write is stubbed on this mock; writeAll is left at the Mockito default
+        doAnswer(invocation -> {
+            if (writeAttempts.incrementAndGet() <= failFirstN) { // attempts 1 to failFirstN are rejected
+                throw new TimeoutException("Buffer full");
+            }
+            Record record = invocation.getArgument(0);
+            capturedRecords.add(record);
+            return null;
+        }).when(slowBuffer).write(any(Record.class), anyInt());
+
+        final FileSourceConfig config = createTailConfig(
+                logFile.toString(), null, "beginning");
+
+        fileSource = createSource(config);
+        fileSource.start(slowBuffer);
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() -> {
+            assertThat(capturedRecords, hasSize(3));
+            assertThat(eventMessage(capturedRecords.get(0)), equalTo("line1"));
+        });
+        assertThat(writeAttempts.get(), greaterThanOrEqualTo(failFirstN + 3)); // at least the rejected attempts plus one successful write per line
+    }
+
+    @Test // firing the captured callback with true must leave a checkpoint file that mentions committedOffset
+    void tail_mode_with_acknowledgements_commits_offset_on_positive_ack() throws Exception {
+        final Path logFile = tempDir.resolve("ack.log");
+        final Path checkpointFile = tempDir.resolve("ack-checkpoint.json");
+        Files.write(logFile, "ack line 1\nack line 2\n".getBytes(StandardCharsets.UTF_8));
+
+        final List> ackCallbacks = Collections.synchronizedList(new ArrayList<>());
+
+        final AcknowledgementSet mockAckSet = mock(AcknowledgementSet.class);
+        final AcknowledgementSetManager ackManager = mock(AcknowledgementSetManager.class);
+        doAnswer(invocation -> { // capture the completion callback so the test can fire it by hand
+            Consumer callback = invocation.getArgument(0);
+            ackCallbacks.add(callback);
+            return mockAckSet;
+        }).when(ackManager).create(any(), org.mockito.ArgumentMatchers.any(Duration.class));
+
+        final FileSourceConfig config = buildConfigWithAcknowledgements(
+                logFile.toString(), "beginning", checkpointFile.toString());
+
+        fileSource = new FileSource(config, pluginMetrics, pluginFactory, eventFactory, ackManager); // built directly so the local manager mock is injected
+        fileSource.start(buffer);
+
+        
await().atMost(WAIT_TIMEOUT).untilAsserted(() ->
+                assertThat(capturedRecords, hasSize(2)));
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() ->
+                assertThat(ackCallbacks, hasSize(greaterThanOrEqualTo(1)))); // wait until the source has asked the manager for at least one acknowledgement set
+
+        ackCallbacks.get(0).accept(true); // simulate a positive acknowledgement for the first set
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() -> {
+            assertThat(Files.exists(checkpointFile), equalTo(true));
+            final String checkpointContent = Files.readString(checkpointFile);
+            assertThat(checkpointContent.contains("committedOffset"), equalTo(true)); // NOTE(review): only checks that the key is present; the negative-ack test expects the same key with value 0, so this presumably passes even if the offset never advances - consider asserting a non-zero value
+        });
+    }
+
+    @Test // firing the captured callback with false must leave the committed offset at 0 in the checkpoint file
+    void tail_mode_with_acknowledgements_does_not_advance_offset_on_negative_ack() throws Exception {
+        final Path logFile = tempDir.resolve("nack.log");
+        final Path checkpointFile = tempDir.resolve("nack-checkpoint.json");
+        Files.write(logFile, "nack line\n".getBytes(StandardCharsets.UTF_8));
+
+        final List> ackCallbacks = Collections.synchronizedList(new ArrayList<>());
+
+        final AcknowledgementSet mockAckSet = mock(AcknowledgementSet.class);
+        final AcknowledgementSetManager ackManager = mock(AcknowledgementSetManager.class);
+        doAnswer(invocation -> { // capture the completion callback so the test can fire it by hand
+            Consumer callback = invocation.getArgument(0);
+            ackCallbacks.add(callback);
+            return mockAckSet;
+        }).when(ackManager).create(any(), org.mockito.ArgumentMatchers.any(Duration.class));
+
+        final FileSourceConfig config = buildConfigWithAcknowledgements(
+                logFile.toString(), "beginning", checkpointFile.toString());
+
+        fileSource = new FileSource(config, pluginMetrics, pluginFactory, eventFactory, ackManager);
+        fileSource.start(buffer);
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() ->
+                assertThat(capturedRecords, hasSize(1)));
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() ->
+                assertThat(ackCallbacks, hasSize(greaterThanOrEqualTo(1))));
+
+        ackCallbacks.get(0).accept(false); // simulate a negative acknowledgement
+
+        await().atMost(WAIT_TIMEOUT).untilAsserted(() -> {
+            assertThat(Files.exists(checkpointFile), equalTo(true));
+            final String checkpointContent = Files.readString(checkpointFile);
+            
assertThat(checkpointContent.contains("\"committedOffset\":0"), equalTo(true)); + }); + } + + @Test + void tail_mode_max_active_files_queues_excess_files() throws Exception { + for (int i = 0; i < 5; i++) { + Files.write(tempDir.resolve("file" + i + ".log"), + ("content from file " + i + "\n").getBytes(StandardCharsets.UTF_8)); + } + + final String globPattern = tempDir.resolve("*.log").toString(); + final FileSourceConfig config = buildConfigWithMaxActiveFiles( + globPattern, "beginning", 3); + + fileSource = createSource(config); + fileSource.start(buffer); + + await().atMost(Duration.ofSeconds(30)).untilAsserted(() -> + assertThat(capturedRecords, hasSize(greaterThanOrEqualTo(5)))); + } + + @Test + void tail_mode_max_read_time_prevents_starvation() throws Exception { + final Path largeFile = tempDir.resolve("large.log"); + final StringBuilder content = new StringBuilder(); + for (int i = 0; i < 1000; i++) { + content.append("line ").append(i).append("\n"); + } + Files.write(largeFile, content.toString().getBytes(StandardCharsets.UTF_8)); + + final Path smallFile = tempDir.resolve("small.log"); + Files.write(smallFile, "small file line\n".getBytes(StandardCharsets.UTF_8)); + + final String globPattern = tempDir.resolve("*.log").toString(); + final FileSourceConfig config = createTailConfig( + null, List.of(globPattern), "beginning"); + + fileSource = createSource(config); + fileSource.start(buffer); + + await().atMost(Duration.ofSeconds(20)).untilAsserted(() -> { + final boolean hasSmallFileLine = capturedRecords.stream() + .anyMatch(r -> "small file line".equals(eventMessage(r))); + assertThat(hasSmallFileLine, equalTo(true)); + }); + } + + @Test + void tail_mode_close_inactive_releases_file_handle() throws Exception { + final Path logFile = tempDir.resolve("inactive.log"); + Files.write(logFile, "initial line\n".getBytes(StandardCharsets.UTF_8)); + + final FileSourceConfig config = buildConfigWithCloseInactive( + logFile.toString(), "beginning", "PT2S"); + + 
fileSource = createSource(config); + fileSource.start(buffer); + + await().atMost(WAIT_TIMEOUT).untilAsserted(() -> + assertThat(capturedRecords, hasSize(1))); + + Thread.sleep(CLOSE_INACTIVE_WAIT_MS); + + appendLine(logFile, "after inactive"); + + await().atMost(WAIT_TIMEOUT).untilAsserted(() -> + assertThat(capturedRecords, hasSize(greaterThanOrEqualTo(2)))); + } + + @Test + void tail_mode_handles_file_deleted_while_tailing() throws Exception { + final Path logFile = tempDir.resolve("deleteme.log"); + Files.write(logFile, "will be deleted\n".getBytes(StandardCharsets.UTF_8)); + + final FileSourceConfig config = createTailConfig( + logFile.toString(), null, "beginning"); + + fileSource = createSource(config); + fileSource.start(buffer); + + await().atMost(WAIT_TIMEOUT).untilAsserted(() -> + assertThat(capturedRecords, hasSize(1))); + + Files.delete(logFile); + Thread.sleep(DELETION_DETECTION_DELAY_MS); + + assertThat(fileSource.areAcknowledgementsEnabled(), equalTo(false)); + } + + @Test + void tail_mode_max_line_length_truncates_long_lines() throws Exception { + final Path logFile = tempDir.resolve("longline.log"); + final String longLine = "x".repeat(5000); + Files.write(logFile, (longLine + "\n").getBytes(StandardCharsets.UTF_8)); + + final FileSourceConfig config = buildConfigWithMaxLineLength( + logFile.toString(), "beginning", 1024); + + fileSource = createSource(config); + fileSource.start(buffer); + + await().atMost(WAIT_TIMEOUT).untilAsserted(() -> { + assertThat(capturedRecords, hasSize(1)); + final String message = eventMessage(capturedRecords.get(0)); + assertThat(message.length(), equalTo(1024)); + }); + } + + @Test + void tail_mode_back_pressure_during_rotation_does_not_lose_data() throws Exception { + final Path logFile = tempDir.resolve("bp-rotate.log"); + Files.write(logFile, "line before rotation\n".getBytes(StandardCharsets.UTF_8)); + + final AtomicInteger writeCount = new AtomicInteger(0); + final int blockFirstNWrites = 2; + + 
@SuppressWarnings("unchecked") + final Buffer> slowBuffer = (Buffer>) mock(Buffer.class); + doAnswer(invocation -> { + if (writeCount.incrementAndGet() <= blockFirstNWrites) { + throw new TimeoutException("Buffer full - simulating back pressure"); + } + Record record = invocation.getArgument(0); + capturedRecords.add(record); + return null; + }).when(slowBuffer).write(any(Record.class), anyInt()); + + final FileSourceConfig config = createTailConfig( + logFile.toString(), null, "beginning"); + + fileSource = createSource(config); + fileSource.start(slowBuffer); + + await().atMost(WAIT_TIMEOUT).untilAsserted(() -> + assertThat(capturedRecords, hasSize(1))); + + Files.move(logFile, tempDir.resolve("bp-rotate.log.1")); + Thread.sleep(SHORT_DELAY_MS); + Files.write(logFile, "line after rotation\n".getBytes(StandardCharsets.UTF_8)); + + await().atMost(Duration.ofSeconds(20)).untilAsserted(() -> + assertThat(capturedRecords, hasSize(greaterThanOrEqualTo(2)))); + } + + @Test + void tail_mode_negative_ack_retry_exhaustion_advances_offset() throws Exception { + final Path logFile = tempDir.resolve("retry-exhaust.log"); + final Path checkpointFile = tempDir.resolve("retry-exhaust-checkpoint.json"); + Files.write(logFile, "retry line\n".getBytes(StandardCharsets.UTF_8)); + + final List> ackCallbacks = Collections.synchronizedList(new ArrayList<>()); + + final AcknowledgementSet mockAckSet = mock(AcknowledgementSet.class); + final AcknowledgementSetManager ackManager = mock(AcknowledgementSetManager.class); + doAnswer(invocation -> { + Consumer callback = invocation.getArgument(0); + ackCallbacks.add(callback); + return mockAckSet; + }).when(ackManager).create(any(), org.mockito.ArgumentMatchers.any(Duration.class)); + + final FileSourceConfig config = buildConfigWithAcknowledgementsAndRetries( + logFile.toString(), "beginning", checkpointFile.toString(), 2); + + fileSource = new FileSource(config, pluginMetrics, pluginFactory, eventFactory, ackManager); + 
fileSource.start(buffer); + + await().atMost(WAIT_TIMEOUT).untilAsserted(() -> + assertThat(capturedRecords, hasSize(1))); + + await().atMost(WAIT_TIMEOUT).untilAsserted(() -> + assertThat(ackCallbacks, hasSize(greaterThanOrEqualTo(1)))); + + ackCallbacks.get(0).accept(false); + Thread.sleep(ACK_RETRY_INTERVAL_MS); + ackCallbacks.get(0).accept(false); + Thread.sleep(ACK_RETRY_INTERVAL_MS); + ackCallbacks.get(0).accept(false); + + await().atMost(WAIT_TIMEOUT).untilAsserted(() -> { + assertThat(Files.exists(checkpointFile), equalTo(true)); + final String content = Files.readString(checkpointFile); + assertThat(content.contains("\"committedOffset\":0"), equalTo(false)); + }); + } + + @Test + void tail_mode_checkpoint_is_persisted_periodically() throws Exception { + final Path logFile = tempDir.resolve("checkpoint-persist.log"); + final Path checkpointFile = tempDir.resolve("persist-checkpoint.json"); + Files.write(logFile, "checkpoint test line\n".getBytes(StandardCharsets.UTF_8)); + + final FileSourceConfig config = createTailConfigWithCheckpoint( + logFile.toString(), "beginning", checkpointFile.toString()); + + fileSource = createSource(config); + fileSource.start(buffer); + + await().atMost(WAIT_TIMEOUT).untilAsserted(() -> + assertThat(capturedRecords, hasSize(1))); + + await().atMost(Duration.ofSeconds(20)).untilAsserted(() -> { + assertThat(Files.exists(checkpointFile), equalTo(true)); + final String content = Files.readString(checkpointFile); + assertThat(content.contains("readOffset"), equalTo(true)); + }); + } + + @Test + void tail_mode_both_path_and_paths_are_merged() throws Exception { + final Path singleFile = tempDir.resolve("single.log"); + Files.write(singleFile, "from single path\n".getBytes(StandardCharsets.UTF_8)); + + final Path globFile = tempDir.resolve("glob-match.log"); + Files.write(globFile, "from glob path\n".getBytes(StandardCharsets.UTF_8)); + + final String globPattern = tempDir.resolve("glob-*.log").toString(); + + final ObjectMapper 
mapper = new ObjectMapper(); + mapper.registerModule(new JavaTimeModule()); + final Map configMap = new HashMap<>(); + configMap.put("tail", true); + configMap.put("path", singleFile.toString()); + configMap.put("paths", List.of(globPattern)); + configMap.put("start_position", "beginning"); + configMap.put("reader_threads", 2); + configMap.put("include_file_metadata", true); + configMap.put("checkpoint_file", tempDir.resolve("merged-checkpoint.json").toString()); + final FileSourceConfig config = mapper.convertValue(configMap, FileSourceConfig.class); + + fileSource = createSource(config); + fileSource.start(buffer); + + await().atMost(WAIT_TIMEOUT).untilAsserted(() -> + assertThat(capturedRecords, hasSize(2))); + + final List messages = capturedRecords.stream() + .map(this::eventMessage) + .collect(Collectors.toList()); + assertThat(messages.contains("from single path"), equalTo(true)); + assertThat(messages.contains("from glob path"), equalTo(true)); + } + + @Test + void tail_mode_close_removed_true_stops_reading_deleted_file() throws Exception { + final Path logFile = tempDir.resolve("close-removed.log"); + Files.write(logFile, "will be removed\n".getBytes(StandardCharsets.UTF_8)); + + final FileSourceConfig config = createTailConfig( + logFile.toString(), null, "beginning"); + + fileSource = createSource(config); + fileSource.start(buffer); + + await().atMost(WAIT_TIMEOUT).untilAsserted(() -> + assertThat(capturedRecords, hasSize(1))); + + Files.delete(logFile); + Thread.sleep(DELETION_DETECTION_DELAY_MS); + + final int recordsAfterDelete = capturedRecords.size(); + Thread.sleep(ROTATION_DETECTION_DELAY_MS); + assertThat(capturedRecords.size(), equalTo(recordsAfterDelete)); + } + + @Test + void tail_mode_multiple_files_read_concurrently_with_reader_threads() throws Exception { + for (int i = 0; i < 4; i++) { + final Path file = tempDir.resolve("concurrent" + i + ".log"); + final StringBuilder content = new StringBuilder(); + for (int j = 0; j < 10; j++) { + 
content.append("file").append(i).append("-line").append(j).append("\n"); + } + Files.write(file, content.toString().getBytes(StandardCharsets.UTF_8)); + } + + final String globPattern = tempDir.resolve("concurrent*.log").toString(); + final FileSourceConfig config = createTailConfig( + null, List.of(globPattern), "beginning"); + + fileSource = createSource(config); + fileSource.start(buffer); + + await().atMost(Duration.ofSeconds(20)).untilAsserted(() -> + assertThat(capturedRecords, hasSize(40))); + } + + @Test + void tail_mode_empty_file_produces_no_events() throws Exception { + final Path emptyFile = tempDir.resolve("empty.log"); + Files.write(emptyFile, new byte[0]); + + final FileSourceConfig config = createTailConfig( + emptyFile.toString(), null, "beginning"); + + fileSource = createSource(config); + fileSource.start(buffer); + + Thread.sleep(DELETION_DETECTION_DELAY_MS); + assertThat(capturedRecords, hasSize(0)); + + appendLine(emptyFile, "content after empty"); + + await().atMost(WAIT_TIMEOUT).untilAsserted(() -> { + assertThat(capturedRecords, hasSize(1)); + assertThat(eventMessage(capturedRecords.get(0)), equalTo("content after empty")); + }); + } + + @Test + void tail_mode_partial_line_without_newline_flushed_on_read_timeout() throws Exception { + final Path logFile = tempDir.resolve("partial.log"); + Files.write(logFile, "no newline at end".getBytes(StandardCharsets.UTF_8)); + + final FileSourceConfig config = createTailConfig( + logFile.toString(), null, "beginning"); + + fileSource = createSource(config); + fileSource.start(buffer); + + await().atMost(Duration.ofSeconds(20)).untilAsserted(() -> { + assertThat(capturedRecords, hasSize(1)); + assertThat(eventMessage(capturedRecords.get(0)), equalTo("no newline at end")); + }); + } + + private FileSource createSource(final FileSourceConfig config) { + return new FileSource(config, pluginMetrics, pluginFactory, eventFactory, acknowledgementSetManager); + } + + private FileSourceConfig 
createTailConfig(final String path, final List paths, final String startPosition) { + return buildConfig(path, paths, null, startPosition, null); + } + + private FileSourceConfig createTailConfigWithExclude(final String globPattern, final String excludePattern, + final String startPosition) { + return buildConfig(null, List.of(globPattern), List.of(excludePattern), startPosition, null); + } + + private FileSourceConfig createTailConfigWithCheckpoint(final String path, final String startPosition, + final String checkpointFilePath) { + return buildConfig(path, null, null, startPosition, checkpointFilePath); + } + + private FileSourceConfig buildConfig(final String path, final List paths, + final List excludePaths, final String startPosition, + final String checkpointFile) { + final ObjectMapper mapper = new ObjectMapper(); + mapper.registerModule(new JavaTimeModule()); + final Map configMap = new HashMap<>(); + configMap.put("tail", true); + configMap.put("start_position", startPosition != null ? 
startPosition : "beginning"); + configMap.put("reader_threads", 2); + configMap.put("include_file_metadata", true); + configMap.put("poll_interval", "PT0.5S"); + configMap.put("rotate_wait", "PT0.5S"); + if (path != null) { + configMap.put("path", path); + } + if (paths != null) { + configMap.put("paths", paths); + } + if (excludePaths != null) { + configMap.put("exclude_paths", excludePaths); + } + if (checkpointFile != null) { + configMap.put("checkpoint_file", checkpointFile); + } else { + configMap.put("checkpoint_file", tempDir.resolve("default-checkpoint.json").toString()); + } + return mapper.convertValue(configMap, FileSourceConfig.class); + } + + private FileSourceConfig buildConfigWithAcknowledgements(final String path, final String startPosition, + final String checkpointFile) { + final ObjectMapper mapper = new ObjectMapper(); + mapper.registerModule(new JavaTimeModule()); + final Map configMap = new HashMap<>(); + configMap.put("tail", true); + configMap.put("path", path); + configMap.put("start_position", startPosition); + configMap.put("reader_threads", 2); + configMap.put("include_file_metadata", true); + configMap.put("acknowledgments", true); + configMap.put("batch_size", 10); + configMap.put("checkpoint_file", checkpointFile); + return mapper.convertValue(configMap, FileSourceConfig.class); + } + + private FileSourceConfig buildConfigWithMaxActiveFiles(final String globPattern, final String startPosition, + final int maxActiveFiles) { + final ObjectMapper mapper = new ObjectMapper(); + mapper.registerModule(new JavaTimeModule()); + final Map configMap = new HashMap<>(); + configMap.put("tail", true); + configMap.put("paths", List.of(globPattern)); + configMap.put("start_position", startPosition); + configMap.put("reader_threads", 2); + configMap.put("max_active_files", maxActiveFiles); + configMap.put("include_file_metadata", true); + configMap.put("checkpoint_file", tempDir.resolve("max-active-checkpoint.json").toString()); + return 
mapper.convertValue(configMap, FileSourceConfig.class); + } + + private FileSourceConfig buildConfigWithCloseInactive(final String path, final String startPosition, + final String closeInactive) { + final ObjectMapper mapper = new ObjectMapper(); + mapper.registerModule(new JavaTimeModule()); + final Map configMap = new HashMap<>(); + configMap.put("tail", true); + configMap.put("path", path); + configMap.put("start_position", startPosition); + configMap.put("reader_threads", 2); + configMap.put("include_file_metadata", true); + configMap.put("close_inactive", closeInactive); + configMap.put("checkpoint_file", tempDir.resolve("close-inactive-checkpoint.json").toString()); + return mapper.convertValue(configMap, FileSourceConfig.class); + } + + private FileSourceConfig buildConfigWithMaxLineLength(final String path, final String startPosition, + final int maxLineLength) { + final ObjectMapper mapper = new ObjectMapper(); + mapper.registerModule(new JavaTimeModule()); + final Map configMap = new HashMap<>(); + configMap.put("tail", true); + configMap.put("path", path); + configMap.put("start_position", startPosition); + configMap.put("reader_threads", 2); + configMap.put("include_file_metadata", true); + configMap.put("max_line_length", maxLineLength); + configMap.put("checkpoint_file", tempDir.resolve("maxline-checkpoint.json").toString()); + return mapper.convertValue(configMap, FileSourceConfig.class); + } + + private FileSourceConfig buildConfigWithAcknowledgementsAndRetries(final String path, final String startPosition, + final String checkpointFile, final int maxRetries) { + final ObjectMapper mapper = new ObjectMapper(); + mapper.registerModule(new JavaTimeModule()); + final Map configMap = new HashMap<>(); + configMap.put("tail", true); + configMap.put("path", path); + configMap.put("start_position", startPosition); + configMap.put("reader_threads", 2); + configMap.put("include_file_metadata", true); + configMap.put("acknowledgments", true); + 
configMap.put("batch_size", 10); + configMap.put("max_acknowledgment_retries", maxRetries); + configMap.put("checkpoint_file", checkpointFile); + return mapper.convertValue(configMap, FileSourceConfig.class); + } + + private FileSourceConfig buildConfigWithCheckpointCleanup(final String path, final String startPosition, + final String checkpointFile, final String cleanupAfter) { + final ObjectMapper mapper = new ObjectMapper(); + mapper.registerModule(new JavaTimeModule()); + final Map configMap = new HashMap<>(); + configMap.put("tail", true); + configMap.put("path", path); + configMap.put("start_position", startPosition); + configMap.put("reader_threads", 2); + configMap.put("include_file_metadata", true); + configMap.put("checkpoint_cleanup_after", cleanupAfter); + configMap.put("checkpoint_file", checkpointFile); + return mapper.convertValue(configMap, FileSourceConfig.class); + } + + private void appendLine(final Path file, final String line) throws IOException { + Files.write(file, (line + "\n").getBytes(StandardCharsets.UTF_8), + StandardOpenOption.APPEND, StandardOpenOption.CREATE); + } + + private String eventMessage(final Record record) { + final Event event = (Event) record.getData(); + return event.get("message", String.class); + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/CheckpointEntry.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/CheckpointEntry.java new file mode 100644 index 0000000000..82ecb8641d --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/CheckpointEntry.java @@ -0,0 +1,77 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +public final class CheckpointEntry { + + private long readOffset; + private long committedOffset; + private CheckpointStatus status; + private long lastUpdatedMillis; + + @JsonCreator + public CheckpointEntry( + @JsonProperty("readOffset") final long readOffset, + @JsonProperty("committedOffset") final long committedOffset, + @JsonProperty("status") final CheckpointStatus status, + @JsonProperty("lastUpdatedMillis") final long lastUpdatedMillis) { + this.readOffset = readOffset; + this.committedOffset = committedOffset; + this.status = status; + this.lastUpdatedMillis = lastUpdatedMillis; + } + + public CheckpointEntry(final long readOffset, final long committedOffset, final CheckpointStatus status) { + this(readOffset, committedOffset, status, System.currentTimeMillis()); + } + + public CheckpointEntry() { + this(0, 0, CheckpointStatus.ACTIVE); + } + + public synchronized long getReadOffset() { + return readOffset; + } + + public synchronized void setReadOffset(final long readOffset) { + this.readOffset = readOffset; + this.lastUpdatedMillis = System.currentTimeMillis(); + } + + public synchronized long getCommittedOffset() { + return committedOffset; + } + + public synchronized void setCommittedOffset(final long committedOffset) { + this.committedOffset = committedOffset; + this.lastUpdatedMillis = System.currentTimeMillis(); + } + + public synchronized CheckpointStatus getStatus() { + return status; + } + + public synchronized void setStatus(final CheckpointStatus status) { + this.status = status; + this.lastUpdatedMillis = System.currentTimeMillis(); + } + + public synchronized long getLastUpdatedMillis() { + return lastUpdatedMillis; + } + + public synchronized CheckpointEntry snapshot() { + return new CheckpointEntry(readOffset, committedOffset, status, lastUpdatedMillis); + } +} 
diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/CheckpointRegistry.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/CheckpointRegistry.java new file mode 100644 index 0000000000..d1e3b0b1ac --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/CheckpointRegistry.java @@ -0,0 +1,160 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.time.Duration; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; + +public final class CheckpointRegistry { + + private static final Logger LOG = LoggerFactory.getLogger(CheckpointRegistry.class); + private static final TypeReference> MAP_TYPE = new TypeReference<>() { }; + + private final ConcurrentHashMap storage; + private final ObjectMapper objectMapper; + private final Path checkpointFile; + private final Duration cleanupAfter; + private final ScheduledExecutorService scheduler; + + public CheckpointRegistry(final Path checkpointFile, final Duration flushInterval, final 
Duration cleanupAfter) { + this(checkpointFile, flushInterval, cleanupAfter, () -> Executors.newSingleThreadScheduledExecutor(r -> { + final Thread thread = new Thread(r, "file-checkpoint-flush"); + thread.setDaemon(true); + return thread; + })); + } + + CheckpointRegistry(final Path checkpointFile, final Duration flushInterval, final Duration cleanupAfter, + final Supplier schedulerSupplier) { + this.checkpointFile = checkpointFile; + this.cleanupAfter = cleanupAfter; + this.objectMapper = new ObjectMapper(); + this.storage = new ConcurrentHashMap<>(); + this.scheduler = schedulerSupplier.get(); + + load(); + scheduler.scheduleAtFixedRate(this::flush, flushInterval.toMillis(), flushInterval.toMillis(), TimeUnit.MILLISECONDS); + } + + public CheckpointEntry getOrCreate(final String key) { + return storage.computeIfAbsent(key, k -> new CheckpointEntry()); + } + + public CheckpointEntry get(final String key) { + return storage.get(key); + } + + public void markCompleted(final String key) { + final CheckpointEntry entry = storage.get(key); + if (entry != null) { + entry.setStatus(CheckpointStatus.COMPLETED); + } + } + + public synchronized void flush() { + if (checkpointFile == null) { + return; + } + try { + cleanupStaleEntries(); + final Map snapshot = new HashMap<>(); + for (final Map.Entry entry : storage.entrySet()) { + snapshot.put(entry.getKey(), entry.getValue().snapshot()); + } + final Path tempFile = checkpointFile.resolveSibling(checkpointFile.getFileName() + ".tmp"); + objectMapper.writeValue(tempFile.toFile(), snapshot); + Files.move(tempFile, checkpointFile, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE); + } catch (final IOException e) { + LOG.error("Failed to flush checkpoint file", e); + } + } + + public void shutdown() { + scheduler.shutdown(); + boolean interrupted = false; + try { + if (!scheduler.awaitTermination(5, TimeUnit.SECONDS)) { + scheduler.shutdownNow(); + } + } catch (final InterruptedException e) { + 
scheduler.shutdownNow(); + interrupted = true; + } + flush(); + if (interrupted) { + Thread.currentThread().interrupt(); + } + } + + private void load() { + if (checkpointFile == null) { + return; + } + try { + if (checkpointFile.getParent() != null) { + Files.createDirectories(checkpointFile.getParent()); + } + final Map loaded = objectMapper.readValue(checkpointFile.toFile(), MAP_TYPE); + if (loaded != null) { + storage.putAll(loaded); + } + LOG.info("Loaded {} checkpoint entries from {}", storage.size(), checkpointFile); + } catch (final FileNotFoundException | NoSuchFileException e) { + LOG.debug("No existing checkpoint file at {}. Starting with empty state.", checkpointFile); + } catch (final IOException e) { + LOG.warn("Corrupt or unreadable checkpoint file at {}. Renaming to .corrupt and starting fresh.", checkpointFile, e); + renameCorruptFile(); + } + } + + private void renameCorruptFile() { + try { + final Path corruptPath = checkpointFile.resolveSibling(checkpointFile.getFileName() + ".corrupt"); + Files.move(checkpointFile, corruptPath, StandardCopyOption.REPLACE_EXISTING); + LOG.info("Renamed corrupt checkpoint file to {}", corruptPath); + } catch (final IOException renameEx) { + LOG.warn("Failed to rename corrupt checkpoint file at {}", checkpointFile, renameEx); + } + } + + private void cleanupStaleEntries() { + final long now = System.currentTimeMillis(); + final long cleanupThreshold = cleanupAfter.toMillis(); + final Iterator> iterator = storage.entrySet().iterator(); + while (iterator.hasNext()) { + final Map.Entry entry = iterator.next(); + final CheckpointEntry checkpoint = entry.getValue(); + if (CheckpointStatus.COMPLETED == checkpoint.getStatus() && + (now - checkpoint.getLastUpdatedMillis()) > cleanupThreshold) { + iterator.remove(); + LOG.debug("Removed stale checkpoint entry: {}", entry.getKey()); + } + } + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/CheckpointStatus.java 
b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/CheckpointStatus.java new file mode 100644 index 0000000000..3d7874d149 --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/CheckpointStatus.java @@ -0,0 +1,49 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; + +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public enum CheckpointStatus { + ACTIVE("ACTIVE"), + COMPLETED("COMPLETED"); + + private static final Map NAMES_MAP = Stream.of(values()) + .collect(Collectors.toMap(CheckpointStatus::getValue, v -> v)); + + private final String value; + + CheckpointStatus(final String value) { + this.value = value; + } + + @JsonValue + public String getValue() { + return value; + } + + @JsonCreator + public static CheckpointStatus fromString(final String value) { + if (value == null) { + throw new IllegalArgumentException("Invalid checkpoint status: null. Valid values are: " + NAMES_MAP.keySet()); + } + final CheckpointStatus status = NAMES_MAP.get(value.toUpperCase()); + if (status == null) { + throw new IllegalArgumentException("Invalid checkpoint status: " + value + ". 
Valid values are: " + NAMES_MAP.keySet()); + } + return status; + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/DefaultFileSystemOperations.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/DefaultFileSystemOperations.java new file mode 100644 index 0000000000..bd679a7ad6 --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/DefaultFileSystemOperations.java @@ -0,0 +1,54 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import java.io.IOException; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.stream.Stream; + +final class DefaultFileSystemOperations implements FileSystemOperations { + + @Override + public FileChannel openReadChannel(final Path path) throws IOException { + return FileChannel.open(path, StandardOpenOption.READ); + } + + @Override + public BasicFileAttributes readAttributes(final Path path) throws IOException { + return Files.readAttributes(path, BasicFileAttributes.class); + } + + @Override + public Stream listDirectory(final Path directory) throws IOException { + return Files.list(directory); + } + + @Override + public boolean exists(final Path path) { + return Files.exists(path); + } + + @Override + public long size(final Path path) throws IOException { + return Files.size(path); + } + + @Override + public byte[] readBytes(final Path path, final int length) throws IOException { + try (final var inputStream = Files.newInputStream(path)) { + return 
inputStream.readNBytes(length); + } + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/DirectoryWatcher.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/DirectoryWatcher.java new file mode 100644 index 0000000000..7b61d67d56 --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/DirectoryWatcher.java @@ -0,0 +1,403 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.ClosedWatchServiceException; +import java.nio.file.FileStore; +import java.nio.file.FileSystems; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardWatchEventKinds; +import java.nio.file.WatchEvent; +import java.nio.file.WatchKey; +import java.nio.file.WatchService; +import java.time.Duration; +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Executors; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; +import java.util.function.Supplier; + +public final class DirectoryWatcher { + + @FunctionalInterface + interface WatchServiceFactory { + WatchService create() throws IOException; + } + + private static final Logger LOG = LoggerFactory.getLogger(DirectoryWatcher.class); + private static final long SHUTDOWN_TIMEOUT_SECONDS = 10; + private static final Set NETWORK_FS_TYPES = Set.of( + "nfs", 
"nfs4", "cifs", "smb", "smb2", "fuse.sshfs", "afs", "9p" + ); + + private final GlobPathResolver globPathResolver; + private final FileReaderPool readerPool; + private final CheckpointRegistry checkpointRegistry; + private final FileSourceConfig config; + private final FileSystemOperations fileOps; + private final FileMetrics metrics; + private final Duration rotateWait; + private final boolean closeRemoved; + private final Set knownFiles; + private final WatchServiceFactory watchServiceFactory; + private final Supplier pollSchedulerSupplier; + private final Predicate networkFsCheck; + private final boolean macOS; + + private volatile WatchService watchService; + private volatile Thread watchThread; + private volatile ScheduledExecutorService pollScheduler; + private volatile boolean running; + + public DirectoryWatcher(final GlobPathResolver globPathResolver, + final FileReaderPool readerPool, + final CheckpointRegistry checkpointRegistry, + final FileSourceConfig config, + final FileSystemOperations fileOps, + final FileMetrics metrics, + final Duration rotateWait, + final boolean closeRemoved) { + this(globPathResolver, readerPool, checkpointRegistry, config, fileOps, metrics, rotateWait, closeRemoved, + () -> FileSystems.getDefault().newWatchService(), isMacOS()); + } + + DirectoryWatcher(final GlobPathResolver globPathResolver, + final FileReaderPool readerPool, + final CheckpointRegistry checkpointRegistry, + final FileSourceConfig config, + final FileSystemOperations fileOps, + final FileMetrics metrics, + final Duration rotateWait, + final boolean closeRemoved, + final WatchServiceFactory watchServiceFactory) { + this(globPathResolver, readerPool, checkpointRegistry, config, fileOps, metrics, rotateWait, closeRemoved, + watchServiceFactory, isMacOS()); + } + + DirectoryWatcher(final GlobPathResolver globPathResolver, + final FileReaderPool readerPool, + final CheckpointRegistry checkpointRegistry, + final FileSourceConfig config, + final 
FileSystemOperations fileOps, + final FileMetrics metrics, + final Duration rotateWait, + final boolean closeRemoved, + final WatchServiceFactory watchServiceFactory, + final boolean macOS) { + this(globPathResolver, readerPool, checkpointRegistry, config, fileOps, metrics, rotateWait, closeRemoved, + watchServiceFactory, macOS, DirectoryWatcher::createDefaultPollScheduler, + DirectoryWatcher::isNetworkFilesystem); + } + + DirectoryWatcher(final GlobPathResolver globPathResolver, + final FileReaderPool readerPool, + final CheckpointRegistry checkpointRegistry, + final FileSourceConfig config, + final FileSystemOperations fileOps, + final FileMetrics metrics, + final Duration rotateWait, + final boolean closeRemoved, + final WatchServiceFactory watchServiceFactory, + final boolean macOS, + final Supplier pollSchedulerSupplier) { + this(globPathResolver, readerPool, checkpointRegistry, config, fileOps, metrics, rotateWait, closeRemoved, + watchServiceFactory, macOS, pollSchedulerSupplier, + DirectoryWatcher::isNetworkFilesystem); + } + + DirectoryWatcher(final GlobPathResolver globPathResolver, + final FileReaderPool readerPool, + final CheckpointRegistry checkpointRegistry, + final FileSourceConfig config, + final FileSystemOperations fileOps, + final FileMetrics metrics, + final Duration rotateWait, + final boolean closeRemoved, + final WatchServiceFactory watchServiceFactory, + final boolean macOS, + final Supplier pollSchedulerSupplier, + final Predicate networkFsCheck) { + this.globPathResolver = Objects.requireNonNull(globPathResolver, "globPathResolver must not be null"); + this.readerPool = Objects.requireNonNull(readerPool, "readerPool must not be null"); + this.checkpointRegistry = Objects.requireNonNull(checkpointRegistry, "checkpointRegistry must not be null"); + this.config = Objects.requireNonNull(config, "config must not be null"); + this.fileOps = Objects.requireNonNull(fileOps, "fileOps must not be null"); + this.metrics = 
Objects.requireNonNull(metrics, "metrics must not be null"); + this.rotateWait = Objects.requireNonNull(rotateWait, "rotateWait must not be null"); + this.closeRemoved = closeRemoved; + this.knownFiles = ConcurrentHashMap.newKeySet(); + this.watchServiceFactory = Objects.requireNonNull(watchServiceFactory, "watchServiceFactory must not be null"); + this.macOS = macOS; + this.pollSchedulerSupplier = Objects.requireNonNull(pollSchedulerSupplier, "pollSchedulerSupplier must not be null"); + this.networkFsCheck = Objects.requireNonNull(networkFsCheck, "networkFsCheck must not be null"); + } + + public void start() { + running = true; + + final Set initialFiles = globPathResolver.resolve(); + knownFiles.addAll(initialFiles); + for (final Path file : initialFiles) { + addFileToPool(file); + } + + final boolean useWatchService = shouldUseWatchService(); + startPollScheduler(useWatchService); + if (useWatchService) { + startWatchService(); + } + } + + public void stop() { + running = false; + + if (watchThread != null) { + watchThread.interrupt(); + } + + if (watchService != null) { + try { + watchService.close(); + } catch (final IOException e) { + LOG.warn("Error closing WatchService", e); + } + } + + if (pollScheduler != null) { + pollScheduler.shutdown(); + try { + if (!pollScheduler.awaitTermination(SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) { + pollScheduler.shutdownNow(); + } + } catch (final InterruptedException e) { + pollScheduler.shutdownNow(); + Thread.currentThread().interrupt(); + } + } + + if (watchThread != null) { + try { + watchThread.join(TimeUnit.SECONDS.toMillis(SHUTDOWN_TIMEOUT_SECONDS)); + } catch (final InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + + private boolean shouldUseWatchService() { + final Set watchDirs = globPathResolver.getWatchDirectories(); + for (final Path dir : watchDirs) { + if (Files.isDirectory(dir) && networkFsCheck.test(dir)) { + LOG.info("Network filesystem detected at {}. 
Using polling only.", dir); + return false; + } + } + return true; + } + + static boolean isNetworkFilesystem(final Path path) { + try { + final FileStore fileStore = Files.getFileStore(path); + final String fsType = fileStore.type().toLowerCase(); + return NETWORK_FS_TYPES.contains(fsType); + } catch (final IOException e) { + LOG.warn("Unable to determine filesystem type for {}. Assuming local.", path); + return false; + } + } + + private static boolean isMacOS() { + return System.getProperty("os.name").toLowerCase().contains("mac"); + } + + private void startWatchService() { + try { + watchService = watchServiceFactory.create(); + final Set watchDirs = globPathResolver.getWatchDirectories(); + for (final Path dir : watchDirs) { + if (Files.isDirectory(dir)) { + registerDirectory(dir); + } + } + + watchThread = new Thread(this::watchLoop, "file-watcher"); + watchThread.setDaemon(true); + watchThread.start(); + LOG.info("WatchService started for {} directories", watchDirs.size()); + } catch (final IOException | RuntimeException e) { + LOG.error("Failed to create WatchService. 
Falling back to polling only.", e); + watchService = null; + } + } + + private void registerDirectory(final Path dir) { + try { + dir.register(watchService, + StandardWatchEventKinds.ENTRY_CREATE, + StandardWatchEventKinds.ENTRY_DELETE, + StandardWatchEventKinds.OVERFLOW); + LOG.debug("Registered WatchService for directory: {}", dir); + } catch (final IOException e) { + LOG.warn("Failed to register WatchService for directory: {}", dir, e); + } + } + + static ScheduledExecutorService createDefaultPollScheduler() { + return Executors.newSingleThreadScheduledExecutor(r -> { + final Thread thread = new Thread(r, "file-poll"); + thread.setDaemon(true); + return thread; + }); + } + + private void startPollScheduler(final boolean watchServiceActive) { + pollScheduler = pollSchedulerSupplier.get(); + + final long intervalMillis; + if (!watchServiceActive || macOS) { + intervalMillis = config.getPollInterval().toMillis(); + LOG.info("Poll scheduler started with interval {}ms (primary mode)", intervalMillis); + } else { + final int supplementaryPollMultiplier = 10; + intervalMillis = config.getPollInterval().toMillis() * supplementaryPollMultiplier; + LOG.info("Poll scheduler started with interval {}ms (supplementary mode)", intervalMillis); + } + + pollScheduler.scheduleAtFixedRate(this::pollScan, intervalMillis, intervalMillis, TimeUnit.MILLISECONDS); + } + + private void watchLoop() { + while (running) { + try { + final WatchKey key = watchService.take(); + for (final WatchEvent event : key.pollEvents()) { + handleWatchEvent(key, event); + } + if (!key.reset()) { + LOG.warn("WatchKey no longer valid. 
Directory may have been deleted."); + } + } catch (final ClosedWatchServiceException e) { + LOG.debug("WatchService closed, exiting watch loop"); + break; + } catch (final InterruptedException e) { + Thread.currentThread().interrupt(); + break; + } + } + } + + @SuppressWarnings("unchecked") + private void handleWatchEvent(final WatchKey key, final WatchEvent event) { + final WatchEvent.Kind kind = event.kind(); + + if (kind == StandardWatchEventKinds.OVERFLOW) { + LOG.warn("WatchService OVERFLOW detected. Triggering full rescan."); + triggerFullRescan(); + return; + } + + final Path watchedDir = (Path) key.watchable(); + final WatchEvent pathEvent = (WatchEvent) event; + final Path child = watchedDir.resolve(pathEvent.context()).toAbsolutePath().normalize(); + + if (kind == StandardWatchEventKinds.ENTRY_CREATE) { + if (Files.isRegularFile(child) && globPathResolver.matches(child)) { + LOG.debug("New file detected via WatchService: {}", child); + knownFiles.add(child); + addFileToPool(child); + } + } else if (kind == StandardWatchEventKinds.ENTRY_DELETE) { + if (knownFiles.contains(child)) { + LOG.debug("File deletion detected via WatchService: {}. Waiting {} before treating as removed.", child, rotateWait); + try { + pollScheduler.schedule(() -> handleDeletion(child), rotateWait.toMillis(), TimeUnit.MILLISECONDS); + } catch (final RejectedExecutionException e) { + LOG.debug("Poll scheduler rejected deletion handling (shutting down)"); + } + } + } + } + + private void triggerFullRescan() { + try { + pollScheduler.execute(this::pollScan); + } catch (final RejectedExecutionException e) { + LOG.debug("Poll scheduler rejected rescan (shutting down)"); + } + } + + private void handleDeletion(final Path file) { + if (!Files.exists(file)) { + knownFiles.remove(file); + if (closeRemoved) { + LOG.info("File confirmed removed after rotate_wait. 
Closing reader for: {}", file); + readerPool.closeReaderForPath(file); + } else { + LOG.info("File confirmed removed after rotate_wait. closeRemoved=false, keeping reader open for: {}", file); + } + } else { + LOG.debug("File reappeared during rotate_wait period (likely rotation): {}", file); + } + } + + void pollScan() { + if (!running) { + return; + } + + try { + readerPool.closeInactiveReaders(); + + final Set currentFiles = globPathResolver.resolve(); + + final Set newFiles = new HashSet<>(currentFiles); + newFiles.removeAll(knownFiles); + for (final Path file : newFiles) { + LOG.debug("New file detected via poll scan: {}", file); + addFileToPool(file); + } + + final Set vanishedFiles = new HashSet<>(knownFiles); + vanishedFiles.removeAll(currentFiles); + for (final Path file : vanishedFiles) { + LOG.debug("File vanished detected via poll scan: {}. Deferring by rotateWait.", file); + try { + pollScheduler.schedule(() -> handleDeletion(file), rotateWait.toMillis(), TimeUnit.MILLISECONDS); + } catch (final RejectedExecutionException e) { + LOG.debug("Poll scheduler rejected vanished file handling (shutting down)"); + } + } + + knownFiles.addAll(currentFiles); + } catch (final RuntimeException e) { + LOG.error("Error during poll scan", e); + } + } + + private void addFileToPool(final Path file) { + try { + final FileIdentity identity = FileIdentity.from(file, fileOps, config.getFingerprintBytes()); + readerPool.addFile(identity, file); + } catch (final RuntimeException e) { + LOG.warn("Failed to add file to reader pool: {}", file, e); + } + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileFormat.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileFormat.java new file mode 100644 index 0000000000..8f79397031 --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileFormat.java @@ -0,0 
+1,51 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; + +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public enum FileFormat { + + PLAIN("plain"), + JSON("json"); + + private static final Map NAMES_MAP = Stream.of(values()) + .collect(Collectors.toMap(FileFormat::toString, v -> v)); + + private final String name; + + FileFormat(final String name) { + this.name = name; + } + + @JsonValue + @Override + public String toString() { + return this.name; + } + + @JsonCreator + public static FileFormat fromString(final String name) { + if (name == null) { + throw new IllegalArgumentException("Invalid format: null. Valid values are: " + NAMES_MAP.keySet()); + } + final FileFormat value = NAMES_MAP.get(name.toLowerCase()); + if (value == null) { + throw new IllegalArgumentException("Invalid format: " + name + ". Valid values are: " + NAMES_MAP.keySet()); + } + return value; + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileIdentity.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileIdentity.java new file mode 100644 index 0000000000..d1247a3b92 --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileIdentity.java @@ -0,0 +1,106 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.Objects; +import java.util.zip.CRC32; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public final class FileIdentity { + + private static final Logger LOG = LoggerFactory.getLogger(FileIdentity.class); + private final String identity; + private final Path path; + + private FileIdentity(final String identity, final Path path) { + this.identity = Objects.requireNonNull(identity, "identity must not be null"); + this.path = Objects.requireNonNull(path, "path must not be null"); + } + + public static FileIdentity from(final Path path, final FileSystemOperations fileOps, final int fingerprintBytes) { + Objects.requireNonNull(path, "path must not be null"); + Objects.requireNonNull(fileOps, "fileOps must not be null"); + if (fingerprintBytes <= 0) { + throw new IllegalArgumentException("fingerprintBytes must be positive"); + } + + try { + final BasicFileAttributes attrs = fileOps.readAttributes(path); + final Object fileKey = attrs.fileKey(); + if (fileKey != null) { + final String creationTime = attrs.creationTime().toString(); + return new FileIdentity("inode:" + fileKey + ":created:" + creationTime, path); + } + return buildFallbackIdentity(path, fileOps, attrs, fingerprintBytes); + } catch (final IOException e) { + LOG.warn("Failed to read file attributes for {}. Using path-based identity which disables rotation detection.", path, e); + return new FileIdentity("path:" + path.toAbsolutePath(), path); + } + } + + private static FileIdentity buildFallbackIdentity(final Path path, final FileSystemOperations fileOps, + final BasicFileAttributes attrs, final int fingerprintBytes) { + long fileSize; + try { + fileSize = fileOps.size(path); + } catch (final IOException e) { + LOG.warn("Failed to read file size for {}. 
Using path-based identity.", path, e); + return new FileIdentity("path:" + path.toAbsolutePath(), path); + } + + if (fileSize == 0) { + return new FileIdentity("path:" + path.toAbsolutePath(), path); + } + + final int bytesToRead = (int) Math.min(fingerprintBytes, fileSize); + long crcValue = 0; + try { + final byte[] bytes = fileOps.readBytes(path, bytesToRead); + final CRC32 crc32 = new CRC32(); + crc32.update(bytes); + crcValue = crc32.getValue(); + } catch (final IOException e) { + LOG.warn("Failed to read fingerprint bytes for {}. Using path-based identity.", path, e); + return new FileIdentity("path:" + path.toAbsolutePath(), path); + } + + final String creationTime = attrs.creationTime().toString(); + final String id = "crc:" + crcValue + ":created:" + creationTime; + return new FileIdentity(id, path); + } + + public Path getPath() { + return path; + } + + @Override + public boolean equals(final Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + final FileIdentity that = (FileIdentity) o; + return identity.equals(that.identity); + } + + @Override + public int hashCode() { + return identity.hashCode(); + } + + @Override + public String toString() { + return "FileIdentity{" + identity + ", path=" + path + "}"; + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileMetrics.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileMetrics.java new file mode 100644 index 0000000000..335205a79d --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileMetrics.java @@ -0,0 +1,128 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import io.micrometer.core.instrument.Counter; +import io.micrometer.core.instrument.Timer; +import org.opensearch.dataprepper.metrics.PluginMetrics; + +import java.util.concurrent.atomic.AtomicLong; + +public final class FileMetrics { + + private final Counter linesRead; + private final Counter bytesRead; + private final Counter linesTruncated; + private final Counter filesOpened; + private final Counter filesClosed; + private final Counter filesRotated; + private final Counter readErrors; + private final Counter writeTimeouts; + private final Counter checkpointFlushes; + private final Counter checkpointErrors; + private final Counter eventsEmitted; + private final Counter dataLossEvents; + private final Counter acknowledgmentFailures; + private final Counter truncationEvents; + private final Timer backpressureTimer; + private final AtomicLong activeFileCount; + private final AtomicLong fileLagBytes; + + public FileMetrics(final PluginMetrics pluginMetrics) { + this.linesRead = pluginMetrics.counter("linesRead"); + this.bytesRead = pluginMetrics.counter("bytesRead"); + this.linesTruncated = pluginMetrics.counter("linesTruncated"); + this.filesOpened = pluginMetrics.counter("filesOpened"); + this.filesClosed = pluginMetrics.counter("filesClosed"); + this.filesRotated = pluginMetrics.counter("filesRotated"); + this.readErrors = pluginMetrics.counter("readErrors"); + this.writeTimeouts = pluginMetrics.counter("writeTimeouts"); + this.checkpointFlushes = pluginMetrics.counter("checkpointFlushes"); + this.checkpointErrors = pluginMetrics.counter("checkpointErrors"); + this.eventsEmitted = pluginMetrics.counter("eventsEmitted"); + this.dataLossEvents = pluginMetrics.counter("dataLossEvents"); + this.acknowledgmentFailures = pluginMetrics.counter("acknowledgmentFailures"); + this.truncationEvents = pluginMetrics.counter("truncationEvents"); + this.backpressureTimer = 
pluginMetrics.timer("backpressureTime"); + this.activeFileCount = new AtomicLong(0); + pluginMetrics.gauge("activeFiles", activeFileCount); + this.fileLagBytes = new AtomicLong(0); + pluginMetrics.gauge("fileLagBytes", fileLagBytes); + } + + public Counter getLinesRead() { + return linesRead; + } + + public Counter getBytesRead() { + return bytesRead; + } + + public Counter getLinesTruncated() { + return linesTruncated; + } + + public Counter getFilesOpened() { + return filesOpened; + } + + public Counter getFilesClosed() { + return filesClosed; + } + + public Counter getFilesRotated() { + return filesRotated; + } + + public Counter getReadErrors() { + return readErrors; + } + + public Counter getWriteTimeouts() { + return writeTimeouts; + } + + public Counter getCheckpointFlushes() { + return checkpointFlushes; + } + + public Counter getCheckpointErrors() { + return checkpointErrors; + } + + public AtomicLong getActiveFileCount() { + return activeFileCount; + } + + public Counter getEventsEmitted() { + return eventsEmitted; + } + + public Timer getBackpressureTimer() { + return backpressureTimer; + } + + public AtomicLong getFileLagBytes() { + return fileLagBytes; + } + + public Counter getDataLossEvents() { + return dataLossEvents; + } + + public Counter getAcknowledgmentFailures() { + return acknowledgmentFailures; + } + + public Counter getTruncationEvents() { + return truncationEvents; + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileReader.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileReader.java new file mode 100644 index 0000000000..7b0aaf6cd8 --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileReader.java @@ -0,0 +1,529 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made 
to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import org.opensearch.dataprepper.model.acknowledgements.AcknowledgementSet; +import org.opensearch.dataprepper.model.acknowledgements.AcknowledgementSetManager; +import org.opensearch.dataprepper.model.buffer.Buffer; +import org.opensearch.dataprepper.model.codec.DecompressionEngine; +import org.opensearch.dataprepper.model.codec.InputCodec; +import org.opensearch.dataprepper.model.event.Event; +import org.opensearch.dataprepper.model.event.EventBuilder; +import org.opensearch.dataprepper.model.event.EventFactory; +import org.opensearch.dataprepper.model.record.Record; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.time.Duration; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +public final class FileReader implements Runnable { + + private static final Logger LOG = LoggerFactory.getLogger(FileReader.class); + private static final long BACK_PRESSURE_SLEEP_MILLIS = 100; + private static final String MESSAGE_KEY = "message"; + private static final String EVENT_TYPE = "event"; + private static final String FILE_KEY = "file"; + private static final String FILE_PATH_KEY = "path"; + 
private static final String FILE_NAME_KEY = "name"; + private static final String OFFSET_KEY = "offset"; + + private final FileIdentity fileIdentity; + private final Path path; + private final Buffer> buffer; + private final EventFactory eventFactory; + private final FileSystemOperations fileOps; + private final CheckpointEntry checkpointEntry; + private final FileMetrics metrics; + private final Charset encoding; + private final int readBufferSize; + private final int maxLineLength; + private final int writeTimeout; + private final Duration maxReadTimePerFile; + private final boolean includeFileMetadata; + private final Runnable onComplete; + private final RotationDetector rotationDetector; + private final AcknowledgementSetManager acknowledgementSetManager; + private final boolean acknowledgementsEnabled; + private final Duration acknowledgmentTimeout; + private final int batchSize; + private final StartPosition startPosition; + private final Duration rotationDrainTimeout; + private final Duration batchTimeout; + private final int maxAcknowledgmentRetries; + private final InputCodec codec; + private final boolean tailMode; + private final DecompressionEngine decompressionEngine; + + private final AtomicLong readOffset; + private final StringBuilder partialLine; + private final String cachedAbsolutePath; + private volatile long lastActivityMillis; + private boolean skippingToNewline; + private volatile long currentBatchEndOffset; + + private AcknowledgementSet currentAckSet; + private int currentBatchCount; + private long batchStartOffset; + private long batchOpenedAtMillis; + private final AtomicInteger acknowledgmentRetryCount = new AtomicInteger(0); + private volatile RotationType lastRotationType; + + public FileReader(final Path path, + final FileIdentity fileIdentity, + final CheckpointEntry checkpointEntry, + final FileReaderContext context, + final Runnable onComplete) { + this.path = Objects.requireNonNull(path, "path must not be null"); + 
this.fileIdentity = Objects.requireNonNull(fileIdentity, "fileIdentity must not be null"); + this.checkpointEntry = Objects.requireNonNull(checkpointEntry, "checkpointEntry must not be null"); + Objects.requireNonNull(context, "context must not be null"); + this.onComplete = Objects.requireNonNull(onComplete, "onComplete must not be null"); + + this.buffer = context.getBuffer(); + this.eventFactory = context.getEventFactory(); + this.fileOps = context.getFileOps(); + this.metrics = context.getMetrics(); + this.encoding = context.getEncoding(); + this.readBufferSize = context.getReadBufferSize(); + this.maxLineLength = context.getMaxLineLength(); + this.writeTimeout = context.getWriteTimeout(); + this.maxReadTimePerFile = context.getMaxReadTimePerFile(); + this.includeFileMetadata = context.isIncludeFileMetadata(); + this.rotationDetector = context.getRotationDetector(); + this.acknowledgementSetManager = context.getAcknowledgementSetManager(); + this.acknowledgementsEnabled = context.isAcknowledgementsEnabled(); + this.acknowledgmentTimeout = context.getAcknowledgmentTimeout(); + this.batchSize = context.getBatchSize(); + this.startPosition = context.getStartPosition(); + this.rotationDrainTimeout = context.getRotationDrainTimeout(); + this.batchTimeout = context.getBatchTimeout(); + this.maxAcknowledgmentRetries = context.getMaxAcknowledgmentRetries(); + this.codec = context.getCodec(); + this.tailMode = context.isTailMode(); + this.decompressionEngine = context.getDecompressionEngine(); + + this.readOffset = new AtomicLong(checkpointEntry.getReadOffset()); + if (checkpointEntry.getReadOffset() == 0 && startPosition == StartPosition.END) { + try { + final long fileSize = fileOps.size(path); + this.readOffset.set(fileSize); + checkpointEntry.setReadOffset(fileSize); + } catch (final IOException e) { + LOG.warn("Unable to determine file size for start_position=end on {}. 
Starting from offset 0.", path); + } + } + this.partialLine = new StringBuilder(); + this.cachedAbsolutePath = path.toAbsolutePath().toString(); + this.currentBatchCount = 0; + this.batchStartOffset = readOffset.get(); + this.batchOpenedAtMillis = System.currentTimeMillis(); + this.lastRotationType = RotationType.NO_ROTATION; + this.lastActivityMillis = System.currentTimeMillis(); + } + + @Override + public void run() { + try { + if (!tailMode && codec != null) { + readFileWithCodecOneShot(); + return; + } + + final RotationResult rotation = rotationDetector.checkRotation(path, fileIdentity, readOffset.get()); + lastRotationType = rotation.getRotationType(); + + final RotationType rotationType = rotation.getRotationType(); + + if (rotationType == RotationType.COPYTRUNCATE) { + LOG.info("Copytruncate detected for {}. Resetting offset to 0.", path); + metrics.getFilesRotated().increment(); + metrics.getTruncationEvents().increment(); + completePendingAckSet(); + readOffset.set(0); + checkpointEntry.setReadOffset(0); + batchStartOffset = 0; + readFile(); + } else if (rotationType == RotationType.DELETED) { + LOG.info("File deleted: {}. Closing reader.", path); + completePendingAckSet(); + } else if (rotationType == RotationType.CREATE_RENAME) { + LOG.info("Create/rename rotation detected for {}. 
Draining current file.", path); + metrics.getFilesRotated().increment(); + drainCurrentFile(); + completePendingAckSet(); + } else { + readFile(); + } + } catch (final RuntimeException e) { + LOG.error("Error reading file {}", path, e); + metrics.getReadErrors().increment(); + } finally { + flushPartialLine(); + completePendingAckSet(); + onComplete.run(); + } + } + + private void readFileWithCodecOneShot() { + try (final InputStream rawStream = Files.newInputStream(path); + final InputStream decompressedStream = decompressionEngine.createInputStream(rawStream)) { + metrics.getFilesOpened().increment(); + codec.parse(decompressedStream, record -> { + try { + buffer.write((Record) record, writeTimeout); + metrics.getEventsEmitted().increment(); + } catch (final TimeoutException e) { + metrics.getWriteTimeouts().increment(); + throw new RuntimeException(e); + } + }); + } catch (final IOException e) { + LOG.error("Error reading file with codec: {}", path, e); + metrics.getReadErrors().increment(); + } finally { + metrics.getFilesClosed().increment(); + } + } + + private void drainCurrentFile() { + try (final FileChannel channel = fileOps.openReadChannel(path)) { + metrics.getFilesOpened().increment(); + channel.position(readOffset.get()); + readLoop(channel, rotationDrainTimeout.toMillis(), true); + } catch (final NoSuchFileException e) { + LOG.warn("File already removed during drain: {}", path); + } catch (final IOException e) { + LOG.error("IO error draining file {}", path, e); + metrics.getReadErrors().increment(); + } finally { + metrics.getFilesClosed().increment(); + } + } + + private void readFile() { + try (final FileChannel channel = fileOps.openReadChannel(path)) { + metrics.getFilesOpened().increment(); + channel.position(readOffset.get()); + readLoop(channel, maxReadTimePerFile.toMillis(), false); + updateFileLagBytes(); + } catch (final NoSuchFileException e) { + LOG.warn("File not found: {}", path); + } catch (final IOException e) { + LOG.error("IO error 
reading file {}", path, e); + metrics.getReadErrors().increment(); + } finally { + metrics.getFilesClosed().increment(); + } + } + + private void readLoop(final FileChannel channel, final long timeoutMillis, final boolean isDraining) throws IOException { + final ByteBuffer byteBuffer = ByteBuffer.allocate(readBufferSize); + final long loopStart = System.currentTimeMillis(); + final ByteArrayOutputStream codecAccumulator = codec != null ? new ByteArrayOutputStream() : null; + long codecBytesAccumulated = 0; + final CharsetDecoder decoder = codec == null ? encoding.newDecoder() + .onMalformedInput(CodingErrorAction.REPLACE) + .onUnmappableCharacter(CodingErrorAction.REPLACE) : null; + final CharBuffer charBuffer = codec == null ? CharBuffer.allocate(readBufferSize) : null; + final ByteBuffer decoderCarryover = codec == null ? ByteBuffer.allocate(8) : null; + + while (!Thread.currentThread().isInterrupted()) { + final long elapsed = System.currentTimeMillis() - loopStart; + if (elapsed >= timeoutMillis) { + if (isDraining) { + long currentFileSize = 0; + try { + currentFileSize = channel.size(); + } catch (final IOException ignored) { + } + if (readOffset.get() < currentFileSize) { + LOG.warn("Rotation drain timeout expired with unread data for {}. 
Potential data loss.", path); + metrics.getDataLossEvents().increment(); + } + } else { + LOG.debug("Max read time reached for file {}", path); + } + break; + } + + byteBuffer.clear(); + if (decoderCarryover != null && decoderCarryover.position() > 0) { + decoderCarryover.flip(); + byteBuffer.put(decoderCarryover); + decoderCarryover.clear(); + } + final int bytesRead = channel.read(byteBuffer); + if (bytesRead <= 0 && byteBuffer.position() == 0) { + break; + } + + final int totalBytes = byteBuffer.position(); + metrics.getBytesRead().increment(Math.max(0, bytesRead)); + byteBuffer.flip(); + + if (codec != null) { + final byte[] bytes = new byte[byteBuffer.remaining()]; + byteBuffer.get(bytes); + codecAccumulator.write(bytes); + codecBytesAccumulated += bytes.length; + } else { + charBuffer.clear(); + final CoderResult result = decoder.decode(byteBuffer, charBuffer, false); + if (result.isUnderflow() && byteBuffer.hasRemaining()) { + decoderCarryover.put(byteBuffer); + } + charBuffer.flip(); + if (charBuffer.hasRemaining()) { + processChunk(charBuffer.toString()); + } + } + + readOffset.addAndGet(Math.max(0, bytesRead)); + lastActivityMillis = System.currentTimeMillis(); + + if (acknowledgementsEnabled && currentAckSet != null && currentBatchCount > 0) { + final long batchAge = System.currentTimeMillis() - batchOpenedAtMillis; + if (batchAge >= batchTimeout.toMillis()) { + completePendingAckSet(); + } + } + } + + if (codec != null && codecAccumulator.size() > 0) { + if (parseWithCodec(codecAccumulator.toByteArray())) { + checkpointEntry.setReadOffset(readOffset.get()); + } + } else if (codec == null) { + final int carryoverBytes = decoderCarryover.position(); + checkpointEntry.setReadOffset(readOffset.get() - carryoverBytes); + } + } + + private void updateFileLagBytes() { + try { + final long currentFileSize = fileOps.size(path); + final long lag = currentFileSize - readOffset.get(); + metrics.getFileLagBytes().set(Math.max(0, lag)); + } catch (final IOException 
e) { + LOG.debug("Unable to determine file size for lag calculation on {}", path); + } + } + + private void processChunk(final String chunk) { + int start = 0; + for (int i = 0; i < chunk.length(); i++) { + if (chunk.charAt(i) == '\n') { + if (skippingToNewline) { + skippingToNewline = false; + start = i + 1; + continue; + } + final String segment = chunk.substring(start, i); + partialLine.append(segment); + final String line = partialLine.length() > maxLineLength + ? partialLine.substring(0, maxLineLength) + : partialLine.toString(); + if (partialLine.length() > maxLineLength) { + metrics.getLinesTruncated().increment(); + } + emitLine(line); + partialLine.setLength(0); + start = i + 1; + } + } + if (!skippingToNewline && start < chunk.length()) { + partialLine.append(chunk, start, chunk.length()); + } + + if (partialLine.length() > maxLineLength) { + emitLine(partialLine.substring(0, maxLineLength)); + partialLine.setLength(0); + skippingToNewline = true; + metrics.getLinesTruncated().increment(); + } + } + + private void flushPartialLine() { + if (partialLine.length() > 0) { + emitLine(partialLine.toString()); + partialLine.setLength(0); + } + } + + private boolean parseWithCodec(final byte[] bytes) { + try { + codec.parse(new ByteArrayInputStream(bytes), record -> { + emitCodecRecord(record); + }); + return true; + } catch (final IOException e) { + LOG.error("Codec parse error for file {}", path, e); + metrics.getReadErrors().increment(); + return false; + } + } + + @SuppressWarnings("unchecked") + private void emitCodecRecord(final Record record) { + final Record objectRecord = (Record) (Record) record; + final Event event = record.getData(); + writeRecordWithRetry(objectRecord, event); + } + + private void emitLine(final String line) { + final Map data = new HashMap<>(); + data.put(MESSAGE_KEY, line); + if (includeFileMetadata) { + final Map fileMetadata = new HashMap<>(); + fileMetadata.put(FILE_PATH_KEY, cachedAbsolutePath); + 
fileMetadata.put(FILE_NAME_KEY, path.getFileName().toString()); + data.put(FILE_KEY, fileMetadata); + data.put(OFFSET_KEY, readOffset.get()); + } + + final Event event = eventFactory.eventBuilder(EventBuilder.class) + .withEventType(EVENT_TYPE) + .withData(data) + .build(); + + final Record record = new Record<>(event); + writeRecordWithRetry(record, event); + } + + private void writeRecordWithRetry(final Record record, final Event event) { + boolean written = false; + long backpressureStartNanos = 0; + boolean backpressureActive = false; + final long maxRetryMillis = maxReadTimePerFile.toMillis(); + final long retryStart = System.currentTimeMillis(); + while (!written && !Thread.currentThread().isInterrupted()) { + if (System.currentTimeMillis() - retryStart > maxRetryMillis) { + LOG.warn("Backpressure retry timeout exceeded for file {}. Event may be lost.", path); + metrics.getDataLossEvents().increment(); + break; + } + try { + buffer.write(record, writeTimeout); + written = true; + metrics.getLinesRead().increment(); + metrics.getEventsEmitted().increment(); + + if (backpressureActive) { + final long backpressureElapsedNanos = System.nanoTime() - backpressureStartNanos; + metrics.getBackpressureTimer().record(backpressureElapsedNanos, TimeUnit.NANOSECONDS); + } + + if (acknowledgementsEnabled && acknowledgementSetManager != null) { + ensureAckSet(); + currentAckSet.add(event); + currentBatchCount++; + if (currentBatchCount >= batchSize) { + completePendingAckSet(); + } + } + } catch (final TimeoutException e) { + if (!backpressureActive) { + backpressureStartNanos = System.nanoTime(); + backpressureActive = true; + } + metrics.getWriteTimeouts().increment(); + LOG.debug("Back pressure from buffer, retrying for file {}", path); + try { + Thread.sleep(BACK_PRESSURE_SLEEP_MILLIS); + } catch (final InterruptedException ie) { + Thread.currentThread().interrupt(); + return; + } + } + } + } + + private void ensureAckSet() { + if (currentAckSet == null) { + final long 
capturedBatchStart = readOffset.get(); + batchStartOffset = capturedBatchStart; + batchOpenedAtMillis = System.currentTimeMillis(); + currentAckSet = acknowledgementSetManager.create( + result -> handleAcknowledgement(result, capturedBatchStart, currentBatchEndOffset), + acknowledgmentTimeout); + } + } + + private void completePendingAckSet() { + if (currentAckSet != null) { + currentBatchEndOffset = readOffset.get(); + currentAckSet.complete(); + currentAckSet = null; + currentBatchCount = 0; + } + } + + private void handleAcknowledgement(final boolean result, final long batchStart, final long batchEnd) { + if (result) { + acknowledgmentRetryCount.set(0); + checkpointEntry.setCommittedOffset(batchEnd); + LOG.debug("Positive acknowledgement for file {} offset range [{}, {}]", path, batchStart, batchEnd); + } else { + final int retryCount = acknowledgmentRetryCount.incrementAndGet(); + metrics.getAcknowledgmentFailures().increment(); + if (retryCount > maxAcknowledgmentRetries) { + LOG.error("Exceeded max acknowledgment retries ({}) for file {} offset range [{}, {}]. Advancing offset to avoid infinite retry.", + maxAcknowledgmentRetries, path, batchStart, batchEnd); + checkpointEntry.setCommittedOffset(batchEnd); + acknowledgmentRetryCount.set(0); + } else { + LOG.warn("Negative acknowledgement for file {} offset range [{}, {}]. 
Retry {}/{}.", + path, batchStart, batchEnd, retryCount, maxAcknowledgmentRetries); + } + } + } + + public FileIdentity getFileIdentity() { + return fileIdentity; + } + + public Path getPath() { + return path; + } + + public long getReadOffset() { + return readOffset.get(); + } + + public RotationType getLastRotationType() { + return lastRotationType; + } + + public long getLastActivityMillis() { + return lastActivityMillis; + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileReaderContext.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileReaderContext.java new file mode 100644 index 0000000000..36ca0aabc1 --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileReaderContext.java @@ -0,0 +1,182 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import org.opensearch.dataprepper.model.acknowledgements.AcknowledgementSetManager; +import org.opensearch.dataprepper.model.buffer.Buffer; +import org.opensearch.dataprepper.model.codec.DecompressionEngine; +import org.opensearch.dataprepper.model.codec.InputCodec; +import org.opensearch.dataprepper.model.event.EventFactory; +import org.opensearch.dataprepper.model.record.Record; + +import java.nio.charset.Charset; +import java.time.Duration; +import java.util.Objects; + +public final class FileReaderContext { + + private final Buffer> buffer; + private final EventFactory eventFactory; + private final FileSystemOperations fileOps; + private final FileMetrics metrics; + private final RotationDetector rotationDetector; + private final AcknowledgementSetManager acknowledgementSetManager; + private final boolean acknowledgementsEnabled; + private final Charset encoding; + private final int readBufferSize; + private final int maxLineLength; + private final int writeTimeout; + private final Duration maxReadTimePerFile; + private final Duration rotationDrainTimeout; + private final StartPosition startPosition; + private final boolean includeFileMetadata; + private final Duration acknowledgmentTimeout; + private final int batchSize; + private final Duration batchTimeout; + private final int maxAcknowledgmentRetries; + private final InputCodec codec; + private final boolean tailMode; + private final DecompressionEngine decompressionEngine; + + public FileReaderContext(final Buffer> buffer, + final EventFactory eventFactory, + final FileSystemOperations fileOps, + final FileMetrics metrics, + final RotationDetector rotationDetector, + final AcknowledgementSetManager acknowledgementSetManager, + final boolean acknowledgementsEnabled, + final Charset encoding, + final int readBufferSize, + final int maxLineLength, + final int writeTimeout, + final Duration maxReadTimePerFile, + final Duration 
rotationDrainTimeout, + final StartPosition startPosition, + final boolean includeFileMetadata, + final Duration acknowledgmentTimeout, + final int batchSize, + final Duration batchTimeout, + final int maxAcknowledgmentRetries, + final InputCodec codec, + final boolean tailMode, + final DecompressionEngine decompressionEngine) { + this.buffer = Objects.requireNonNull(buffer, "buffer must not be null"); + this.eventFactory = Objects.requireNonNull(eventFactory, "eventFactory must not be null"); + this.fileOps = Objects.requireNonNull(fileOps, "fileOps must not be null"); + this.metrics = Objects.requireNonNull(metrics, "metrics must not be null"); + this.rotationDetector = Objects.requireNonNull(rotationDetector, "rotationDetector must not be null"); + this.acknowledgementSetManager = acknowledgementSetManager; + this.acknowledgementsEnabled = acknowledgementsEnabled; + this.encoding = Objects.requireNonNull(encoding, "encoding must not be null"); + this.readBufferSize = readBufferSize; + this.maxLineLength = maxLineLength; + this.writeTimeout = writeTimeout; + this.maxReadTimePerFile = Objects.requireNonNull(maxReadTimePerFile, "maxReadTimePerFile must not be null"); + this.rotationDrainTimeout = Objects.requireNonNull(rotationDrainTimeout, "rotationDrainTimeout must not be null"); + this.startPosition = Objects.requireNonNull(startPosition, "startPosition must not be null"); + this.includeFileMetadata = includeFileMetadata; + this.acknowledgmentTimeout = Objects.requireNonNull(acknowledgmentTimeout, "acknowledgmentTimeout must not be null"); + this.batchSize = batchSize; + this.batchTimeout = Objects.requireNonNull(batchTimeout, "batchTimeout must not be null"); + this.maxAcknowledgmentRetries = maxAcknowledgmentRetries; + this.codec = codec; + this.tailMode = tailMode; + this.decompressionEngine = decompressionEngine; + } + + public Buffer> getBuffer() { + return buffer; + } + + public EventFactory getEventFactory() { + return eventFactory; + } + + public 
FileSystemOperations getFileOps() { + return fileOps; + } + + public FileMetrics getMetrics() { + return metrics; + } + + public RotationDetector getRotationDetector() { + return rotationDetector; + } + + public AcknowledgementSetManager getAcknowledgementSetManager() { + return acknowledgementSetManager; + } + + public boolean isAcknowledgementsEnabled() { + return acknowledgementsEnabled; + } + + public Charset getEncoding() { + return encoding; + } + + public int getReadBufferSize() { + return readBufferSize; + } + + public int getMaxLineLength() { + return maxLineLength; + } + + public int getWriteTimeout() { + return writeTimeout; + } + + public Duration getMaxReadTimePerFile() { + return maxReadTimePerFile; + } + + public Duration getRotationDrainTimeout() { + return rotationDrainTimeout; + } + + public StartPosition getStartPosition() { + return startPosition; + } + + public boolean isIncludeFileMetadata() { + return includeFileMetadata; + } + + public Duration getAcknowledgmentTimeout() { + return acknowledgmentTimeout; + } + + public int getBatchSize() { + return batchSize; + } + + public Duration getBatchTimeout() { + return batchTimeout; + } + + public int getMaxAcknowledgmentRetries() { + return maxAcknowledgmentRetries; + } + + public InputCodec getCodec() { + return codec; + } + + public boolean isTailMode() { + return tailMode; + } + + public DecompressionEngine getDecompressionEngine() { + return decompressionEngine; + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileReaderPool.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileReaderPool.java new file mode 100644 index 0000000000..6ab8d9855c --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileReaderPool.java @@ -0,0 +1,212 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * 
The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.nio.file.Path; +import java.time.Duration; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; + +public final class FileReaderPool { + + private static final Logger LOG = LoggerFactory.getLogger(FileReaderPool.class); + private static final long SHUTDOWN_TIMEOUT_SECONDS = 30; + private static final long RE_POLL_DELAY_MILLIS = 500; + + private final ConcurrentHashMap activeReaders; + private final Set pendingIdentities; + private final ConcurrentLinkedQueue pendingQueue; + private final ExecutorService executorService; + private final ScheduledExecutorService scheduler; + private final CheckpointRegistry checkpointRegistry; + private final FileMetrics metrics; + private final int maxActiveFiles; + private final Duration closeInactive; + private final FileReaderContext readerContext; + + public FileReaderPool(final CheckpointRegistry checkpointRegistry, + final FileMetrics metrics, + final int maxActiveFiles, + final int readerThreads, + final Duration closeInactive, + final FileReaderContext readerContext) { + this(checkpointRegistry, metrics, maxActiveFiles, closeInactive, readerContext, + () -> Executors.newFixedThreadPool(readerThreads, r -> { + final Thread thread = new Thread(r, "file-reader"); + thread.setDaemon(true); + return thread; + })); + } + + FileReaderPool(final CheckpointRegistry 
checkpointRegistry, + final FileMetrics metrics, + final int maxActiveFiles, + final Duration closeInactive, + final FileReaderContext readerContext, + final Supplier executorServiceSupplier) { + this.checkpointRegistry = Objects.requireNonNull(checkpointRegistry, "checkpointRegistry must not be null"); + this.metrics = Objects.requireNonNull(metrics, "metrics must not be null"); + this.maxActiveFiles = maxActiveFiles; + this.closeInactive = Objects.requireNonNull(closeInactive, "closeInactive must not be null"); + this.readerContext = Objects.requireNonNull(readerContext, "readerContext must not be null"); + this.activeReaders = new ConcurrentHashMap<>(); + this.pendingIdentities = ConcurrentHashMap.newKeySet(); + this.pendingQueue = new ConcurrentLinkedQueue<>(); + this.executorService = executorServiceSupplier.get(); + this.scheduler = Executors.newSingleThreadScheduledExecutor(r -> { + final Thread thread = new Thread(r, "file-reader-scheduler"); + thread.setDaemon(true); + return thread; + }); + } + + public synchronized void addFile(final FileIdentity fileIdentity, final Path path) { + if (activeReaders.containsKey(fileIdentity) || pendingIdentities.contains(fileIdentity)) { + return; + } + + if (activeReaders.size() < maxActiveFiles) { + submitReader(fileIdentity, path); + } else { + pendingIdentities.add(fileIdentity); + pendingQueue.add(new PendingFile(fileIdentity, path)); + LOG.debug("File queued as pending: {}", path); + } + } + + private synchronized void submitReader(final FileIdentity fileIdentity, final Path path) { + if (executorService.isShutdown() || activeReaders.containsKey(fileIdentity)) { + return; + } + final CheckpointEntry checkpoint = checkpointRegistry.getOrCreate(fileIdentity.toString()); + final FileReader reader = new FileReader( + path, fileIdentity, checkpoint, readerContext, + () -> onReaderComplete(fileIdentity, path)); + activeReaders.put(fileIdentity, reader); + metrics.getActiveFileCount().incrementAndGet(); + try { + 
executorService.submit(reader); + } catch (final RejectedExecutionException e) { + activeReaders.remove(fileIdentity); + metrics.getActiveFileCount().decrementAndGet(); + LOG.debug("Reader submission rejected for {}", path); + } + } + + private synchronized void onReaderComplete(final FileIdentity fileIdentity, final Path path) { + final FileReader completedReader = activeReaders.remove(fileIdentity); + if (completedReader == null) { + return; + } + metrics.getActiveFileCount().decrementAndGet(); + + if (completedReader.getLastRotationType() == RotationType.CREATE_RENAME) { + LOG.info("Re-adding path {} after create/rename rotation", path); + final FileIdentity newIdentity = FileIdentity.from(path, readerContext.getFileOps(), + readerContext.getRotationDetector().getFingerprintBytes()); + submitReader(newIdentity, path); + } else if (completedReader.getLastRotationType() != RotationType.DELETED) { + final PendingFile next = pendingQueue.poll(); + if (next != null) { + pendingIdentities.remove(next.getFileIdentity()); + submitReader(next.getFileIdentity(), next.getPath()); + pendingQueue.add(new PendingFile(fileIdentity, path)); + pendingIdentities.add(fileIdentity); + } else { + scheduler.schedule(() -> submitReader(fileIdentity, path), RE_POLL_DELAY_MILLIS, TimeUnit.MILLISECONDS); + } + } else { + checkpointRegistry.markCompleted(fileIdentity.toString()); + final PendingFile next = pendingQueue.poll(); + if (next != null) { + pendingIdentities.remove(next.getFileIdentity()); + submitReader(next.getFileIdentity(), next.getPath()); + } + } + } + + public void shutdown() { + scheduler.shutdownNow(); + executorService.shutdown(); + try { + scheduler.awaitTermination(5, TimeUnit.SECONDS); + if (!executorService.awaitTermination(SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) { + executorService.shutdownNow(); + if (!executorService.awaitTermination(SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) { + LOG.warn("Reader pool did not terminate within the allowed time"); + } + } + 
} catch (final InterruptedException e) { + executorService.shutdownNow(); + Thread.currentThread().interrupt(); + } + } + + public int getActiveReaderCount() { + return activeReaders.size(); + } + + public int getPendingCount() { + return pendingQueue.size(); + } + + public synchronized void closeInactiveReaders() { + final long now = System.currentTimeMillis(); + final long inactiveThresholdMillis = closeInactive.toMillis(); + activeReaders.entrySet().removeIf(entry -> { + final FileReader reader = entry.getValue(); + if ((now - reader.getLastActivityMillis()) >= inactiveThresholdMillis) { + LOG.info("Closing inactive reader for file identity {}", entry.getKey()); + metrics.getActiveFileCount().decrementAndGet(); + metrics.getFilesClosed().increment(); + return true; + } + return false; + }); + promotePendingFiles(); + } + + public synchronized void closeReaderForPath(final Path path) { + final Path absolutePath = path.toAbsolutePath().normalize(); + activeReaders.entrySet().removeIf(entry -> { + final FileReader reader = entry.getValue(); + if (reader.getPath().toAbsolutePath().normalize().equals(absolutePath)) { + LOG.info("Closing reader for removed file: {}", path); + metrics.getActiveFileCount().decrementAndGet(); + metrics.getFilesClosed().increment(); + return true; + } + return false; + }); + promotePendingFiles(); + } + + private void promotePendingFiles() { + while (activeReaders.size() < maxActiveFiles && !pendingQueue.isEmpty()) { + final PendingFile next = pendingQueue.poll(); + if (next != null) { + pendingIdentities.remove(next.getFileIdentity()); + submitReader(next.getFileIdentity(), next.getPath()); + } + } + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileSource.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileSource.java new file mode 100644 index 0000000000..65826bee85 --- /dev/null +++ 
b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileSource.java @@ -0,0 +1,317 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.opensearch.dataprepper.metrics.PluginMetrics; +import org.opensearch.dataprepper.model.acknowledgements.AcknowledgementSetManager; +import org.opensearch.dataprepper.model.annotations.DataPrepperPlugin; +import org.opensearch.dataprepper.model.annotations.DataPrepperPluginConstructor; +import org.opensearch.dataprepper.model.buffer.Buffer; +import org.opensearch.dataprepper.model.codec.DecompressionEngine; +import org.opensearch.dataprepper.model.codec.InputCodec; +import org.opensearch.dataprepper.model.configuration.PluginModel; +import org.opensearch.dataprepper.model.configuration.PluginSetting; +import org.opensearch.dataprepper.model.event.EventBuilder; +import org.opensearch.dataprepper.model.event.EventFactory; +import org.opensearch.dataprepper.model.plugin.PluginFactory; +import org.opensearch.dataprepper.model.record.Record; +import org.opensearch.dataprepper.model.source.Source; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.Charset; +import java.util.Objects; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeoutException; + +import static java.lang.String.format; 
+import static org.opensearch.dataprepper.logging.DataPrepperMarkers.SENSITIVE; + +@DataPrepperPlugin(name = "file", pluginType = Source.class, pluginConfigurationType = FileSourceConfig.class) +public class FileSource implements Source> { + private static final String MESSAGE_KEY = "message"; + private static final Logger LOG = LoggerFactory.getLogger(FileSource.class); + private static final TypeReference> MAP_TYPE_REFERENCE = new TypeReference<>() { }; + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final long STOP_WAIT_MILLIS = 200; + private static final int MAX_FILES_PER_THREAD_WARNING_THRESHOLD = 250; + private final FileSourceConfig fileSourceConfig; + private final FileStrategy fileStrategy; + private final EventFactory eventFactory; + private final PluginMetrics pluginMetrics; + private final PluginFactory pluginFactory; + private final DecompressionEngine decompressionEngine; + private final AcknowledgementSetManager acknowledgementSetManager; + private final boolean acknowledgementsEnabled; + + private Thread readThread; + private FileReaderPool readerPool; + private CheckpointRegistry checkpointRegistry; + private DirectoryWatcher directoryWatcher; + + private volatile boolean isStopRequested; + private final int writeTimeout; + + @DataPrepperPluginConstructor + public FileSource( + final FileSourceConfig fileSourceConfig, final PluginMetrics pluginMetrics, final PluginFactory pluginFactory, + final EventFactory eventFactory, final AcknowledgementSetManager acknowledgementSetManager) { + Objects.requireNonNull(fileSourceConfig, "fileSourceConfig must not be null"); + this.eventFactory = Objects.requireNonNull(eventFactory, "eventFactory must not be null"); + this.pluginMetrics = Objects.requireNonNull(pluginMetrics, "pluginMetrics must not be null"); + this.pluginFactory = Objects.requireNonNull(pluginFactory, "pluginFactory must not be null"); + this.acknowledgementsEnabled = 
fileSourceConfig.isAcknowledgments(); + if (acknowledgementsEnabled) { + Objects.requireNonNull(acknowledgementSetManager, "AcknowledgementSetManager is required when acknowledgments is enabled"); + } + this.acknowledgementSetManager = acknowledgementSetManager; + fileSourceConfig.validate(); + this.fileSourceConfig = fileSourceConfig; + this.isStopRequested = false; + this.writeTimeout = FileSourceConfig.DEFAULT_TIMEOUT; + this.decompressionEngine = fileSourceConfig.getCompression().getDecompressionEngine(); + + if (fileSourceConfig.isLegacyConfig()) { + fileStrategy = new ClassicFileStrategy(); + } else { + fileStrategy = null; + } + } + + @Override + public void start(final Buffer> buffer) { + Objects.requireNonNull(buffer, "Buffer cannot be null for file source to start"); + + if (fileSourceConfig.isLegacyConfig()) { + LOG.info("Starting file source in legacy mode with path: {}", fileSourceConfig.getFilePathToRead()); + readThread = new Thread(() -> { + fileStrategy.start(buffer); + LOG.info("Completed reading file."); + }, "file-source"); + readThread.setDaemon(false); + readThread.start(); + return; + } + + startModernPath(buffer); + } + + private void startModernPath(final Buffer> buffer) { + LOG.info("Starting file source with paths: {}", fileSourceConfig.getAllPaths()); + + final int maxActiveFiles = fileSourceConfig.getMaxActiveFiles(); + final int readerThreads = fileSourceConfig.getEffectiveReaderThreads(); + if (readerThreads > 0 && maxActiveFiles / readerThreads > MAX_FILES_PER_THREAD_WARNING_THRESHOLD) { + LOG.warn("max_active_files ({}) is {} times reader_threads ({}). 
Files with pending data may experience high latency.", + maxActiveFiles, maxActiveFiles / readerThreads, readerThreads); + } + + try { + final FileMetrics fileMetrics = new FileMetrics(pluginMetrics); + final FileSystemOperations fileOps = new DefaultFileSystemOperations(); + + final String checkpointPath = fileSourceConfig.getCheckpointFile(); + final Path cpFile; + if (checkpointPath != null) { + cpFile = Paths.get(checkpointPath); + } else { + LOG.warn("No checkpoint_file configured. Checkpoint state will not be persisted across restarts."); + cpFile = null; + } + + checkpointRegistry = new CheckpointRegistry( + cpFile, + fileSourceConfig.getCheckpointInterval(), + fileSourceConfig.getCheckpointCleanupAfter()); + + final Charset encoding = Charset.forName(fileSourceConfig.getEncoding()); + + final RotationDetector rotationDetector = new RotationDetector(fileOps, fileSourceConfig.getFingerprintBytes()); + + final InputCodec fileCodec = createCodec(); + + final FileReaderContext readerContext = new FileReaderContext( + buffer, eventFactory, fileOps, fileMetrics, rotationDetector, + acknowledgementSetManager, acknowledgementsEnabled, + encoding, + fileSourceConfig.getReadBufferSize(), + fileSourceConfig.getMaxLineLength(), + writeTimeout, + fileSourceConfig.getMaxReadTimePerFile(), + fileSourceConfig.getRotationDrainTimeout(), + fileSourceConfig.getStartPosition(), + fileSourceConfig.isIncludeFileMetadata(), + fileSourceConfig.getAcknowledgmentTimeout(), + fileSourceConfig.getBatchSize(), + fileSourceConfig.getBatchTimeout(), + fileSourceConfig.getMaxAcknowledgmentRetries(), + fileCodec, + fileSourceConfig.isTail(), + decompressionEngine); + + readerPool = new FileReaderPool( + checkpointRegistry, fileMetrics, + maxActiveFiles, + readerThreads, + fileSourceConfig.getCloseInactive(), + readerContext); + + final GlobPathResolver globPathResolver = new GlobPathResolver( + fileSourceConfig.getAllPaths(), + fileSourceConfig.getExcludePaths()); + + if 
(fileSourceConfig.isTail()) { + directoryWatcher = new DirectoryWatcher( + globPathResolver, readerPool, checkpointRegistry, + fileSourceConfig, fileOps, fileMetrics, + fileSourceConfig.getRotateWait(), + fileSourceConfig.isCloseRemoved()); + directoryWatcher.start(); + } else { + final Set resolvedPaths = globPathResolver.resolve(); + for (final Path path : resolvedPaths) { + final FileIdentity fileIdentity = FileIdentity.from(path, fileOps, fileSourceConfig.getFingerprintBytes()); + readerPool.addFile(fileIdentity, path); + } + } + } catch (final RuntimeException e) { + shutdownTailingResources(); + throw e; + } + } + + private void shutdownTailingResources() { + if (directoryWatcher != null) { + directoryWatcher.stop(); + } + if (readerPool != null) { + readerPool.shutdown(); + } + if (checkpointRegistry != null) { + checkpointRegistry.shutdown(); + } + } + + private InputCodec createCodec() { + if (fileSourceConfig.getCodec() == null) { + return null; + } + final PluginModel codecConfiguration = fileSourceConfig.getCodec(); + final PluginSetting codecPluginSettings = new PluginSetting( + codecConfiguration.getPluginName(), codecConfiguration.getPluginSettings()); + return pluginFactory.loadPlugin(InputCodec.class, codecPluginSettings); + } + + @Override + public void stop() { + isStopRequested = true; + + shutdownTailingResources(); + + if (readThread != null) { + try { + readThread.join(STOP_WAIT_MILLIS); + } catch (final InterruptedException e) { + readThread.interrupt(); + } + } + } + + @Override + public boolean areAcknowledgementsEnabled() { + return acknowledgementsEnabled; + } + + private interface FileStrategy { + void start(final Buffer> buffer); + } + + private class ClassicFileStrategy implements FileStrategy { + @Override + public void start(Buffer> buffer) { + final GlobPathResolver resolver = new GlobPathResolver( + fileSourceConfig.getAllPaths(), fileSourceConfig.getExcludePaths()); + final Set resolvedPaths = resolver.resolve(); + if 
(resolvedPaths.isEmpty() && fileSourceConfig.getFilePathToRead() != null) { + resolvedPaths.add(Paths.get(fileSourceConfig.getFilePathToRead()).toAbsolutePath().normalize()); + } + for (final Path filePath : resolvedPaths) { + if (isStopRequested) { + break; + } + readFile(filePath, buffer); + } + } + + private void readFile(final Path filePath, final Buffer> buffer) { + try (BufferedReader reader = new BufferedReader(new InputStreamReader(decompressionEngine.createInputStream(Files.newInputStream(filePath)), Charset.forName(fileSourceConfig.getEncoding())))) { + String line; + while ((line = reader.readLine()) != null && !isStopRequested) { + writeLineAsEventOrString(line, buffer); + } + } catch (IOException | TimeoutException | IllegalArgumentException ex) { + LOG.error("Error processing the input file path [{}]", filePath, ex); + throw new RuntimeException(format("Error processing the input file %s", filePath), ex); + } + } + + private Record getEventRecordFromLine(final String line) { + Map structuredLine = new HashMap<>(); + + switch(fileSourceConfig.getFormat()) { + case JSON: + structuredLine = parseJson(line); + break; + case PLAIN: + structuredLine.put(MESSAGE_KEY, line); + break; + } + + return new Record<>( + eventFactory.eventBuilder(EventBuilder.class) + .withEventType(fileSourceConfig.getRecordType().toString()) + .withData(structuredLine) + .build()); + } + + private Map parseJson(final String jsonString) { + try { + return OBJECT_MAPPER.readValue(jsonString, MAP_TYPE_REFERENCE); + } catch (JsonProcessingException e) { + LOG.error(SENSITIVE, "Unable to parse json data [{}], assuming plain text", jsonString, e); + final Map plainMap = new HashMap<>(); + plainMap.put(MESSAGE_KEY, jsonString); + return plainMap; + } + } + + private void writeLineAsEventOrString(final String line, final Buffer> buffer) throws TimeoutException, IllegalArgumentException { + if (fileSourceConfig.getRecordType() == RecordType.EVENT) { + 
buffer.write(getEventRecordFromLine(line), writeTimeout); + } else if (fileSourceConfig.getRecordType() == RecordType.STRING) { + buffer.write(new Record<>(line), writeTimeout); + } + } + } + +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileSourceConfig.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileSourceConfig.java new file mode 100644 index 0000000000..f52d636f32 --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileSourceConfig.java @@ -0,0 +1,272 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import jakarta.validation.constraints.AssertTrue; +import org.opensearch.dataprepper.model.configuration.PluginModel; +import org.opensearch.dataprepper.plugins.codec.CompressionOption; + +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class FileSourceConfig { + static final String ATTRIBUTE_PATH = "path"; + static final String ATTRIBUTE_TYPE = "record_type"; + static final String ATTRIBUTE_FORMAT = "format"; + static final int DEFAULT_TIMEOUT = 5_000; + static final String DEFAULT_TYPE = "string"; + static final String EVENT_TYPE = "event"; + + @JsonProperty(ATTRIBUTE_PATH) + private String filePathToRead; + + @JsonProperty("paths") + private List paths = Collections.emptyList(); + + @JsonProperty("tail") + private boolean tail = false; + + @JsonProperty(ATTRIBUTE_FORMAT) + private FileFormat format = FileFormat.PLAIN; + + 
@JsonProperty(ATTRIBUTE_TYPE) + private RecordType recordType = RecordType.STRING; + + @JsonProperty("codec") + private PluginModel codec; + + @JsonProperty("compression") + private CompressionOption compression = CompressionOption.NONE; + + @JsonProperty("start_position") + private StartPosition startPosition = StartPosition.BEGINNING; + + @JsonProperty("poll_interval") + private Duration pollInterval = Duration.ofSeconds(1); + + @JsonProperty("encoding") + private String encoding = "UTF-8"; + + @JsonProperty("read_buffer_size") + private int readBufferSize = 65536; + + @JsonProperty("max_active_files") + private int maxActiveFiles = 1000; + + @JsonProperty("reader_threads") + private int readerThreads = 4; + + @JsonProperty("max_read_time_per_file") + private Duration maxReadTimePerFile = Duration.ofSeconds(5); + + @JsonProperty("rotate_wait") + private Duration rotateWait = Duration.ofSeconds(5); + + @JsonProperty("rotation_drain_timeout") + private Duration rotationDrainTimeout = Duration.ofSeconds(30); + + @JsonProperty("checkpoint_file") + private String checkpointFile; + + @JsonProperty("checkpoint_interval") + private Duration checkpointInterval = Duration.ofSeconds(5); + + @JsonProperty("checkpoint_cleanup_after") + private Duration checkpointCleanupAfter = Duration.ofDays(7); + + @JsonProperty("fingerprint_bytes") + private int fingerprintBytes = 1024; + + @JsonProperty("close_inactive") + private Duration closeInactive = Duration.ofMinutes(30); + + @JsonProperty("close_removed") + private boolean closeRemoved = true; + + @JsonProperty("batch_size") + private int batchSize = 100; + + @JsonProperty("batch_timeout") + private Duration batchTimeout = Duration.ofSeconds(5); + + @JsonProperty("acknowledgment_timeout") + private Duration acknowledgmentTimeout = Duration.ofSeconds(60); + + @JsonProperty("max_acknowledgment_retries") + private int maxAcknowledgmentRetries = 3; + + @JsonProperty("acknowledgments") + private boolean acknowledgments = false; + + 
@JsonProperty("include_file_metadata") + private boolean includeFileMetadata = true; + + @JsonProperty("max_line_length") + private int maxLineLength = 1048576; + + @JsonProperty("exclude_paths") + private List excludePaths = Collections.emptyList(); + + public String getFilePathToRead() { + return filePathToRead; + } + + public List getPaths() { + return paths; + } + + public boolean isTail() { + return tail; + } + + public List getAllPaths() { + final List allPaths = new ArrayList<>(getPaths()); + if (filePathToRead != null && !allPaths.contains(filePathToRead)) { + allPaths.add(filePathToRead); + } + return allPaths; + } + + public FileFormat getFormat() { + return format; + } + + public RecordType getRecordType() { + return recordType; + } + + public PluginModel getCodec() { + return codec; + } + + public CompressionOption getCompression() { + return compression; + } + + public StartPosition getStartPosition() { + return startPosition; + } + + public Duration getPollInterval() { + return pollInterval; + } + + public String getEncoding() { + return encoding; + } + + public int getReadBufferSize() { + return readBufferSize; + } + + public int getMaxActiveFiles() { + return maxActiveFiles; + } + + public int getReaderThreads() { + return readerThreads; + } + + public Duration getMaxReadTimePerFile() { + return maxReadTimePerFile; + } + + public Duration getRotateWait() { + return rotateWait; + } + + public Duration getRotationDrainTimeout() { + return rotationDrainTimeout; + } + + public String getCheckpointFile() { + return checkpointFile; + } + + public Duration getCheckpointInterval() { + return checkpointInterval; + } + + public Duration getCheckpointCleanupAfter() { + return checkpointCleanupAfter; + } + + public int getFingerprintBytes() { + return fingerprintBytes; + } + + public Duration getCloseInactive() { + return closeInactive; + } + + public boolean isCloseRemoved() { + return closeRemoved; + } + + public int getBatchSize() { + return batchSize; + } + 
+ public Duration getBatchTimeout() { + return batchTimeout; + } + + public Duration getAcknowledgmentTimeout() { + return acknowledgmentTimeout; + } + + public int getMaxAcknowledgmentRetries() { + return maxAcknowledgmentRetries; + } + + public boolean isAcknowledgments() { + return acknowledgments; + } + + public boolean isIncludeFileMetadata() { + return includeFileMetadata; + } + + public int getMaxLineLength() { + return maxLineLength; + } + + public List getExcludePaths() { + return excludePaths; + } + + void validate() { + Preconditions.checkArgument( + (filePathToRead != null && !filePathToRead.isEmpty()) || !paths.isEmpty(), + "At least one of path or paths is required"); + if (!tail) { + Preconditions.checkArgument(startPosition != StartPosition.END, + "start_position: end is only valid when tail is true"); + } + } + + public boolean isLegacyConfig() { + return codec == null && !tail && paths.isEmpty() && excludePaths.isEmpty(); + } + + public int getEffectiveReaderThreads() { + return tail ? readerThreads : 1; + } + + @AssertTrue(message = "The file source requires recordType to be event when using a codec.") + boolean codeRequiresRecordTypeEvent() { + return codec == null || recordType == RecordType.EVENT; + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileSystemOperations.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileSystemOperations.java new file mode 100644 index 0000000000..ddafdb078e --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/FileSystemOperations.java @@ -0,0 +1,32 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import java.io.IOException; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.stream.Stream; + +public interface FileSystemOperations { + + FileChannel openReadChannel(Path path) throws IOException; + + BasicFileAttributes readAttributes(Path path) throws IOException; + + Stream listDirectory(Path directory) throws IOException; + + boolean exists(Path path); + + long size(Path path) throws IOException; + + byte[] readBytes(Path path, int length) throws IOException; +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/GlobPathResolver.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/GlobPathResolver.java new file mode 100644 index 0000000000..a6bff96d6f --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/GlobPathResolver.java @@ -0,0 +1,170 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.FileSystems; +import java.nio.file.FileVisitResult; +import java.nio.file.Files; +import java.nio.file.InvalidPathException; +import java.nio.file.Path; +import java.nio.file.PathMatcher; +import java.nio.file.Paths; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.regex.PatternSyntaxException; + +public final class GlobPathResolver { + + private static final Logger LOG = LoggerFactory.getLogger(GlobPathResolver.class); + private static final String WILDCARD_CHARS = "*?{["; + + private final List includePatterns; + private final List excludePatterns; + private final List includeMatchers; + private final List excludeMatchers; + + public GlobPathResolver(final List includePatterns, final List excludePatterns) { + this.includePatterns = Objects.requireNonNull(includePatterns, "includePatterns must not be null"); + this.excludePatterns = excludePatterns != null ? 
excludePatterns : Collections.emptyList(); + this.includeMatchers = buildMatchers(this.includePatterns); + this.excludeMatchers = buildMatchers(this.excludePatterns); + } + + public Set resolve() { + final Set result = new HashSet<>(); + final Set baseDirectories = getWatchDirectories(); + + for (final Path baseDir : baseDirectories) { + if (!Files.isDirectory(baseDir)) { + LOG.warn("Base directory does not exist or is not a directory: {}", baseDir); + continue; + } + walkDirectory(baseDir, result); + } + + return result; + } + + void walkDirectory(final Path baseDir, final Set result) { + walkDirectory(baseDir, createFileVisitor(result)); + } + + void walkDirectory(final Path baseDir, final SimpleFileVisitor visitor) { + try { + Files.walkFileTree(baseDir, visitor); + } catch (final IOException e) { + LOG.warn("Failed to walk directory tree at: {}", baseDir, e); + } + } + + SimpleFileVisitor createFileVisitor(final Set result) { + return new SimpleFileVisitor<>() { + @Override + public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) { + final Path normalized = file.toAbsolutePath().normalize(); + if (matches(normalized)) { + result.add(normalized); + } + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult visitFileFailed(final Path file, final IOException exc) { + LOG.warn("Failed to access file during glob resolution: {}", file, exc); + return FileVisitResult.CONTINUE; + } + }; + } + + public boolean matches(final Path path) { + final Path normalized = path.toAbsolutePath().normalize(); + + boolean included = false; + for (final PathMatcher matcher : includeMatchers) { + if (matcher.matches(normalized)) { + included = true; + break; + } + } + + if (!included) { + return false; + } + + for (final PathMatcher matcher : excludeMatchers) { + if (matcher.matches(normalized)) { + return false; + } + } + + return true; + } + + public Set getWatchDirectories() { + final Set directories = new HashSet<>(); + for (final 
String pattern : includePatterns) { + final Path baseDir = extractBaseDirectory(pattern); + directories.add(baseDir); + } + return directories; + } + + static Path extractBaseDirectory(final String pattern) { + final String normalized = Paths.get(pattern).toAbsolutePath().normalize().toString(); + final StringBuilder staticPrefix = new StringBuilder(); + + for (int i = 0; i < normalized.length(); i++) { + final char c = normalized.charAt(i); + if (WILDCARD_CHARS.indexOf(c) >= 0) { + break; + } + staticPrefix.append(c); + } + + String prefix = staticPrefix.toString(); + final int lastSep = prefix.lastIndexOf('/'); + if (lastSep > 0) { + prefix = prefix.substring(0, lastSep); + } + + final Path result = Paths.get(prefix).toAbsolutePath().normalize(); + if (Files.isDirectory(result)) { + return result; + } + + final Path parent = result.getParent(); + return Objects.requireNonNullElse(parent, result); + } + + private static List buildMatchers(final List patterns) { + final List matchers = new ArrayList<>(patterns.size()); + for (final String pattern : patterns) { + try { + final String absolutePattern = Paths.get(pattern).toAbsolutePath().normalize().toString(); + matchers.add(FileSystems.getDefault().getPathMatcher("glob:" + absolutePattern)); + } catch (final PatternSyntaxException | InvalidPathException e) { + LOG.error("Invalid glob pattern '{}': {}", pattern, e.getMessage()); + throw new IllegalArgumentException("Invalid glob pattern: " + pattern, e); + } + } + return matchers; + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/PendingFile.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/PendingFile.java new file mode 100644 index 0000000000..b53ee06622 --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/PendingFile.java @@ -0,0 +1,44 @@ +/* + * Copyright OpenSearch Contributors + * 
SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import java.nio.file.Path; +import java.util.Objects; + +public final class PendingFile { + + private final FileIdentity fileIdentity; + private final Path path; + private final long enqueuedTimeMillis; + + public PendingFile(final FileIdentity fileIdentity, final Path path) { + this.fileIdentity = Objects.requireNonNull(fileIdentity, "fileIdentity must not be null"); + this.path = Objects.requireNonNull(path, "path must not be null"); + this.enqueuedTimeMillis = System.currentTimeMillis(); + } + + public FileIdentity getFileIdentity() { + return fileIdentity; + } + + public Path getPath() { + return path; + } + + public long getEnqueuedTimeMillis() { + return enqueuedTimeMillis; + } + + @Override + public String toString() { + return "PendingFile{path=" + path + ", identity=" + fileIdentity + "}"; + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/RecordType.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/RecordType.java new file mode 100644 index 0000000000..3716649e22 --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/RecordType.java @@ -0,0 +1,50 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonValue; + +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public enum RecordType { + STRING("string"), + EVENT("event"); + + private static final Map NAMES_MAP = Stream.of(values()) + .collect(Collectors.toMap(RecordType::toString, v -> v)); + + private final String name; + + RecordType(final String name) { + this.name = name; + } + + @JsonValue + @Override + public String toString() { + return name; + } + + @JsonCreator + public static RecordType fromString(final String name) { + if (name == null) { + throw new IllegalArgumentException("Invalid record_type: null. Valid values are: " + NAMES_MAP.keySet()); + } + final RecordType value = NAMES_MAP.get(name.toLowerCase()); + if (value == null) { + throw new IllegalArgumentException("Invalid record_type: " + name + ". Valid values are: " + NAMES_MAP.keySet()); + } + return value; + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/RotationDetector.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/RotationDetector.java new file mode 100644 index 0000000000..057235d799 --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/RotationDetector.java @@ -0,0 +1,93 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.util.Objects; + +public final class RotationDetector { + + private static final Logger LOG = LoggerFactory.getLogger(RotationDetector.class); + + private final FileSystemOperations fileOps; + private final int fingerprintBytes; + + public RotationDetector(final FileSystemOperations fileOps, final int fingerprintBytes) { + this.fileOps = Objects.requireNonNull(fileOps, "fileOps must not be null"); + if (fingerprintBytes <= 0) { + throw new IllegalArgumentException("fingerprintBytes must be positive"); + } + this.fingerprintBytes = fingerprintBytes; + } + + public int getFingerprintBytes() { + return fingerprintBytes; + } + + public RotationResult checkRotation(final Path path, final FileIdentity knownIdentity, final long currentOffset) { + if (!fileOps.exists(path)) { + LOG.debug("File deleted: {}", path); + return RotationResult.DELETED; + } + + final FileIdentity currentIdentity; + try { + currentIdentity = FileIdentity.from(path, fileOps, fingerprintBytes); + } catch (final RuntimeException e) { + if (isCausedByNoSuchFile(e)) { + LOG.debug("File deleted: {}", path); + return RotationResult.DELETED; + } + LOG.warn("Error checking rotation for file {}", path, e); + return RotationResult.NO_ROTATION; + } + + if (!currentIdentity.equals(knownIdentity)) { + LOG.info("Create/rename rotation detected for file {}", path); + return new RotationResult(RotationType.CREATE_RENAME, currentIdentity); + } + + try { + final long fileSize = fileOps.size(path); + if (fileSize < currentOffset) { + LOG.info("Copytruncate rotation detected for file {} (size={}, offset={})", path, fileSize, currentOffset); + return new RotationResult(RotationType.COPYTRUNCATE, currentIdentity); + } + } catch (final NoSuchFileException e) { + LOG.debug("File 
deleted during size check: {}", path); + return RotationResult.DELETED; + } catch (final IOException e) { + LOG.warn("Error reading file size for {}", path, e); + return RotationResult.NO_ROTATION; + } + + return RotationResult.NO_ROTATION; + } + + boolean isCausedByNoSuchFile(final Exception e) { + if (e instanceof NoSuchFileException) { + return true; + } + Throwable cause = e.getCause(); + while (cause != null) { + if (cause instanceof NoSuchFileException) { + return true; + } + cause = cause.getCause(); + } + return false; + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/RotationResult.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/RotationResult.java new file mode 100644 index 0000000000..7aa6470534 --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/RotationResult.java @@ -0,0 +1,41 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import java.util.Objects; + +public final class RotationResult { + + static final RotationResult NO_ROTATION = new RotationResult(RotationType.NO_ROTATION, null); + static final RotationResult DELETED = new RotationResult(RotationType.DELETED, null); + + private final RotationType rotationType; + private final FileIdentity newFileIdentity; + + RotationResult(final RotationType rotationType, final FileIdentity newFileIdentity) { + this.rotationType = Objects.requireNonNull(rotationType, "rotationType must not be null"); + this.newFileIdentity = newFileIdentity; + } + + public RotationType getRotationType() { + return rotationType; + } + + public FileIdentity getNewFileIdentity() { + return newFileIdentity; + } + + @Override + public String toString() { + return "RotationResult{type=" + rotationType + + (newFileIdentity != null ? ", newIdentity=" + newFileIdentity : "") + "}"; + } +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/RotationType.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/RotationType.java new file mode 100644 index 0000000000..9bd6a3b48c --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/RotationType.java @@ -0,0 +1,18 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +public enum RotationType { + NO_ROTATION, + CREATE_RENAME, + COPYTRUNCATE, + DELETED +} diff --git a/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/StartPosition.java b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/StartPosition.java new file mode 100644 index 0000000000..03bf75dff0 --- /dev/null +++ b/data-prepper-plugins/file-source/src/main/java/org/opensearch/dataprepper/plugins/source/file/StartPosition.java @@ -0,0 +1,48 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import com.fasterxml.jackson.annotation.JsonCreator; + +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public enum StartPosition { + BEGINNING("beginning"), + END("end"); + + private static final Map NAMES_MAP = Stream.of(values()) + .collect(Collectors.toMap(StartPosition::toString, v -> v)); + + private final String name; + + StartPosition(final String name) { + this.name = name; + } + + @JsonCreator + public static StartPosition fromString(final String name) { + if (name == null) { + throw new IllegalArgumentException("Invalid start_position: null. Valid values are: " + NAMES_MAP.keySet()); + } + final StartPosition value = NAMES_MAP.get(name.toLowerCase()); + if (value == null) { + throw new IllegalArgumentException("Invalid start_position: " + name + ". 
Valid values are: " + NAMES_MAP.keySet()); + } + return value; + } + + @Override + public String toString() { + return name; + } +} diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/CheckpointEntryTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/CheckpointEntryTest.java new file mode 100644 index 0000000000..a574cd150c --- /dev/null +++ b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/CheckpointEntryTest.java @@ -0,0 +1,141 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.notNullValue; + +class CheckpointEntryTest { + + private CheckpointEntry checkpointEntry; + + @BeforeEach + void setUp() { + checkpointEntry = new CheckpointEntry(); + } + + @Test + void defaultConstructorSetsZeroOffsets() { + assertThat(checkpointEntry.getReadOffset(), equalTo(0L)); + assertThat(checkpointEntry.getCommittedOffset(), equalTo(0L)); + } + + @Test + void defaultConstructorSetsActiveStatus() { + assertThat(checkpointEntry.getStatus(), equalTo(CheckpointStatus.ACTIVE)); + } + + @Test + void defaultConstructorSetsLastUpdatedMillis() { + final long before = System.currentTimeMillis(); + final CheckpointEntry entry = new CheckpointEntry(); + final long after = System.currentTimeMillis(); + + assertThat(entry.getLastUpdatedMillis(), 
greaterThanOrEqualTo(before)); + assertThat(entry.getLastUpdatedMillis(), lessThanOrEqualTo(after)); + } + + @Test + void parameterizedConstructorSetsAllFields() { + final CheckpointEntry entry = new CheckpointEntry(100L, 50L, CheckpointStatus.COMPLETED); + + assertThat(entry.getReadOffset(), equalTo(100L)); + assertThat(entry.getCommittedOffset(), equalTo(50L)); + assertThat(entry.getStatus(), equalTo(CheckpointStatus.COMPLETED)); + } + + @Test + void setReadOffsetUpdatesValueAndTimestamp() { + final long beforeUpdate = System.currentTimeMillis(); + checkpointEntry.setReadOffset(500L); + final long afterUpdate = System.currentTimeMillis(); + + assertThat(checkpointEntry.getReadOffset(), equalTo(500L)); + assertThat(checkpointEntry.getLastUpdatedMillis(), greaterThanOrEqualTo(beforeUpdate)); + assertThat(checkpointEntry.getLastUpdatedMillis(), lessThanOrEqualTo(afterUpdate)); + } + + @Test + void setCommittedOffsetUpdatesValueAndTimestamp() { + final long beforeUpdate = System.currentTimeMillis(); + checkpointEntry.setCommittedOffset(300L); + final long afterUpdate = System.currentTimeMillis(); + + assertThat(checkpointEntry.getCommittedOffset(), equalTo(300L)); + assertThat(checkpointEntry.getLastUpdatedMillis(), greaterThanOrEqualTo(beforeUpdate)); + assertThat(checkpointEntry.getLastUpdatedMillis(), lessThanOrEqualTo(afterUpdate)); + } + + @Test + void setStatusUpdatesValueAndTimestamp() { + final long beforeUpdate = System.currentTimeMillis(); + checkpointEntry.setStatus(CheckpointStatus.COMPLETED); + final long afterUpdate = System.currentTimeMillis(); + + assertThat(checkpointEntry.getStatus(), equalTo(CheckpointStatus.COMPLETED)); + assertThat(checkpointEntry.getLastUpdatedMillis(), greaterThanOrEqualTo(beforeUpdate)); + assertThat(checkpointEntry.getLastUpdatedMillis(), lessThanOrEqualTo(afterUpdate)); + } + + @Test + void snapshotReturnsNewInstanceWithSameValues() { + checkpointEntry.setReadOffset(200L); + checkpointEntry.setCommittedOffset(100L); + 
checkpointEntry.setStatus(CheckpointStatus.COMPLETED); + + final CheckpointEntry snapshot = checkpointEntry.snapshot(); + + assertThat(snapshot, notNullValue()); + assertThat(snapshot.getReadOffset(), equalTo(200L)); + assertThat(snapshot.getCommittedOffset(), equalTo(100L)); + assertThat(snapshot.getStatus(), equalTo(CheckpointStatus.COMPLETED)); + } + + @Test + void snapshotIsIndependentOfOriginal() { + checkpointEntry.setReadOffset(200L); + checkpointEntry.setCommittedOffset(100L); + + final CheckpointEntry snapshot = checkpointEntry.snapshot(); + + checkpointEntry.setReadOffset(999L); + checkpointEntry.setCommittedOffset(888L); + checkpointEntry.setStatus(CheckpointStatus.COMPLETED); + + assertThat(snapshot.getReadOffset(), equalTo(200L)); + assertThat(snapshot.getCommittedOffset(), equalTo(100L)); + assertThat(snapshot.getStatus(), equalTo(CheckpointStatus.ACTIVE)); + } + + @Test + void multipleAdvancesAccumulateCorrectly() { + checkpointEntry.setReadOffset(100L); + checkpointEntry.setReadOffset(200L); + checkpointEntry.setReadOffset(300L); + + assertThat(checkpointEntry.getReadOffset(), equalTo(300L)); + } + + @Test + void statusTransitionFromActiveToCompleted() { + assertThat(checkpointEntry.getStatus(), equalTo(CheckpointStatus.ACTIVE)); + + checkpointEntry.setStatus(CheckpointStatus.COMPLETED); + + assertThat(checkpointEntry.getStatus(), equalTo(CheckpointStatus.COMPLETED)); + } +} diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/CheckpointRegistryTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/CheckpointRegistryTest.java new file mode 100644 index 0000000000..daaeb1591f --- /dev/null +++ b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/CheckpointRegistryTest.java @@ -0,0 +1,340 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch 
Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ *
+ */
+
+package org.opensearch.dataprepper.plugins.source.file;
+
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.Duration;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import static org.awaitility.Awaitility.await;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.notNullValue;
+import static org.hamcrest.Matchers.nullValue;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyLong;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+class CheckpointRegistryTest { // Unit tests for CheckpointRegistry: lookup, flush/reload round trips, cleanup and shutdown.
+
+    private static final Duration FLUSH_INTERVAL = Duration.ofHours(1); // presumably long enough that no periodic flush fires mid-test; tests call flush() explicitly
+    private static final Duration CLEANUP_AFTER = Duration.ofHours(24);
+
+    @TempDir
+    Path tempDir;
+
+    private Path checkpointFile;
+    private CheckpointRegistry registry; // whatever is stored here is shut down by tearDown()
+
+    @BeforeEach
+    void setUp() {
+        checkpointFile = tempDir.resolve("checkpoints.json");
+    }
+
+    @AfterEach
+    void tearDown() {
+        if (registry != null) { // tests that already shut the registry down themselves null the field out
+            registry.shutdown();
+        }
+    }
+
+    private CheckpointRegistry createRegistry() { // registry backed by the per-test checkpoint file
+        return createRegistry(checkpointFile);
+    }
+
+    private CheckpointRegistry createRegistry(final Path file) { // registry backed by an arbitrary file, default intervals
+        return new CheckpointRegistry(file, FLUSH_INTERVAL, CLEANUP_AFTER);
+    }
+
+    @Test
+    void getOrCreateReturnsNewEntryForUnknownKey() {
+        registry = createRegistry();
+
+        final CheckpointEntry entry = registry.getOrCreate("test-key");
+
+        assertThat(entry, notNullValue());
+        assertThat(entry.getReadOffset(), equalTo(0L));
+        assertThat(entry.getCommittedOffset(), equalTo(0L));
+        assertThat(entry.getStatus(), equalTo(CheckpointStatus.ACTIVE));
+    }
+
+    @Test
+    void getOrCreateReturnsSameEntryForSameKey() {
+        registry = createRegistry();
+
+        final CheckpointEntry first = registry.getOrCreate("test-key");
+        first.setReadOffset(42L);
+
+        final CheckpointEntry second = registry.getOrCreate("test-key");
+
+        assertThat(second.getReadOffset(), equalTo(42L));
+    }
+
+    @Test
+    void getReturnsNullForUnknownKey() {
+        registry = createRegistry();
+
+        assertThat(registry.get("nonexistent"), nullValue());
+    }
+
+    @Test
+    void getReturnsEntryAfterGetOrCreate() {
+        registry = createRegistry();
+
+        registry.getOrCreate("my-key").setReadOffset(77L);
+
+        final CheckpointEntry retrieved = registry.get("my-key");
+        assertThat(retrieved, notNullValue());
+        assertThat(retrieved.getReadOffset(), equalTo(77L));
+    }
+
+    @Test
+    void flushAndLoadRoundTripPreservesEntries() {
+        registry = createRegistry();
+        final CheckpointEntry entry = registry.getOrCreate("/var/log/app.log");
+        entry.setReadOffset(1024L);
+        entry.setCommittedOffset(512L);
+        entry.setStatus(CheckpointStatus.ACTIVE);
+
+        registry.flush();
+        registry.shutdown();
+
+        final CheckpointRegistry reloaded = createRegistry(); // second instance reads the same checkpoint file
+        registry = reloaded;
+
+        final CheckpointEntry loaded = reloaded.get("/var/log/app.log");
+        assertThat(loaded, notNullValue());
+        assertThat(loaded.getReadOffset(), equalTo(1024L));
+        assertThat(loaded.getCommittedOffset(), equalTo(512L));
+        assertThat(loaded.getStatus(), equalTo(CheckpointStatus.ACTIVE));
+    }
+
+    @Test
+    void flushCreatesCheckpointFileOnDisk() {
+        registry = createRegistry();
+        registry.getOrCreate("some-file");
+
+        registry.flush();
+
+        assertThat(Files.exists(checkpointFile), equalTo(true));
+    }
+
+    @Test
+    void flushUsesAtomicWriteWithTempFile() { // only checks that no "<name>.tmp" sibling is left behind after flush()
+        registry = createRegistry();
+        registry.getOrCreate("file1");
+
+        registry.flush();
+
+        final Path tempFile = checkpointFile.resolveSibling(checkpointFile.getFileName() + ".tmp");
+        assertThat(Files.exists(tempFile), equalTo(false));
+        assertThat(Files.exists(checkpointFile), equalTo(true));
+    }
+
+    @Test
+    void corruptCheckpointFileStartsWithEmptyState() throws IOException {
+        Files.writeString(checkpointFile, "THIS IS NOT VALID JSON{{{");
+
+        registry = createRegistry();
+
+        assertThat(registry.get("any-key"), nullValue());
+    }
+
+    @Test
+    void emptyCheckpointFileStartsWithEmptyState() throws IOException {
+        Files.writeString(checkpointFile, "");
+
+        registry = createRegistry();
+
+        assertThat(registry.get("any-key"), nullValue());
+    }
+
+    @Test
+    void cleanupRemovesStaleCompletedEntries() {
+        final Duration zeroCleanup = Duration.ZERO;
+        registry = new CheckpointRegistry(checkpointFile, FLUSH_INTERVAL, zeroCleanup);
+
+        final CheckpointEntry entry = registry.getOrCreate("stale-file");
+        entry.setStatus(CheckpointStatus.COMPLETED);
+
+        await().atMost(Duration.ofSeconds(2)).untilAsserted(() -> { // retried: flush() is repeated until the entry is gone
+            registry.flush();
+            assertThat(registry.get("stale-file"), nullValue());
+        });
+
+        assertThat(registry.get("stale-file"), nullValue());
+    }
+
+    @Test
+    void cleanupKeepsActiveEntries() {
+        final Duration zeroCleanup = Duration.ZERO;
+        registry = new CheckpointRegistry(checkpointFile, FLUSH_INTERVAL, zeroCleanup);
+
+        registry.getOrCreate("active-file").setReadOffset(100L);
+
+        registry.flush();
+
+        final CheckpointEntry entry = registry.get("active-file");
+        assertThat(entry, notNullValue());
+        assertThat(entry.getReadOffset(), equalTo(100L));
+    }
+
+    @Test
+    void multipleEntriesPersistCorrectly() {
+        registry = createRegistry();
+
+        registry.getOrCreate("file-a").setReadOffset(10L);
+        registry.getOrCreate("file-b").setReadOffset(20L);
+        registry.getOrCreate("file-c").setReadOffset(30L);
+
+        registry.flush();
+        registry.shutdown();
+
+        final CheckpointRegistry reloaded = createRegistry();
+        registry = reloaded;
+
+        assertThat(reloaded.get("file-a").getReadOffset(), equalTo(10L));
+        assertThat(reloaded.get("file-b").getReadOffset(), equalTo(20L));
+        assertThat(reloaded.get("file-c").getReadOffset(), equalTo(30L));
+    }
+
+    @Test
+    void shutdownFlushesBeforeTerminating() { // no explicit flush(): the reloaded value must come from shutdown()
+        registry = createRegistry();
+        registry.getOrCreate("flush-on-shutdown").setReadOffset(999L);
+
+        registry.shutdown();
+
+        final CheckpointRegistry reloaded = createRegistry();
+        registry = reloaded;
+
+        final CheckpointEntry entry = reloaded.get("flush-on-shutdown");
+        assertThat(entry, notNullValue());
+        assertThat(entry.getReadOffset(), equalTo(999L));
+    }
+
+    @Test
+    void flush_handles_io_error_on_unwritable_path() throws IOException { // passes when flush() does not throw
+        Path unwritablePath = Files.createFile(tempDir.resolve("not-a-directory")).resolve("sub/checkpoints.json"); // parent is a regular file, so the path is unwritable for any user and stays inside tempDir
+        registry = createRegistry(unwritablePath);
+        registry.getOrCreate("some-key").setReadOffset(100L);
+
+        registry.flush();
+    }
+
+    @Test
+    void shutdown_handles_scheduler_interrupted() throws Exception {
+        registry = createRegistry();
+        registry.getOrCreate("interrupt-key").setReadOffset(50L);
+
+        Thread shutdownThread = new Thread(() -> {
+            Thread.currentThread().interrupt(); // enter shutdown() with the interrupt flag already set
+            registry.shutdown();
+        });
+        shutdownThread.start();
+        shutdownThread.join(5000);
+
+        assertThat(shutdownThread.isAlive(), equalTo(false));
+        registry = null; // already shut down on the helper thread; skip tearDown()'s shutdown
+    }
+
+    @Test
+    void load_handles_null_checkpoint_file() { // stored in the shared field so tearDown() shuts this registry down as well
+        registry = new CheckpointRegistry(null, FLUSH_INTERVAL, CLEANUP_AFTER);
+
+        assertThat(registry.get("any"), nullValue());
+        registry.getOrCreate("test").setReadOffset(10L);
+        assertThat(registry.get("test").getReadOffset(), equalTo(10L));
+    }
+
+    @Test
+    void shutdown_calls_shutdownNow_when_awaitTermination_returns_false() throws Exception { // NOTE(review): nothing verifies shutdownNow(); only checks shutdown() returns normally
+        ScheduledExecutorService mockScheduler = mock(ScheduledExecutorService.class);
+        when(mockScheduler.awaitTermination(anyLong(), any(TimeUnit.class))).thenReturn(false);
+
+        registry = new CheckpointRegistry(checkpointFile, FLUSH_INTERVAL, CLEANUP_AFTER, () -> mockScheduler);
+
+        registry.shutdown();
+        registry = null;
+    }
+
+    @Test
+    void shutdown_calls_shutdownNow_when_awaitTermination_throws_interrupted() throws Exception { // NOTE(review): nothing verifies shutdownNow(); asserts the interrupt flag is restored
+        ScheduledExecutorService mockScheduler = mock(ScheduledExecutorService.class);
+        when(mockScheduler.awaitTermination(anyLong(), any(TimeUnit.class))).thenThrow(new InterruptedException("test"));
+
+        registry = new CheckpointRegistry(checkpointFile, FLUSH_INTERVAL, CLEANUP_AFTER, () -> mockScheduler);
+
+        registry.shutdown();
+        registry = null;
+
+        assertThat(Thread.currentThread().isInterrupted(), equalTo(true));
+        Thread.interrupted(); // clear the flag so it does not leak into later tests on this thread
+    }
+
+    @Test
+    void load_with_file_having_no_parent_directory(@TempDir final Path altDir) {
+        final Path noParentFile = altDir.resolve("checkpoint-no-parent.json");
+        registry = new CheckpointRegistry(noParentFile, FLUSH_INTERVAL, CLEANUP_AFTER);
+        assertThat(registry.get("any"), nullValue());
+    }
+
+    @Test
+    void load_handles_null_map_from_json() throws IOException { // file content is the JSON literal null
+        Files.writeString(checkpointFile, "null");
+        registry = createRegistry();
+        assertThat(registry.get("any"), nullValue());
+    }
+
+    @Test
+    void cleanup_does_not_remove_recently_completed_entry() {
+        final Duration longCleanup = Duration.ofHours(48);
+        registry = new CheckpointRegistry(checkpointFile, FLUSH_INTERVAL, longCleanup);
+
+        final CheckpointEntry entry = registry.getOrCreate("recent-completed");
+        entry.setStatus(CheckpointStatus.COMPLETED);
+
+        registry.flush();
+
+        assertThat(registry.get("recent-completed"), notNullValue());
+    }
+
+    @Test
+    void markCompleted_sets_status_on_existing_entry() {
+        registry = new CheckpointRegistry(checkpointFile, FLUSH_INTERVAL, CLEANUP_AFTER);
+        registry.getOrCreate("mark-test");
+        registry.markCompleted("mark-test");
+        assertThat(registry.get("mark-test").getStatus(), equalTo(CheckpointStatus.COMPLETED));
+    }
+
+    @Test
+    void markCompleted_does_nothing_for_nonexistent_key() {
+        registry = new CheckpointRegistry(checkpointFile, FLUSH_INTERVAL, CLEANUP_AFTER);
+        registry.markCompleted("nonexistent");
+        assertThat(registry.get("nonexistent"), nullValue());
+    }
+
+    @Test
+    void flush_with_null_checkpoint_file_does_not_throw() { // passes when neither flush() nor shutdown() throws
+        final CheckpointRegistry nullFileRegistry = new CheckpointRegistry(null, FLUSH_INTERVAL, CLEANUP_AFTER);
+        nullFileRegistry.flush();
+        nullFileRegistry.shutdown();
+    }
+}
diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/CheckpointStatusTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/CheckpointStatusTest.java
new file mode 100644
index 0000000000..b8ec63d91b
--- /dev/null
+++ b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/CheckpointStatusTest.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ *
+ */
+
+package org.opensearch.dataprepper.plugins.source.file;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+class CheckpointStatusTest { // Unit tests for CheckpointStatus.fromString(String) and CheckpointStatus.getValue().
+
+    @Test
+    void fromString_returns_active() {
+        assertThat(CheckpointStatus.fromString("ACTIVE"), equalTo(CheckpointStatus.ACTIVE));
+    }
+
+    @Test
+    void fromString_returns_completed() {
+        assertThat(CheckpointStatus.fromString("COMPLETED"), equalTo(CheckpointStatus.COMPLETED));
+    }
+
+    @Test
+    void fromString_is_case_insensitive() { // lower-case input must resolve to the same constants
+        assertThat(CheckpointStatus.fromString("active"), equalTo(CheckpointStatus.ACTIVE));
+        assertThat(CheckpointStatus.fromString("completed"), equalTo(CheckpointStatus.COMPLETED));
+    }
+
+    @ParameterizedTest
+    @ValueSource(strings = {"invalid", "pending", ""})
+    void fromString_throws_for_invalid_value(final String value) { // unknown names and the empty string are rejected
+        assertThrows(IllegalArgumentException.class, () -> CheckpointStatus.fromString(value));
+    }
+
+    @Test
+    void getValue_returns_value() {
+        assertThat(CheckpointStatus.ACTIVE.getValue(), equalTo("ACTIVE"));
+        assertThat(CheckpointStatus.COMPLETED.getValue(), equalTo("COMPLETED"));
+    }
+
+    @Test
+    void fromString_with_null_throws_IllegalArgumentException() { // null is expected to raise IllegalArgumentException, not NullPointerException
+        assertThrows(IllegalArgumentException.class, () -> CheckpointStatus.fromString(null));
+    }
+}
diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/DefaultFileSystemOperationsTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/DefaultFileSystemOperationsTest.java
new file mode 100644
index 0000000000..ee27afca00
--- /dev/null
+++ 
b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/DefaultFileSystemOperationsTest.java
@@ -0,0 +1,180 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ *
+ */
+
+package org.opensearch.dataprepper.plugins.source.file;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.io.IOException;
+import java.nio.channels.FileChannel;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
+import static org.hamcrest.Matchers.hasItem;
+import static org.hamcrest.Matchers.notNullValue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+class DefaultFileSystemOperationsTest { // Exercises DefaultFileSystemOperations against real files created under a JUnit @TempDir.
+
+    @TempDir
+    Path tempDir;
+
+    private DefaultFileSystemOperations fileOps;
+
+    @BeforeEach
+    void setUp() {
+        fileOps = new DefaultFileSystemOperations();
+    }
+
+    @Test
+    void openReadChannel_returns_readable_channel() throws IOException {
+        final Path file = tempDir.resolve("readable.txt");
+        Files.writeString(file, "hello world");
+
+        try (FileChannel channel = fileOps.openReadChannel(file)) { // try-with-resources closes the channel before @TempDir cleanup
+            assertThat(channel, notNullValue());
+            assertThat(channel.isOpen(), equalTo(true));
+            assertThat(channel.size(), greaterThan(0L));
+        }
+    }
+
+    @Test
+    void openReadChannel_throws_on_nonexistent_file() {
+        final Path missing = tempDir.resolve("does-not-exist.txt");
+        assertThrows(IOException.class, () -> fileOps.openReadChannel(missing));
+    }
+
+    @Test
+    void readAttributes_returns_valid_attributes() throws IOException {
+        final Path file = tempDir.resolve("attrs.txt");
+        Files.writeString(file, "test content");
+
+        final BasicFileAttributes attrs = fileOps.readAttributes(file);
+
+        assertThat(attrs, notNullValue());
+        assertThat(attrs.isRegularFile(), equalTo(true));
+        assertThat(attrs.isDirectory(), equalTo(false));
+        assertThat(attrs.size(), greaterThan(0L));
+        assertThat(attrs.creationTime(), notNullValue());
+    }
+
+    @Test
+    void readAttributes_throws_on_nonexistent_file() {
+        final Path missing = tempDir.resolve("no-attrs.txt");
+        assertThrows(IOException.class, () -> fileOps.readAttributes(missing));
+    }
+
+    @Test
+    void listDirectory_returns_files_in_directory() throws IOException {
+        Files.createFile(tempDir.resolve("file1.txt"));
+        Files.createFile(tempDir.resolve("file2.txt"));
+
+        try (Stream<Path> stream = fileOps.listDirectory(tempDir)) { // typed stream: a raw Stream.collect(...) yields Object and cannot be assigned to a List
+            final List<Path> files = stream.collect(Collectors.toList());
+            assertThat(files.size(), greaterThanOrEqualTo(2));
+            assertThat(files, hasItem(tempDir.resolve("file1.txt")));
+            assertThat(files, hasItem(tempDir.resolve("file2.txt")));
+        }
+    }
+
+    @Test
+    void listDirectory_throws_on_nonexistent_directory() {
+        final Path missing = tempDir.resolve("no-dir");
+        assertThrows(IOException.class, () -> fileOps.listDirectory(missing));
+    }
+
+    @Test
+    void exists_returns_true_for_existing_file() throws IOException {
+        final Path file = tempDir.resolve("exists.txt");
+        Files.writeString(file, "data");
+
+        assertThat(fileOps.exists(file), equalTo(true));
+    }
+
+    @Test
+    void exists_returns_false_for_nonexistent_file() {
+        final Path missing = tempDir.resolve("missing.txt");
+        assertThat(fileOps.exists(missing), equalTo(false));
+    }
+
+    @Test
+    void size_returns_correct_file_size() throws IOException {
+        final Path file = tempDir.resolve("sized.txt");
+        final String content = "twelve chars";
+        Files.writeString(file, content); // Files.writeString encodes as UTF-8
+
+        assertThat(fileOps.size(file), equalTo((long) content.getBytes(StandardCharsets.UTF_8).length)); // same charset as the write, not the platform default
+    }
+
+    @Test
+    void size_returns_zero_for_empty_file() throws IOException {
+        final Path file = tempDir.resolve("empty.txt");
+        Files.createFile(file);
+
+        assertThat(fileOps.size(file), equalTo(0L));
+    }
+
+    @Test
+    void size_throws_on_nonexistent_file() {
+        final Path missing = tempDir.resolve("no-size.txt");
+        assertThrows(IOException.class, () -> fileOps.size(missing));
+    }
+
+    @Test
+    void readBytes_reads_exact_number_of_bytes() throws IOException {
+        final Path file = tempDir.resolve("bytes.txt");
+        Files.writeString(file, "abcdefghij");
+
+        final byte[] bytes = fileOps.readBytes(file, 5);
+
+        assertThat(bytes, notNullValue());
+        assertThat(bytes.length, equalTo(5));
+        assertThat(new String(bytes, StandardCharsets.UTF_8), equalTo("abcde")); // decode with the charset the file was written in
+    }
+
+    @Test
+    void readBytes_reads_entire_file_when_length_exceeds_size() throws IOException {
+        final Path file = tempDir.resolve("short.txt");
+        Files.writeString(file, "abc");
+
+        final byte[] bytes = fileOps.readBytes(file, 100);
+
+        assertThat(bytes.length, equalTo(3));
+        assertThat(new String(bytes, StandardCharsets.UTF_8), equalTo("abc"));
+    }
+
+    @Test
+    void readBytes_returns_empty_array_for_empty_file() throws IOException {
+        final Path file = tempDir.resolve("empty-bytes.txt");
+        Files.createFile(file);
+
+        final byte[] bytes = fileOps.readBytes(file, 10);
+
+        assertThat(bytes, notNullValue());
+        assertThat(bytes.length, equalTo(0));
+    }
+
+    @Test
+    void readBytes_throws_on_nonexistent_file() {
+        final Path missing = tempDir.resolve("no-bytes.txt");
+        assertThrows(IOException.class, () -> fileOps.readBytes(missing, 10));
+    }
+}
diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/DirectoryWatcherTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/DirectoryWatcherTest.java
new file mode 100644
index 0000000000..e98c857d3e
--- /dev/null
+++ 
b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/DirectoryWatcherTest.java @@ -0,0 +1,1059 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.io.IOException; +import java.nio.file.ClosedWatchServiceException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardWatchEventKinds; +import java.nio.file.FileSystems; +import java.nio.file.WatchEvent; +import java.nio.file.WatchKey; +import java.nio.file.WatchService; +import java.time.Duration; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.LockSupport; + +import static org.awaitility.Awaitility.await; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.lenient; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static 
org.mockito.Mockito.when; + +@ExtendWith(MockitoExtension.class) +class DirectoryWatcherTest { + + @TempDir + Path tempDir; + + @Mock + private FileReaderPool readerPool; + + @Mock + private CheckpointRegistry checkpointRegistry; + + @Mock + private FileSourceConfig config; + + @Mock + private FileMetrics metrics; + + private FileSystemOperations realFileOps; + private GlobPathResolver globPathResolver; + + @BeforeEach + void setUp() throws IOException { + realFileOps = new DefaultFileSystemOperations(); + } + + private DirectoryWatcher createWatcher() { + globPathResolver = new GlobPathResolver( + List.of(tempDir.toString() + "/*.log"), + Collections.emptyList()); + return new DirectoryWatcher(globPathResolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true); + } + + @Test + void start_discovers_existing_files_and_adds_to_pool() throws IOException { + Files.writeString(tempDir.resolve("initial.log"), "content"); + when(config.getFingerprintBytes()).thenReturn(1024); + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + final DirectoryWatcher watcher = createWatcher(); + try { + watcher.start(); + verify(readerPool, atLeastOnce()).addFile(any(FileIdentity.class), any(Path.class)); + } finally { + watcher.stop(); + } + } + + @Test + void start_then_pollScan_detects_new_files() throws IOException { + Files.writeString(tempDir.resolve("initial.log"), "content"); + when(config.getFingerprintBytes()).thenReturn(1024); + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + final DirectoryWatcher watcher = createWatcher(); + try { + watcher.start(); + + Files.writeString(tempDir.resolve("new-file.log"), "new content"); + watcher.pollScan(); + + verify(readerPool, atLeastOnce()).addFile(any(FileIdentity.class), any(Path.class)); + } finally { + watcher.stop(); + } + } + + @Test + void pollScan_does_nothing_when_not_running() { + final DirectoryWatcher watcher = createWatcher(); + 
watcher.stop(); + watcher.pollScan(); + } + + @Test + void stop_completes_without_error_before_start() { + final DirectoryWatcher watcher = createWatcher(); + watcher.stop(); + } + + @Test + void stop_completes_without_error_after_start() throws IOException { + Files.writeString(tempDir.resolve("initial.log"), "content"); + when(config.getFingerprintBytes()).thenReturn(1024); + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + final DirectoryWatcher watcher = createWatcher(); + watcher.start(); + watcher.stop(); + } + + @Test + void isNetworkFilesystem_returns_false_for_local_directory() { + assertThat(DirectoryWatcher.isNetworkFilesystem(tempDir), equalTo(false)); + } + + @Test + void isNetworkFilesystem_returns_false_on_IOException() { + Path nonexistent = Path.of("/nonexistent-path-" + System.nanoTime()); + assertThat(DirectoryWatcher.isNetworkFilesystem(nonexistent), equalTo(false)); + } + + @Test + void pollScan_detects_vanished_files_and_closes_when_close_removed_true() throws Exception { + Files.writeString(tempDir.resolve("vanish.log"), "content"); + when(config.getFingerprintBytes()).thenReturn(1024); + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + final GlobPathResolver resolver = new GlobPathResolver( + List.of(tempDir.resolve("*.log").toString()), Collections.emptyList()); + final DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofMillis(100), true); + try { + watcher.start(); + + Files.delete(tempDir.resolve("vanish.log")); + watcher.pollScan(); + + await().atMost(2, TimeUnit.SECONDS).untilAsserted(() -> + verify(readerPool, atLeastOnce()).closeReaderForPath(any(Path.class))); + } finally { + watcher.stop(); + } + } + + @Test + void pollScan_handles_runtime_exception_from_glob_resolver() throws IOException { + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + 
when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + GlobPathResolver badResolver = mock(GlobPathResolver.class); + when(badResolver.resolve()).thenReturn(Set.of()); + when(badResolver.getWatchDirectories()).thenReturn(Set.of(tempDir)); + + DirectoryWatcher watcher = new DirectoryWatcher(badResolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true); + try { + watcher.start(); + + when(badResolver.resolve()).thenThrow(new RuntimeException("glob error")); + watcher.pollScan(); + } finally { + watcher.stop(); + } + } + + @Test + void addFileToPool_handles_runtime_exception() throws IOException { + when(config.getFingerprintBytes()).thenReturn(1024); + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + doThrow(new RuntimeException("pool error")).when(readerPool).addFile(any(), any()); + + Files.writeString(tempDir.resolve("error.log"), "content"); + final DirectoryWatcher watcher = createWatcher(); + try { + watcher.start(); + } finally { + watcher.stop(); + } + } + + @Test + void pollScan_does_not_close_when_close_removed_false() throws IOException { + Files.writeString(tempDir.resolve("keep.log"), "content"); + when(config.getFingerprintBytes()).thenReturn(1024); + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + globPathResolver = new GlobPathResolver( + List.of(tempDir.toString() + "/*.log"), + Collections.emptyList()); + DirectoryWatcher watcher = new DirectoryWatcher(globPathResolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), false); + try { + watcher.start(); + + Files.delete(tempDir.resolve("keep.log")); + watcher.pollScan(); + + verify(readerPool, never()).closeReaderForPath(any(Path.class)); + } finally { + watcher.stop(); + } + } + + @Test + void stop_handles_interrupted_exception_on_poll_scheduler() throws Exception { + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + 
when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + final DirectoryWatcher watcher = createWatcher(); + watcher.start(); + + Thread stopThread = new Thread(() -> { + Thread.currentThread().interrupt(); + watcher.stop(); + }); + stopThread.start(); + stopThread.join(5000); + } + + @Test + void start_uses_supplementary_poll_interval_with_watch_service() throws IOException { + Files.writeString(tempDir.resolve("initial.log"), "content"); + when(config.getFingerprintBytes()).thenReturn(1024); + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(1)); + + final DirectoryWatcher watcher = createWatcher(); + try { + watcher.start(); + } finally { + watcher.stop(); + } + } + + @Test + void watch_loop_exits_on_closed_watch_service() throws Exception { + Files.writeString(tempDir.resolve("watch.log"), "content"); + when(config.getFingerprintBytes()).thenReturn(1024); + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + final DirectoryWatcher watcher = createWatcher(); + watcher.start(); + watcher.stop(); + } + + @Test + void watch_loop_detects_new_file_created() throws Exception { + Files.writeString(tempDir.resolve("existing.log"), "content"); + when(config.getFingerprintBytes()).thenReturn(1024); + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + final DirectoryWatcher watcher = createWatcher(); + try { + watcher.start(); + Files.writeString(tempDir.resolve("new-detected.log"), "new content"); + } finally { + watcher.stop(); + } + } + + @Test + void watch_loop_handles_file_deletion_with_close_removed_true() throws Exception { + Files.writeString(tempDir.resolve("delete-me.log"), "content"); + when(config.getFingerprintBytes()).thenReturn(1024); + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + globPathResolver = new GlobPathResolver( + List.of(tempDir.toString() + "/*.log"), + Collections.emptyList()); + DirectoryWatcher watcher = new DirectoryWatcher(globPathResolver, readerPool, 
checkpointRegistry, config, realFileOps, metrics, + Duration.ofMillis(100), true); + try { + watcher.start(); + Files.delete(tempDir.resolve("delete-me.log")); + } finally { + watcher.stop(); + } + } + + @Test + void watch_loop_handles_file_deletion_with_close_removed_false() throws Exception { + Files.writeString(tempDir.resolve("keep-me.log"), "content"); + when(config.getFingerprintBytes()).thenReturn(1024); + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + globPathResolver = new GlobPathResolver( + List.of(tempDir.toString() + "/*.log"), + Collections.emptyList()); + DirectoryWatcher watcher = new DirectoryWatcher(globPathResolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofMillis(100), false); + try { + watcher.start(); + Files.delete(tempDir.resolve("keep-me.log")); + } finally { + watcher.stop(); + } + } + + @Test + void watch_loop_handles_file_reappearing_during_rotate_wait() throws Exception { + Files.writeString(tempDir.resolve("rotate-reappear.log"), "content"); + when(config.getFingerprintBytes()).thenReturn(1024); + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + globPathResolver = new GlobPathResolver( + List.of(tempDir.toString() + "/*.log"), + Collections.emptyList()); + DirectoryWatcher watcher = new DirectoryWatcher(globPathResolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofMillis(500), true); + try { + watcher.start(); + Files.delete(tempDir.resolve("rotate-reappear.log")); + Files.writeString(tempDir.resolve("rotate-reappear.log"), "new content"); + } finally { + watcher.stop(); + } + } + + @Test + void startWatchService_falls_back_on_exception() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + 
when(resolver.getWatchDirectories()).thenReturn(Set.of(tempDir)); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> { throw new RuntimeException("cannot create WatchService"); }); + try { + watcher.start(); + } finally { + watcher.stop(); + } + } + + @Test + void startWatchService_falls_back_on_io_exception() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(tempDir)); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> { throw new IOException("cannot create WatchService"); }); + try { + watcher.start(); + } finally { + watcher.stop(); + } + } + + @Test + void stop_handles_pollScheduler_not_terminating() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(tempDir)); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true); + watcher.start(); + watcher.stop(); + } + + @Test + void shouldUseWatchService_returns_false_for_network_filesystem() throws Exception { + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + 
when(resolver.getWatchDirectories()).thenReturn(Set.of(tempDir)); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true); + watcher.start(); + watcher.stop(); + } + + @Test + void stop_handles_IOException_closing_WatchService() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + WatchService realWatchService = FileSystems.getDefault().newWatchService(); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(tempDir)); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> realWatchService); + watcher.start(); + + realWatchService.close(); + + watcher.stop(); + } + + @Test + void registerDirectory_handles_IOException() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + Path watchDir = tempDir.resolve("unreadable-watch"); + Files.createDirectory(watchDir); + watchDir.toFile().setReadable(false); + watchDir.toFile().setExecutable(false); + when(resolver.getWatchDirectories()).thenReturn(Set.of(watchDir)); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true); + try { + watcher.start(); + } finally { + watchDir.toFile().setReadable(true); + watchDir.toFile().setExecutable(true); + watcher.stop(); + } + } + + @Test + void supplementary_poll_interval_when_not_macOS_and_watch_active() throws Exception { + 
when(config.getPollInterval()).thenReturn(Duration.ofSeconds(1)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(tempDir)); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> FileSystems.getDefault().newWatchService(), + false); + try { + watcher.start(); + } finally { + watcher.stop(); + } + } + + @Test + void watchLoop_handles_invalid_WatchKey() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + WatchKey mockKey = mock(WatchKey.class); + when(mockKey.pollEvents()).thenReturn(Collections.emptyList()); + when(mockKey.reset()).thenReturn(false); + + WatchService mockWatchService = mock(WatchService.class); + when(mockWatchService.take()) + .thenReturn(mockKey) + .thenThrow(new ClosedWatchServiceException()); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(Path.of("/nonexistent-dir-" + System.nanoTime()))); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> mockWatchService); + watcher.start(); + await().atMost(2, TimeUnit.SECONDS).untilAsserted(() -> + verify(mockWatchService, atLeastOnce()).take()); + watcher.stop(); + } + + @Test + void handleWatchEvent_handles_OVERFLOW() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + WatchEvent overflowEvent = mock(WatchEvent.class); + when(overflowEvent.kind()).thenReturn((WatchEvent.Kind) 
StandardWatchEventKinds.OVERFLOW); + + WatchKey mockKey = mock(WatchKey.class); + when(mockKey.pollEvents()).thenReturn(List.of(overflowEvent)); + when(mockKey.reset()).thenReturn(true); + + WatchService mockWatchService = mock(WatchService.class); + when(mockWatchService.take()) + .thenReturn(mockKey) + .thenThrow(new ClosedWatchServiceException()); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(Path.of("/nonexistent-dir-" + System.nanoTime()))); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> mockWatchService); + watcher.start(); + await().atMost(2, TimeUnit.SECONDS).untilAsserted(() -> + verify(mockWatchService, atLeastOnce()).take()); + watcher.stop(); + } + + @Test + void triggerFullRescan_handles_runtime_exception() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + WatchEvent overflowEvent = mock(WatchEvent.class); + when(overflowEvent.kind()).thenReturn((WatchEvent.Kind) StandardWatchEventKinds.OVERFLOW); + + WatchKey mockKey = mock(WatchKey.class); + when(mockKey.pollEvents()).thenReturn(List.of(overflowEvent)); + when(mockKey.reset()).thenReturn(true); + + WatchService mockWatchService = mock(WatchService.class); + when(mockWatchService.take()) + .thenReturn(mockKey) + .thenThrow(new ClosedWatchServiceException()); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()) + .thenReturn(Set.of()) + .thenThrow(new RuntimeException("rescan error")); + when(resolver.getWatchDirectories()).thenReturn(Set.of(Path.of("/nonexistent-dir-" + System.nanoTime()))); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + 
Duration.ofSeconds(5), true, + () -> mockWatchService); + watcher.start(); + await().atMost(2, TimeUnit.SECONDS).untilAsserted(() -> + verify(mockWatchService, atLeastOnce()).take()); + watcher.stop(); + } + + @Test + void handleDeletion_file_reappears_during_rotateWait() throws Exception { + Path testFile = tempDir.resolve("reappear.log"); + Files.writeString(testFile, "content"); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + WatchEvent deleteEvent = mock(WatchEvent.class); + lenient().when(deleteEvent.kind()).thenReturn((WatchEvent.Kind) StandardWatchEventKinds.ENTRY_DELETE); + lenient().when(deleteEvent.context()).thenReturn(testFile.getFileName()); + + WatchEvent createEvent = mock(WatchEvent.class); + lenient().when(createEvent.kind()).thenReturn((WatchEvent.Kind) StandardWatchEventKinds.ENTRY_CREATE); + lenient().when(createEvent.context()).thenReturn(testFile.getFileName()); + + WatchKey mockKey = mock(WatchKey.class); + lenient().when(mockKey.watchable()).thenReturn(tempDir); + lenient().when(mockKey.pollEvents()) + .thenReturn(List.of(createEvent)) + .thenReturn(List.of(deleteEvent)) + .thenReturn(Collections.emptyList()); + lenient().when(mockKey.reset()).thenReturn(true); + + WatchService mockWatchService = mock(WatchService.class); + lenient().when(mockWatchService.take()) + .thenReturn(mockKey) + .thenReturn(mockKey) + .thenThrow(new ClosedWatchServiceException()); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of(testFile.toAbsolutePath().normalize())); + when(resolver.getWatchDirectories()).thenReturn(Set.of(Path.of("/nonexistent-dir-" + System.nanoTime()))); + when(resolver.matches(testFile.toAbsolutePath().normalize())).thenReturn(true); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofMillis(500), true, + () -> 
mockWatchService); + watcher.start(); + await().atMost(2, TimeUnit.SECONDS).untilAsserted(() -> + verify(mockWatchService, atLeastOnce()).take()); + watcher.stop(); + } + + @Test + void stop_pollScheduler_shutdownNow_on_timeout() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(tempDir)); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> { throw new RuntimeException("no watch service"); }, + false); + watcher.start(); + watcher.stop(); + } + + @Test + void stop_handles_IOException_when_watchService_close_fails() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + WatchService mockWatchService = mock(WatchService.class); + when(mockWatchService.take()).thenThrow(new ClosedWatchServiceException()); + doThrow(new IOException("close error")).when(mockWatchService).close(); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(Path.of("/nonexistent-ws-" + System.nanoTime()))); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> mockWatchService); + watcher.start(); + await().atMost(2, TimeUnit.SECONDS).untilAsserted(() -> + verify(mockWatchService, atLeastOnce()).take()); + watcher.stop(); + } + + @Test + void stop_handles_pollScheduler_awaitTermination_returning_false() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + 
lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + ScheduledExecutorService mockScheduler = mock(ScheduledExecutorService.class); + when(mockScheduler.awaitTermination(anyLong(), any(TimeUnit.class))).thenReturn(false); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(Path.of("/nonexistent-ps-" + System.nanoTime()))); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> { throw new RuntimeException("no watch service"); }, + false, + () -> mockScheduler); + watcher.start(); + watcher.stop(); + } + + @Test + void stop_handles_pollScheduler_awaitTermination_throws_interrupted() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + ScheduledExecutorService mockScheduler = mock(ScheduledExecutorService.class); + when(mockScheduler.awaitTermination(anyLong(), any(TimeUnit.class))) + .thenThrow(new InterruptedException("test")); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(Path.of("/nonexistent-psi-" + System.nanoTime()))); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> { throw new RuntimeException("no watch service"); }, + false, + () -> mockScheduler); + watcher.start(); + watcher.stop(); + + assertThat(Thread.currentThread().isInterrupted(), equalTo(true)); + Thread.interrupted(); + } + + @Test + void stop_handles_watchThread_join_interrupted() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + 
lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + CountDownLatch watchStarted = new CountDownLatch(1); + + WatchService mockWatchService = mock(WatchService.class); + when(mockWatchService.take()).thenAnswer(inv -> { + watchStarted.countDown(); + while (!Thread.currentThread().isInterrupted()) { + LockSupport.parkNanos(100_000_000L); + } + Thread.interrupted(); + Thread.sleep(2000); + throw new ClosedWatchServiceException(); + }); + + ScheduledExecutorService mockScheduler = mock(ScheduledExecutorService.class); + when(mockScheduler.awaitTermination(anyLong(), any(TimeUnit.class))).thenReturn(true); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(Path.of("/nonexistent-wtj-" + System.nanoTime()))); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> mockWatchService, + false, + () -> mockScheduler); + watcher.start(); + watchStarted.await(); + + Thread stopThread = new Thread(() -> { + Thread.currentThread().interrupt(); + watcher.stop(); + }); + stopThread.start(); + stopThread.join(10000); + + assertThat(stopThread.isAlive(), equalTo(false)); + } + + @Test + void watch_loop_exits_when_running_becomes_false() throws Exception { + lenient().when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + CountDownLatch firstIterDone = new CountDownLatch(1); + + WatchKey mockKey = mock(WatchKey.class); + lenient().when(mockKey.pollEvents()).thenReturn(Collections.emptyList()); + lenient().when(mockKey.reset()).thenReturn(true); + + WatchService mockWatchService = mock(WatchService.class); + when(mockWatchService.take()).thenAnswer(inv -> { + firstIterDone.countDown(); + return mockKey; + }).thenAnswer(inv -> { + Thread.sleep(60000); + return mockKey; + 
}); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(Path.of("/nonexistent-dir-" + System.nanoTime()))); + + ScheduledExecutorService mockScheduler = mock(ScheduledExecutorService.class); + lenient().when(mockScheduler.awaitTermination(anyLong(), any(TimeUnit.class))).thenReturn(true); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> mockWatchService, + false, + () -> mockScheduler); + watcher.start(); + firstIterDone.await(); + watcher.stop(); + } + + @Test + void handleWatchEvent_ignores_file_not_matching_glob() throws Exception { + Path txtFile = tempDir.resolve("ignored.txt"); + Files.writeString(txtFile, "content"); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + lenient().when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + WatchEvent createEvent = mock(WatchEvent.class); + lenient().when(createEvent.kind()).thenReturn((WatchEvent.Kind) StandardWatchEventKinds.ENTRY_CREATE); + lenient().when(createEvent.context()).thenReturn(txtFile.getFileName()); + + WatchKey mockKey = mock(WatchKey.class); + lenient().when(mockKey.watchable()).thenReturn(tempDir); + lenient().when(mockKey.pollEvents()).thenReturn(List.of(createEvent)); + lenient().when(mockKey.reset()).thenReturn(true); + + WatchService mockWatchService = mock(WatchService.class); + lenient().when(mockWatchService.take()) + .thenReturn(mockKey) + .thenThrow(new ClosedWatchServiceException()); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(Path.of("/nonexistent-dir-" + System.nanoTime()))); + when(resolver.matches(any())).thenReturn(false); + + ScheduledExecutorService mockScheduler = mock(ScheduledExecutorService.class); + 
lenient().when(mockScheduler.awaitTermination(anyLong(), any(TimeUnit.class))).thenReturn(true); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> mockWatchService, + false, + () -> mockScheduler); + watcher.start(); + await().pollDelay(500, TimeUnit.MILLISECONDS).atMost(2, TimeUnit.SECONDS).untilAsserted(() -> + verify(readerPool, never()).addFile(any(), any())); + watcher.stop(); + } + + @Test + void handleWatchEvent_ignores_ENTRY_MODIFY_event() throws Exception { + Path logFile = tempDir.resolve("modify.log"); + Files.writeString(logFile, "content"); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + lenient().when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + WatchEvent modifyEvent = mock(WatchEvent.class); + lenient().when(modifyEvent.kind()).thenReturn((WatchEvent.Kind) StandardWatchEventKinds.ENTRY_MODIFY); + lenient().when(modifyEvent.context()).thenReturn(logFile.getFileName()); + + WatchKey mockKey = mock(WatchKey.class); + lenient().when(mockKey.watchable()).thenReturn(tempDir); + lenient().when(mockKey.pollEvents()).thenReturn(List.of(modifyEvent)); + lenient().when(mockKey.reset()).thenReturn(true); + + WatchService mockWatchService = mock(WatchService.class); + when(mockWatchService.take()) + .thenReturn(mockKey) + .thenThrow(new ClosedWatchServiceException()); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(Path.of("/nonexistent-dir-" + System.nanoTime()))); + + ScheduledExecutorService mockScheduler = mock(ScheduledExecutorService.class); + lenient().when(mockScheduler.awaitTermination(anyLong(), any(TimeUnit.class))).thenReturn(true); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), 
true, + () -> mockWatchService, + false, + () -> mockScheduler); + watcher.start(); + await().pollDelay(500, TimeUnit.MILLISECONDS).atMost(2, TimeUnit.SECONDS).untilAsserted(() -> { + verify(readerPool, never()).addFile(any(), any()); + verify(readerPool, never()).closeReaderForPath(any()); + }); + watcher.stop(); + } + + @Test + void handleWatchEvent_DELETE_for_unknown_file_does_nothing() throws Exception { + Path unknownFile = tempDir.resolve("unknown.log"); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + lenient().when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + WatchEvent deleteEvent = mock(WatchEvent.class); + lenient().when(deleteEvent.kind()).thenReturn((WatchEvent.Kind) StandardWatchEventKinds.ENTRY_DELETE); + lenient().when(deleteEvent.context()).thenReturn(unknownFile.getFileName()); + + WatchKey mockKey = mock(WatchKey.class); + lenient().when(mockKey.watchable()).thenReturn(tempDir); + lenient().when(mockKey.pollEvents()).thenReturn(List.of(deleteEvent)); + lenient().when(mockKey.reset()).thenReturn(true); + + WatchService mockWatchService = mock(WatchService.class); + when(mockWatchService.take()) + .thenReturn(mockKey) + .thenThrow(new ClosedWatchServiceException()); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(Path.of("/nonexistent-dir-" + System.nanoTime()))); + + ScheduledExecutorService mockScheduler = mock(ScheduledExecutorService.class); + lenient().when(mockScheduler.awaitTermination(anyLong(), any(TimeUnit.class))).thenReturn(true); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> mockWatchService, + false, + () -> mockScheduler); + watcher.start(); + await().pollDelay(500, TimeUnit.MILLISECONDS).atMost(2, TimeUnit.SECONDS).untilAsserted(() -> + verify(readerPool, 
never()).closeReaderForPath(any())); + watcher.stop(); + } + + @Test + void shouldUseWatchService_returns_false_when_network_fs_detected() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(tempDir)); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> FileSystems.getDefault().newWatchService(), + false, + DirectoryWatcher::createDefaultPollScheduler, + path -> true); + watcher.start(); + watcher.stop(); + } + + @Test + void handleWatchEvent_DELETE_rejectedExecutionException_on_schedule() throws Exception { + Path logFile = tempDir.resolve("rej-delete.log"); + Files.writeString(logFile, "content"); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + lenient().when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + WatchEvent createEvent = mock(WatchEvent.class); + lenient().when(createEvent.kind()).thenReturn((WatchEvent.Kind) StandardWatchEventKinds.ENTRY_CREATE); + lenient().when(createEvent.context()).thenReturn(logFile.getFileName()); + + WatchEvent deleteEvent = mock(WatchEvent.class); + lenient().when(deleteEvent.kind()).thenReturn((WatchEvent.Kind) StandardWatchEventKinds.ENTRY_DELETE); + lenient().when(deleteEvent.context()).thenReturn(logFile.getFileName()); + + WatchKey mockKey = mock(WatchKey.class); + lenient().when(mockKey.watchable()).thenReturn(tempDir); + lenient().when(mockKey.pollEvents()) + .thenReturn(List.of(createEvent)) + .thenReturn(List.of(deleteEvent)); + lenient().when(mockKey.reset()).thenReturn(true); + + WatchService mockWatchService = mock(WatchService.class); + lenient().when(mockWatchService.take()) + .thenReturn(mockKey) + 
.thenReturn(mockKey) + .thenThrow(new ClosedWatchServiceException()); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of(logFile.toAbsolutePath().normalize())); + when(resolver.getWatchDirectories()).thenReturn(Set.of(Path.of("/nonexistent-dir-" + System.nanoTime()))); + when(resolver.matches(any())).thenReturn(true); + + ScheduledExecutorService mockScheduler = mock(ScheduledExecutorService.class); + when(mockScheduler.awaitTermination(anyLong(), any(TimeUnit.class))).thenReturn(true); + when(mockScheduler.schedule(any(Runnable.class), anyLong(), any(TimeUnit.class))) + .thenThrow(new RejectedExecutionException("shutting down")); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofMillis(100), true, + () -> mockWatchService, + false, + () -> mockScheduler); + watcher.start(); + await().atMost(2, TimeUnit.SECONDS).untilAsserted(() -> + verify(mockWatchService, atLeastOnce()).take()); + watcher.stop(); + } + + @Test + void triggerFullRescan_handles_rejectedExecutionException() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + WatchEvent overflowEvent = mock(WatchEvent.class); + when(overflowEvent.kind()).thenReturn((WatchEvent.Kind) StandardWatchEventKinds.OVERFLOW); + + WatchKey mockKey = mock(WatchKey.class); + when(mockKey.pollEvents()).thenReturn(List.of(overflowEvent)); + when(mockKey.reset()).thenReturn(true); + + WatchService mockWatchService = mock(WatchService.class); + when(mockWatchService.take()) + .thenReturn(mockKey) + .thenThrow(new ClosedWatchServiceException()); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(Path.of("/nonexistent-dir-" + System.nanoTime()))); + + ScheduledExecutorService 
mockScheduler = mock(ScheduledExecutorService.class); + lenient().when(mockScheduler.awaitTermination(anyLong(), any(TimeUnit.class))).thenReturn(true); + doThrow(new RejectedExecutionException("shutting down")) + .when(mockScheduler).execute(any(Runnable.class)); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> mockWatchService, + false, + () -> mockScheduler); + watcher.start(); + await().atMost(2, TimeUnit.SECONDS).untilAsserted(() -> + verify(mockWatchService, atLeastOnce()).take()); + watcher.stop(); + } + + @Test + void pollScan_vanished_file_rejectedExecutionException_on_schedule() throws Exception { + Path vanishFile = tempDir.resolve("vanish-rej.log"); + Files.writeString(vanishFile, "content"); + when(config.getFingerprintBytes()).thenReturn(1024); + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + + ScheduledExecutorService mockScheduler = mock(ScheduledExecutorService.class); + lenient().when(mockScheduler.awaitTermination(anyLong(), any(TimeUnit.class))).thenReturn(true); + lenient().when(mockScheduler.scheduleAtFixedRate(any(Runnable.class), anyLong(), anyLong(), any(TimeUnit.class))) + .thenReturn(mock(ScheduledFuture.class)); + when(mockScheduler.schedule(any(Runnable.class), anyLong(), any(TimeUnit.class))) + .thenThrow(new RejectedExecutionException("shutting down")); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()) + .thenReturn(Set.of(vanishFile.toAbsolutePath().normalize())) + .thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(Path.of("/nonexistent-dir-" + System.nanoTime()))); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofMillis(100), true, + () -> { throw new RuntimeException("no watch service"); }, + false, + () -> mockScheduler); + watcher.start(); + 
watcher.pollScan(); + watcher.stop(); + } + + @Test + void startWatchService_falls_back_to_polling_when_registration_fails() throws Exception { + when(config.getPollInterval()).thenReturn(Duration.ofSeconds(60)); + lenient().when(config.getFingerprintBytes()).thenReturn(1024); + + WatchService closedWatchService = FileSystems.getDefault().newWatchService(); + closedWatchService.close(); + + GlobPathResolver resolver = mock(GlobPathResolver.class); + when(resolver.resolve()).thenReturn(Set.of()); + when(resolver.getWatchDirectories()).thenReturn(Set.of(tempDir)); + + DirectoryWatcher watcher = new DirectoryWatcher(resolver, readerPool, checkpointRegistry, config, realFileOps, metrics, + Duration.ofSeconds(5), true, + () -> closedWatchService, + false, + () -> Executors.newSingleThreadScheduledExecutor(), + path -> false); + watcher.start(); + watcher.stop(); + } +} diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileIdentityTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileIdentityTest.java new file mode 100644 index 0000000000..6c70401aab --- /dev/null +++ b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileIdentityTest.java @@ -0,0 +1,297 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ *
+ */
+
+package org.opensearch.dataprepper.plugins.source.file;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.junit.jupiter.api.io.TempDir;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.nio.file.attribute.FileTime;
+import java.time.Instant;
+import java.util.zip.CRC32;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.not;
+import static org.hamcrest.Matchers.notNullValue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.mockito.Mockito.lenient;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+/**
+ * Unit tests for {@code FileIdentity.from(Path, FileSystemOperations, int)}.
+ *
+ * <p>All file-system access goes through a mocked {@code FileSystemOperations}, so each test
+ * decides which attributes, size and bytes the identity computation gets to see. The tests pin
+ * the three textual forms the identity is expected to take: {@code inode:...:created:...} when a
+ * file key is available, {@code crc:...} with {@code created:...} when it is not, and
+ * {@code path:...} when nothing better can be derived.
+ */
+@ExtendWith(MockitoExtension.class)
+class FileIdentityTest {
+
+    /** Fingerprint size handed to {@code FileIdentity.from} unless a test overrides it. */
+    private static final int FINGERPRINT_BYTES = 256;
+
+    @TempDir
+    Path tempDir;
+
+    @Mock
+    private FileSystemOperations fileOps;
+
+    @Mock
+    private BasicFileAttributes attrs;
+
+    @BeforeEach
+    void setUp() {
+        // Lenient: not every test reaches the point where the creation time is read.
+        lenient().when(attrs.creationTime()).thenReturn(FileTime.from(Instant.parse("2025-01-01T00:00:00Z")));
+    }
+
+    /** A non-null file key must yield an identity rendered as {@code inode:<key>:created:...}. */
+    @Test
+    void fromReturnsInodeBasedIdentityWhenFileKeyPresent() throws IOException {
+        final Path testFile = tempDir.resolve("test.log");
+        Files.writeString(testFile, "some content");
+
+        final Object fileKey = "12345";
+        when(fileOps.readAttributes(testFile)).thenReturn(attrs);
+        when(attrs.fileKey()).thenReturn(fileKey);
+
+        final FileIdentity identity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES);
+
+        assertThat(identity, notNullValue());
+        assertThat(identity.toString(), containsString("inode:12345:created:"));
+        assertThat(identity.getPath(), equalTo(testFile));
+    }
+
+    /** Without a file key the identity must carry the CRC32 of the bytes returned by {@code readBytes}. */
+    @Test
+    void fromReturnsCrcFallbackWhenFileKeyIsNull() throws IOException {
+        final Path testFile = tempDir.resolve("test.log");
+        final String content = "hello world data";
+        Files.writeString(testFile, content);
+        // Files.writeString encodes as UTF-8; use the same charset here instead of the platform default.
+        final byte[] contentBytes = content.getBytes(StandardCharsets.UTF_8);
+
+        when(fileOps.readAttributes(testFile)).thenReturn(attrs);
+        when(attrs.fileKey()).thenReturn(null);
+        when(fileOps.size(testFile)).thenReturn((long) contentBytes.length);
+        when(fileOps.readBytes(testFile, Math.min(FINGERPRINT_BYTES, contentBytes.length))).thenReturn(contentBytes);
+        when(attrs.creationTime()).thenReturn(FileTime.from(Instant.parse("2025-01-01T00:00:00Z")));
+
+        final CRC32 expectedCrc = new CRC32();
+        expectedCrc.update(contentBytes);
+
+        final FileIdentity identity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES);
+
+        assertThat(identity, notNullValue());
+        assertThat(identity.toString(), containsString("crc:" + expectedCrc.getValue()));
+        assertThat(identity.toString(), containsString("created:"));
+    }
+
+    /** An {@link IOException} from {@code readAttributes} must degrade to a path-based identity. */
+    @Test
+    void fromReturnsPathFallbackWhenIOExceptionOnReadAttributes() throws IOException {
+        final Path testFile = tempDir.resolve("missing.log");
+
+        when(fileOps.readAttributes(testFile)).thenThrow(new IOException("cannot read"));
+
+        final FileIdentity identity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES);
+
+        assertThat(identity, notNullValue());
+        assertThat(identity.toString(), containsString("path:"));
+        assertThat(identity.toString(), containsString(testFile.toAbsolutePath().toString()));
+    }
+
+    /** A reported size of zero with no file key must degrade to a path-based identity. */
+    @Test
+    void fromReturnsPathFallbackForEmptyFile() throws IOException {
+        final Path testFile = tempDir.resolve("empty.log");
+        Files.writeString(testFile, "");
+
+        when(fileOps.readAttributes(testFile)).thenReturn(attrs);
+        when(attrs.fileKey()).thenReturn(null);
+        when(fileOps.size(testFile)).thenReturn(0L);
+
+        final FileIdentity identity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES);
+
+        assertThat(identity, notNullValue());
+        assertThat(identity.toString(), containsString("path:"));
+    }
+
+    /** An {@link IOException} from {@code size} must degrade to a path-based identity. */
+    @Test
+    void fromReturnsPathFallbackWhenSizeThrowsIOException() throws IOException {
+        final Path testFile = tempDir.resolve("unreadable.log");
+
+        when(fileOps.readAttributes(testFile)).thenReturn(attrs);
+        when(attrs.fileKey()).thenReturn(null);
+        when(fileOps.size(testFile)).thenThrow(new IOException("permission denied"));
+
+        final FileIdentity identity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES);
+
+        assertThat(identity, notNullValue());
+        assertThat(identity.toString(), containsString("path:"));
+    }
+
+    /** Two identities built from the same inputs must be equal and share a hash code. */
+    @Test
+    void equalIdentitiesAreEqual() throws IOException {
+        final Path testFile = tempDir.resolve("a.log");
+        Files.writeString(testFile, "data");
+
+        final Object fileKey = "inode-99";
+        when(fileOps.readAttributes(testFile)).thenReturn(attrs);
+        when(attrs.fileKey()).thenReturn(fileKey);
+
+        final FileIdentity first = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES);
+        final FileIdentity second = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES);
+
+        assertThat(first, equalTo(second));
+        assertThat(first.hashCode(), equalTo(second.hashCode()));
+    }
+
+    /** Identities built from different file keys and creation times must not be equal. */
+    @Test
+    void differentIdentitiesAreNotEqual() throws IOException {
+        final Path fileA = tempDir.resolve("a.log");
+        final Path fileB = tempDir.resolve("b.log");
+        Files.writeString(fileA, "data-a");
+        Files.writeString(fileB, "data-b");
+
+        // BasicFileAttributes is not generic, so this mock needs no unchecked-warning suppression.
+        final BasicFileAttributes attrsB = mock(BasicFileAttributes.class);
+
+        when(fileOps.readAttributes(fileA)).thenReturn(attrs);
+        when(attrs.fileKey()).thenReturn("inode-1");
+
+        when(fileOps.readAttributes(fileB)).thenReturn(attrsB);
+        when(attrsB.fileKey()).thenReturn("inode-2");
+        when(attrsB.creationTime()).thenReturn(FileTime.from(Instant.parse("2025-02-01T00:00:00Z")));
+
+        final FileIdentity identityA = FileIdentity.from(fileA, fileOps, FINGERPRINT_BYTES);
+        final FileIdentity identityB = FileIdentity.from(fileB, fileOps, FINGERPRINT_BYTES);
+
+        assertThat(identityA, not(equalTo(identityB)));
+    }
+
+    /** {@code equals(null)} must return {@code false}. */
+    @Test
+    void identityIsNotEqualToNull() throws IOException {
+        final Path testFile = tempDir.resolve("file.log");
+        Files.writeString(testFile, "content");
+
+        when(fileOps.readAttributes(testFile)).thenReturn(attrs);
+        when(attrs.fileKey()).thenReturn("inode-42");
+
+        final FileIdentity identity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES);
+
+        assertThat(identity.equals(null), equalTo(false));
+    }
+
+    /** {@code equals} against an object of another class must return {@code false}. */
+    @Test
+    void identityIsNotEqualToDifferentClass() throws IOException {
+        final Path testFile = tempDir.resolve("diffclass.log");
+        Files.writeString(testFile, "content");
+
+        when(fileOps.readAttributes(testFile)).thenReturn(attrs);
+        when(attrs.fileKey()).thenReturn("inode-99");
+
+        final FileIdentity identity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES);
+
+        assertThat(identity.equals("a string object"), equalTo(false));
+    }
+
+    /** {@code equals} must be reflexive. */
+    @Test
+    void identityIsEqualToItself() throws IOException {
+        final Path testFile = tempDir.resolve("self.log");
+        Files.writeString(testFile, "content");
+
+        when(fileOps.readAttributes(testFile)).thenReturn(attrs);
+        when(attrs.fileKey()).thenReturn("inode-7");
+
+        final FileIdentity identity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES);
+
+        assertThat(identity.equals(identity), equalTo(true));
+    }
+
+    /** {@code getPath} must hand back the path the identity was created from. */
+    @Test
+    void getPathReturnsOriginalPath() throws IOException {
+        final Path testFile = tempDir.resolve("path-check.log");
+        Files.writeString(testFile, "content");
+
+        when(fileOps.readAttributes(testFile)).thenReturn(attrs);
+        when(attrs.fileKey()).thenReturn("inode-10");
+
+        final FileIdentity identity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES);
+
+        assertThat(identity.getPath(), equalTo(testFile));
+    }
+
+    /** The CRC fallback form must also include a {@code created:} component. */
+    @Test
+    void crcFallbackUsesCreationTime() throws IOException {
+        final Path testFile = tempDir.resolve("created.log");
+        final String content = "log data here";
+        Files.writeString(testFile, content);
+        // Same charset as Files.writeString, independent of the platform default.
+        final byte[] bytes = content.getBytes(StandardCharsets.UTF_8);
+
+        when(fileOps.readAttributes(testFile)).thenReturn(attrs);
+        when(attrs.fileKey()).thenReturn(null);
+        when(fileOps.size(testFile)).thenReturn((long) bytes.length);
+        when(fileOps.readBytes(testFile, Math.min(FINGERPRINT_BYTES, bytes.length))).thenReturn(bytes);
+        when(attrs.creationTime()).thenReturn(FileTime.from(Instant.parse("2024-06-15T12:30:00Z")));
+
+        final FileIdentity identity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES);
+
+        assertThat(identity.toString(), containsString("created:"));
+    }
+
+    /** An {@link IOException} while reading the fingerprint bytes must degrade to a path-based identity. */
+    @Test
+    void crcFallbackHandlesReadBytesIOException() throws IOException {
+        final Path testFile = tempDir.resolve("read-fail.log");
+        Files.writeString(testFile, "content");
+
+        when(fileOps.readAttributes(testFile)).thenReturn(attrs);
+        when(attrs.fileKey()).thenReturn(null);
+        when(fileOps.size(testFile)).thenReturn(100L);
+        // Only a 100-byte read is stubbed, matching the reported size (smaller than FINGERPRINT_BYTES).
+        when(fileOps.readBytes(testFile, 100)).thenThrow(new IOException("read failed"));
+
+        final FileIdentity identity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES);
+
+        assertThat(identity, notNullValue());
+        assertThat(identity.toString(), containsString("path:"));
+    }
+
+    /** A fingerprint size of zero must be rejected. */
+    @Test
+    void fromThrowsIllegalArgumentExceptionWhenFingerprintBytesIsZero() {
+        final Path testFile = tempDir.resolve("zero-fp.log");
+
+        assertThrows(IllegalArgumentException.class,
+                () -> FileIdentity.from(testFile, fileOps, 0));
+    }
+
+    /** A negative fingerprint size must be rejected. */
+    @Test
+    void fromThrowsIllegalArgumentExceptionWhenFingerprintBytesIsNegative() {
+        final Path testFile = tempDir.resolve("neg-fp.log");
+
+        assertThrows(IllegalArgumentException.class,
+                () -> FileIdentity.from(testFile, fileOps, -1));
+    }
+
+    /** For a file larger than the fingerprint size, the CRC form must be produced from a fingerprint-sized read. */
+    @Test
+    void fingerprintBytesLimitsReadSize() throws IOException {
+        final Path testFile = tempDir.resolve("large.log");
+        Files.writeString(testFile, "a]".repeat(500));
+
+        final int smallFingerprint = 16;
+
+        when(fileOps.readAttributes(testFile)).thenReturn(attrs);
+        when(attrs.fileKey()).thenReturn(null);
+        when(fileOps.size(testFile)).thenReturn(1000L);
+        // Only a read of exactly smallFingerprint bytes is stubbed.
+        when(fileOps.readBytes(testFile, smallFingerprint)).thenReturn(new byte[smallFingerprint]);
+        when(attrs.creationTime()).thenReturn(FileTime.from(Instant.parse("2025-01-01T00:00:00Z")));
+
+        final FileIdentity identity = FileIdentity.from(testFile, fileOps, smallFingerprint);
+
+        assertThat(identity, notNullValue());
+        assertThat(identity.toString(), containsString("crc:"));
+    }
+}
diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileMetricsTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileMetricsTest.java
new file mode 100644
index 0000000000..f712d3a6af
--- /dev/null
+++ b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileMetricsTest.java
@@ -0,0 +1,230 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import io.micrometer.core.instrument.Counter; +import io.micrometer.core.instrument.Timer; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.dataprepper.metrics.PluginMetrics; + +import java.util.concurrent.atomic.AtomicLong; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.notNullValue; +import static org.mockito.Mockito.when; + +@ExtendWith(MockitoExtension.class) +class FileMetricsTest { + + @Mock + private PluginMetrics pluginMetrics; + + @Mock + private Counter linesReadCounter; + + @Mock + private Counter bytesReadCounter; + + @Mock + private Counter linesTruncatedCounter; + + @Mock + private Counter filesOpenedCounter; + + @Mock + private Counter filesClosedCounter; + + @Mock + private Counter filesRotatedCounter; + + @Mock + private Counter readErrorsCounter; + + @Mock + private Counter writeTimeoutsCounter; + + @Mock + private Counter checkpointFlushesCounter; + + @Mock + private Counter checkpointErrorsCounter; + + @Mock + private Counter eventsEmittedCounter; + + @Mock + private Counter dataLossEventsCounter; + + @Mock + private Counter acknowledgmentFailuresCounter; + + @Mock + private Counter truncationEventsCounter; + + @Mock + private Timer backpressureTimer; + + private FileMetrics fileTailMetrics; + + @BeforeEach + void setUp() { + when(pluginMetrics.counter("linesRead")).thenReturn(linesReadCounter); + when(pluginMetrics.counter("bytesRead")).thenReturn(bytesReadCounter); + when(pluginMetrics.counter("linesTruncated")).thenReturn(linesTruncatedCounter); + when(pluginMetrics.counter("filesOpened")).thenReturn(filesOpenedCounter); + 
when(pluginMetrics.counter("filesClosed")).thenReturn(filesClosedCounter); + when(pluginMetrics.counter("filesRotated")).thenReturn(filesRotatedCounter); + when(pluginMetrics.counter("readErrors")).thenReturn(readErrorsCounter); + when(pluginMetrics.counter("writeTimeouts")).thenReturn(writeTimeoutsCounter); + when(pluginMetrics.counter("checkpointFlushes")).thenReturn(checkpointFlushesCounter); + when(pluginMetrics.counter("checkpointErrors")).thenReturn(checkpointErrorsCounter); + when(pluginMetrics.counter("eventsEmitted")).thenReturn(eventsEmittedCounter); + when(pluginMetrics.counter("dataLossEvents")).thenReturn(dataLossEventsCounter); + when(pluginMetrics.counter("acknowledgmentFailures")).thenReturn(acknowledgmentFailuresCounter); + when(pluginMetrics.counter("truncationEvents")).thenReturn(truncationEventsCounter); + when(pluginMetrics.timer("backpressureTime")).thenReturn(backpressureTimer); + + fileTailMetrics = new FileMetrics(pluginMetrics); + } + + @Test + void getLinesReadReturnsRegisteredCounter() { + assertThat(fileTailMetrics.getLinesRead(), equalTo(linesReadCounter)); + } + + @Test + void getBytesReadReturnsRegisteredCounter() { + assertThat(fileTailMetrics.getBytesRead(), equalTo(bytesReadCounter)); + } + + @Test + void getLinesTruncatedReturnsRegisteredCounter() { + assertThat(fileTailMetrics.getLinesTruncated(), equalTo(linesTruncatedCounter)); + } + + @Test + void getFilesOpenedReturnsRegisteredCounter() { + assertThat(fileTailMetrics.getFilesOpened(), equalTo(filesOpenedCounter)); + } + + @Test + void getFilesClosedReturnsRegisteredCounter() { + assertThat(fileTailMetrics.getFilesClosed(), equalTo(filesClosedCounter)); + } + + @Test + void getFilesRotatedReturnsRegisteredCounter() { + assertThat(fileTailMetrics.getFilesRotated(), equalTo(filesRotatedCounter)); + } + + @Test + void getReadErrorsReturnsRegisteredCounter() { + assertThat(fileTailMetrics.getReadErrors(), equalTo(readErrorsCounter)); + } + + @Test + void 
getWriteTimeoutsReturnsRegisteredCounter() { + assertThat(fileTailMetrics.getWriteTimeouts(), equalTo(writeTimeoutsCounter)); + } + + @Test + void getCheckpointFlushesReturnsRegisteredCounter() { + assertThat(fileTailMetrics.getCheckpointFlushes(), equalTo(checkpointFlushesCounter)); + } + + @Test + void getCheckpointErrorsReturnsRegisteredCounter() { + assertThat(fileTailMetrics.getCheckpointErrors(), equalTo(checkpointErrorsCounter)); + } + + @Test + void getActiveFileCountReturnsAtomicLong() { + assertThat(fileTailMetrics.getActiveFileCount(), notNullValue()); + assertThat(fileTailMetrics.getActiveFileCount(), instanceOf(AtomicLong.class)); + } + + @Test + void activeFileCountInitializesToZero() { + assertThat(fileTailMetrics.getActiveFileCount().get(), equalTo(0L)); + } + + @Test + void activeFileCountCanBeIncremented() { + fileTailMetrics.getActiveFileCount().incrementAndGet(); + + assertThat(fileTailMetrics.getActiveFileCount().get(), equalTo(1L)); + } + + @Test + void activeFileCountCanBeDecrementedAfterIncrement() { + fileTailMetrics.getActiveFileCount().incrementAndGet(); + fileTailMetrics.getActiveFileCount().incrementAndGet(); + fileTailMetrics.getActiveFileCount().decrementAndGet(); + + assertThat(fileTailMetrics.getActiveFileCount().get(), equalTo(1L)); + } + + @Test + void allCounterGettersReturnInstanceOfCounter() { + assertThat(fileTailMetrics.getLinesRead(), instanceOf(Counter.class)); + assertThat(fileTailMetrics.getBytesRead(), instanceOf(Counter.class)); + assertThat(fileTailMetrics.getLinesTruncated(), instanceOf(Counter.class)); + assertThat(fileTailMetrics.getFilesOpened(), instanceOf(Counter.class)); + assertThat(fileTailMetrics.getFilesClosed(), instanceOf(Counter.class)); + assertThat(fileTailMetrics.getFilesRotated(), instanceOf(Counter.class)); + assertThat(fileTailMetrics.getReadErrors(), instanceOf(Counter.class)); + assertThat(fileTailMetrics.getWriteTimeouts(), instanceOf(Counter.class)); + 
assertThat(fileTailMetrics.getCheckpointFlushes(), instanceOf(Counter.class)); + assertThat(fileTailMetrics.getCheckpointErrors(), instanceOf(Counter.class)); + } + + @Test + void getEventsEmittedReturnsRegisteredCounter() { + assertThat(fileTailMetrics.getEventsEmitted(), equalTo(eventsEmittedCounter)); + } + + @Test + void getBackpressureTimerReturnsRegisteredTimer() { + assertThat(fileTailMetrics.getBackpressureTimer(), equalTo(backpressureTimer)); + } + + @Test + void getFileLagBytesReturnsAtomicLong() { + assertThat(fileTailMetrics.getFileLagBytes(), notNullValue()); + assertThat(fileTailMetrics.getFileLagBytes(), instanceOf(AtomicLong.class)); + } + + @Test + void fileLagBytesInitializesToZero() { + assertThat(fileTailMetrics.getFileLagBytes().get(), equalTo(0L)); + } + + @Test + void getDataLossEventsReturnsRegisteredCounter() { + assertThat(fileTailMetrics.getDataLossEvents(), equalTo(dataLossEventsCounter)); + } + + @Test + void getAcknowledgmentFailuresReturnsRegisteredCounter() { + assertThat(fileTailMetrics.getAcknowledgmentFailures(), equalTo(acknowledgmentFailuresCounter)); + } + + @Test + void getTruncationEventsReturnsRegisteredCounter() { + assertThat(fileTailMetrics.getTruncationEvents(), equalTo(truncationEventsCounter)); + } +} diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileReaderPoolTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileReaderPoolTest.java new file mode 100644 index 0000000000..aaad9c0c2b --- /dev/null +++ b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileReaderPoolTest.java @@ -0,0 +1,623 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import io.micrometer.core.instrument.Counter; +import io.micrometer.core.instrument.Timer; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.dataprepper.model.acknowledgements.AcknowledgementSetManager; +import org.opensearch.dataprepper.model.buffer.Buffer; +import org.opensearch.dataprepper.model.event.Event; +import org.opensearch.dataprepper.model.event.EventBuilder; +import org.opensearch.dataprepper.model.event.EventFactory; +import org.opensearch.dataprepper.model.record.Record; + +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.nio.file.attribute.BasicFileAttributes; +import java.nio.file.attribute.FileTime; +import java.time.Duration; +import java.time.Instant; +import java.util.Map; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; + +import static org.awaitility.Awaitility.await; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.lenient; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +@ExtendWith(MockitoExtension.class) +class FileReaderPoolTest { + + @TempDir + Path tempDir; + + @Mock + private Buffer> buffer; + + @Mock + private EventFactory eventFactory; + + @Mock + private 
FileSystemOperations fileOps; + + @Mock + private CheckpointRegistry checkpointRegistry; + + @Mock + private FileMetrics metrics; + + @Mock + private RotationDetector rotationDetector; + + @Mock + private AcknowledgementSetManager acknowledgementSetManager; + + private FileReaderContext createReaderContext() { + return new FileReaderContext( + buffer, eventFactory, fileOps, metrics, rotationDetector, + acknowledgementSetManager, false, StandardCharsets.UTF_8, + 4096, 1048576, 5000, Duration.ofSeconds(5), + Duration.ofSeconds(30), StartPosition.END, false, + Duration.ofSeconds(30), 1000, + Duration.ofSeconds(5), 3, null, true, null); + } + + private FileReaderPool createPool(final int maxActiveFiles, final int readerThreads) { + when(metrics.getActiveFileCount()).thenReturn(new AtomicLong(0)); + return new FileReaderPool( + checkpointRegistry, metrics, maxActiveFiles, readerThreads, + Duration.ofMinutes(30), createReaderContext()); + } + + private FileReaderPool createPoolWithoutMetrics(final int maxActiveFiles, final int readerThreads) { + return new FileReaderPool( + checkpointRegistry, metrics, maxActiveFiles, readerThreads, + Duration.ofMinutes(30), createReaderContext()); + } + + @Test + void addFile_submits_reader_when_under_max_active_files() { + FileReaderPool pool = createPool(10, 2); + when(checkpointRegistry.getOrCreate(anyString())).thenReturn(new CheckpointEntry()); + + FileIdentity identity = mock(FileIdentity.class); + when(identity.toString()).thenReturn("test-identity"); + Path path = Paths.get("/tmp/test.log"); + + pool.addFile(identity, path); + + assertThat(pool.getActiveReaderCount(), equalTo(1)); + assertThat(pool.getPendingCount(), equalTo(0)); + } + + @Test + void addFile_is_idempotent_for_same_identity() { + FileReaderPool pool = createPool(10, 2); + when(checkpointRegistry.getOrCreate(anyString())).thenReturn(new CheckpointEntry()); + + FileIdentity identity = mock(FileIdentity.class); + when(identity.toString()).thenReturn("dup-identity"); 
+ Path path = Paths.get("/tmp/dup.log"); + + pool.addFile(identity, path); + pool.addFile(identity, path); + + assertThat(pool.getActiveReaderCount(), equalTo(1)); + } + + @Test + void addFile_queues_pending_when_at_max_active_files() { + FileReaderPool pool = createPool(1, 2); + when(checkpointRegistry.getOrCreate(anyString())).thenReturn(new CheckpointEntry()); + + FileIdentity identity1 = mock(FileIdentity.class); + when(identity1.toString()).thenReturn("id-1"); + FileIdentity identity2 = mock(FileIdentity.class); + + pool.addFile(identity1, Paths.get("/tmp/file1.log")); + pool.addFile(identity2, Paths.get("/tmp/file2.log")); + + assertThat(pool.getActiveReaderCount(), equalTo(1)); + assertThat(pool.getPendingCount(), equalTo(1)); + } + + @Test + void addFile_does_not_add_pending_duplicate_to_queue() { + FileReaderPool pool = createPool(1, 2); + when(checkpointRegistry.getOrCreate(anyString())).thenReturn(new CheckpointEntry()); + + FileIdentity identity1 = mock(FileIdentity.class); + when(identity1.toString()).thenReturn("id-1"); + FileIdentity identity2 = mock(FileIdentity.class); + + pool.addFile(identity1, Paths.get("/tmp/file1.log")); + pool.addFile(identity2, Paths.get("/tmp/file2.log")); + pool.addFile(identity2, Paths.get("/tmp/file2.log")); + + assertThat(pool.getPendingCount(), equalTo(1)); + } + + @Test + void addFile_queues_multiple_pending_files() { + FileReaderPool pool = createPool(1, 2); + when(checkpointRegistry.getOrCreate(anyString())).thenReturn(new CheckpointEntry()); + + FileIdentity identity1 = mock(FileIdentity.class); + when(identity1.toString()).thenReturn("id-1"); + FileIdentity identity2 = mock(FileIdentity.class); + FileIdentity identity3 = mock(FileIdentity.class); + + pool.addFile(identity1, Paths.get("/tmp/file1.log")); + pool.addFile(identity2, Paths.get("/tmp/file2.log")); + pool.addFile(identity3, Paths.get("/tmp/file3.log")); + + assertThat(pool.getActiveReaderCount(), equalTo(1)); + assertThat(pool.getPendingCount(), 
equalTo(2)); + } + + @Test + void shutdown_does_not_throw() { + FileReaderPool pool = createPoolWithoutMetrics(10, 1); + pool.shutdown(); + } + + @Test + void getActiveReaderCount_returns_zero_initially() { + FileReaderPool pool = createPoolWithoutMetrics(10, 2); + assertThat(pool.getActiveReaderCount(), equalTo(0)); + } + + @Test + void getPendingCount_returns_zero_initially() { + FileReaderPool pool = createPoolWithoutMetrics(10, 2); + assertThat(pool.getPendingCount(), equalTo(0)); + } + + @Test + void closeInactiveReaders_removes_inactive_readers() throws Exception { + Counter filesClosed = mock(Counter.class); + lenient().when(metrics.getFilesClosed()).thenReturn(filesClosed); + lenient().when(metrics.getActiveFileCount()).thenReturn(new AtomicLong(0)); + + FileReaderPool pool = new FileReaderPool( + checkpointRegistry, metrics, 10, 2, + Duration.ofMillis(1), createReaderContext()); + + pool.closeInactiveReaders(); + + assertThat(pool.getActiveReaderCount(), equalTo(0)); + } + + @Test + void closeReaderForPath_removes_matching_reader() { + Counter filesClosed = mock(Counter.class); + lenient().when(metrics.getFilesClosed()).thenReturn(filesClosed); + FileReaderPool pool = createPool(10, 2); + when(checkpointRegistry.getOrCreate(anyString())).thenReturn(new CheckpointEntry()); + + FileIdentity identity = mock(FileIdentity.class); + when(identity.toString()).thenReturn("/tmp/removable.log"); + Path path = Paths.get("/tmp/removable.log"); + + pool.addFile(identity, path); + assertThat(pool.getActiveReaderCount(), equalTo(1)); + + pool.closeReaderForPath(path); + assertThat(pool.getActiveReaderCount(), equalTo(0)); + } + + @Test + void closeReaderForPath_does_nothing_when_no_match() { + FileReaderPool pool = createPool(10, 2); + when(checkpointRegistry.getOrCreate(anyString())).thenReturn(new CheckpointEntry()); + + FileIdentity identity = mock(FileIdentity.class); + when(identity.toString()).thenReturn("id-nomatch"); + pool.addFile(identity, 
Paths.get("/tmp/file1.log")); + + pool.closeReaderForPath(Paths.get("/tmp/other.log")); + assertThat(pool.getActiveReaderCount(), equalTo(1)); + } + + @Test + void closeReaderForPath_promotes_pending_files() { + Counter filesClosed = mock(Counter.class); + lenient().when(metrics.getFilesClosed()).thenReturn(filesClosed); + FileReaderPool pool = createPool(1, 2); + when(checkpointRegistry.getOrCreate(anyString())).thenReturn(new CheckpointEntry()); + + FileIdentity identity1 = mock(FileIdentity.class); + when(identity1.toString()).thenReturn("id-close-promote-1"); + FileIdentity identity2 = mock(FileIdentity.class); + when(identity2.toString()).thenReturn("id-close-promote-2"); + + pool.addFile(identity1, Paths.get("/tmp/file1.log")); + pool.addFile(identity2, Paths.get("/tmp/file2.log")); + + assertThat(pool.getActiveReaderCount(), equalTo(1)); + assertThat(pool.getPendingCount(), equalTo(1)); + + pool.closeReaderForPath(Paths.get("/tmp/file1.log")); + + assertThat(pool.getActiveReaderCount(), equalTo(1)); + assertThat(pool.getPendingCount(), equalTo(0)); + } + + @Test + void shutdown_handles_interrupted_exception() throws Exception { + FileReaderPool pool = createPoolWithoutMetrics(10, 1); + + Thread shutdownThread = new Thread(() -> { + Thread.currentThread().interrupt(); + pool.shutdown(); + }); + shutdownThread.start(); + shutdownThread.join(5000); + + assertThat(shutdownThread.isAlive(), equalTo(false)); + } + + @Test + void onReaderComplete_with_create_rename_resubmits_reader() throws Exception { + Path testFile = tempDir.resolve("rotate.log"); + Files.writeString(testFile, "line1\n"); + + Counter filesRotated = mock(Counter.class); + Counter filesOpened = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter bytesRead = mock(Counter.class); + Counter linesRead = mock(Counter.class); + Counter eventsEmitted = mock(Counter.class); + Timer backpressureTimer = mock(Timer.class); + 
lenient().when(metrics.getFilesRotated()).thenReturn(filesRotated); + lenient().when(metrics.getFilesOpened()).thenReturn(filesOpened); + lenient().when(metrics.getFilesClosed()).thenReturn(filesClosed); + lenient().when(metrics.getBytesRead()).thenReturn(bytesRead); + lenient().when(metrics.getLinesRead()).thenReturn(linesRead); + lenient().when(metrics.getEventsEmitted()).thenReturn(eventsEmitted); + lenient().when(metrics.getBackpressureTimer()).thenReturn(backpressureTimer); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + + BasicFileAttributes attrs = mock(BasicFileAttributes.class); + when(attrs.fileKey()).thenReturn("inode-1"); + when(attrs.creationTime()).thenReturn(FileTime.from(Instant.EPOCH)); + when(fileOps.readAttributes(testFile)).thenReturn(attrs); + when(fileOps.size(testFile)).thenReturn(6L); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + + FileIdentity newIdentity = mock(FileIdentity.class); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(new RotationResult(RotationType.CREATE_RENAME, newIdentity)); + when(rotationDetector.getFingerprintBytes()).thenReturn(1024); + + EventBuilder mockBuilder = mock(EventBuilder.class); + Event mockEvent = mock(Event.class); + lenient().when(eventFactory.eventBuilder(EventBuilder.class)).thenReturn(mockBuilder); + lenient().when(mockBuilder.withEventType(any())).thenReturn(mockBuilder); + lenient().when(mockBuilder.withData(any(Map.class))).thenReturn(mockBuilder); + lenient().when(mockBuilder.build()).thenReturn(mockEvent); + + FileReaderPool pool = createPool(10, 2); + when(checkpointRegistry.getOrCreate(anyString())).thenReturn(new CheckpointEntry()); + + FileIdentity identity = FileIdentity.from(testFile, fileOps, 1024); + + pool.addFile(identity, testFile); + + await().atMost(5, TimeUnit.SECONDS).until(() -> pool.getActiveReaderCount() == 0); + + 
pool.shutdown(); + } + + @Test + void closeInactiveReaders_with_real_reader() throws Exception { + Path testFile = tempDir.resolve("inactive.log"); + Files.writeString(testFile, "data\n"); + + Counter filesOpened = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter bytesRead = mock(Counter.class); + Counter linesRead = mock(Counter.class); + Counter eventsEmitted = mock(Counter.class); + Timer backpressureTimer = mock(Timer.class); + lenient().when(metrics.getFilesOpened()).thenReturn(filesOpened); + lenient().when(metrics.getFilesClosed()).thenReturn(filesClosed); + lenient().when(metrics.getBytesRead()).thenReturn(bytesRead); + lenient().when(metrics.getLinesRead()).thenReturn(linesRead); + lenient().when(metrics.getEventsEmitted()).thenReturn(eventsEmitted); + lenient().when(metrics.getBackpressureTimer()).thenReturn(backpressureTimer); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + when(metrics.getActiveFileCount()).thenReturn(new AtomicLong(0)); + + BasicFileAttributes attrs = mock(BasicFileAttributes.class); + when(attrs.fileKey()).thenReturn("inode-inactive"); + when(attrs.creationTime()).thenReturn(FileTime.from(Instant.EPOCH)); + when(fileOps.readAttributes(testFile)).thenReturn(attrs); + when(fileOps.size(testFile)).thenReturn((long) "data\n".length()); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + + EventBuilder mockBuilder = mock(EventBuilder.class); + Event mockEvent = mock(Event.class); + lenient().when(eventFactory.eventBuilder(EventBuilder.class)).thenReturn(mockBuilder); + lenient().when(mockBuilder.withEventType(any())).thenReturn(mockBuilder); + lenient().when(mockBuilder.withData(any(Map.class))).thenReturn(mockBuilder); + 
lenient().when(mockBuilder.build()).thenReturn(mockEvent); + + FileReaderPool pool = new FileReaderPool( + checkpointRegistry, metrics, 10, 2, + Duration.ofMillis(1), createReaderContext()); + + when(checkpointRegistry.getOrCreate(anyString())).thenReturn(new CheckpointEntry()); + + FileIdentity identity = FileIdentity.from(testFile, fileOps, 1024); + pool.addFile(identity, testFile); + + await().atMost(5, TimeUnit.SECONDS).until(() -> pool.getActiveReaderCount() == 0); + + pool.closeInactiveReaders(); + + pool.shutdown(); + } + + @Test + void shutdown_calls_shutdownNow_when_awaitTermination_returns_false() throws Exception { + ExecutorService mockExecutor = mock(ExecutorService.class); + when(mockExecutor.awaitTermination(anyLong(), any(TimeUnit.class))).thenReturn(false); + + FileReaderPool pool = new FileReaderPool( + checkpointRegistry, metrics, 10, + Duration.ofMinutes(30), createReaderContext(), + () -> mockExecutor); + + pool.shutdown(); + } + + @Test + void shutdown_calls_shutdownNow_when_awaitTermination_throws_interrupted() throws Exception { + ExecutorService mockExecutor = mock(ExecutorService.class); + when(mockExecutor.awaitTermination(anyLong(), any(TimeUnit.class))) + .thenThrow(new InterruptedException("test")); + + FileReaderPool pool = new FileReaderPool( + checkpointRegistry, metrics, 10, + Duration.ofMinutes(30), createReaderContext(), + () -> mockExecutor); + + pool.shutdown(); + + assertThat(Thread.currentThread().isInterrupted(), equalTo(true)); + Thread.interrupted(); + } + + @Test + void addFile_handles_rejected_execution_exception() { + ExecutorService mockExecutor = mock(ExecutorService.class); + when(mockExecutor.submit(any(Runnable.class))) + .thenThrow(new RejectedExecutionException("pool shut down")); + when(metrics.getActiveFileCount()).thenReturn(new AtomicLong(0)); + when(checkpointRegistry.getOrCreate(anyString())).thenReturn(new CheckpointEntry()); + + FileReaderPool pool = new FileReaderPool( + checkpointRegistry, metrics, 10, 
+ Duration.ofMinutes(30), createReaderContext(), + () -> mockExecutor); + + FileIdentity identity = mock(FileIdentity.class); + when(identity.toString()).thenReturn("rejected-id"); + pool.addFile(identity, Paths.get("/tmp/rejected.log")); + + assertThat(pool.getActiveReaderCount(), equalTo(0)); + } + + @Test + void closeInactiveReaders_removes_reader_past_threshold() throws Exception { + Counter filesClosed = mock(Counter.class); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + when(metrics.getActiveFileCount()).thenReturn(new AtomicLong(0)); + when(checkpointRegistry.getOrCreate(anyString())).thenReturn(new CheckpointEntry()); + + Counter filesOpened = mock(Counter.class); + Counter bytesRead = mock(Counter.class); + Counter linesRead = mock(Counter.class); + Counter eventsEmitted = mock(Counter.class); + Timer backpressureTimer = mock(Timer.class); + lenient().when(metrics.getFilesOpened()).thenReturn(filesOpened); + lenient().when(metrics.getBytesRead()).thenReturn(bytesRead); + lenient().when(metrics.getLinesRead()).thenReturn(linesRead); + lenient().when(metrics.getEventsEmitted()).thenReturn(eventsEmitted); + lenient().when(metrics.getBackpressureTimer()).thenReturn(backpressureTimer); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + + Path testFile = tempDir.resolve("inactive-test.log"); + Files.writeString(testFile, "data\n"); + + BasicFileAttributes attrs = mock(BasicFileAttributes.class); + when(attrs.fileKey()).thenReturn("inode-close-inactive"); + when(attrs.creationTime()).thenReturn(FileTime.from(Instant.EPOCH)); + when(fileOps.readAttributes(testFile)).thenReturn(attrs); + lenient().when(fileOps.size(testFile)).thenReturn((long) "data\n".length()); + + CountDownLatch latch = new CountDownLatch(1); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenAnswer(inv -> { + latch.await(); + return RotationResult.NO_ROTATION; + }); + + EventBuilder mockBuilder = mock(EventBuilder.class); + Event 
mockEvent = mock(Event.class); + lenient().when(eventFactory.eventBuilder(EventBuilder.class)).thenReturn(mockBuilder); + lenient().when(mockBuilder.withEventType(any())).thenReturn(mockBuilder); + lenient().when(mockBuilder.withData(any(Map.class))).thenReturn(mockBuilder); + lenient().when(mockBuilder.build()).thenReturn(mockEvent); + + FileReaderPool pool = new FileReaderPool( + checkpointRegistry, metrics, 10, 2, + Duration.ofMillis(1), createReaderContext()); + + FileIdentity identity = FileIdentity.from(testFile, fileOps, 1024); + pool.addFile(identity, testFile); + + await().atMost(2, TimeUnit.SECONDS).until(() -> pool.getActiveReaderCount() > 0); + + pool.closeInactiveReaders(); + + latch.countDown(); + + pool.shutdown(); + } + + @Test + void closeInactiveReaders_keeps_active_readers() throws Exception { + Counter filesClosed = mock(Counter.class); + lenient().when(metrics.getFilesClosed()).thenReturn(filesClosed); + when(metrics.getActiveFileCount()).thenReturn(new AtomicLong(0)); + when(checkpointRegistry.getOrCreate(anyString())).thenReturn(new CheckpointEntry()); + + lenient().when(metrics.getFilesOpened()).thenReturn(mock(Counter.class)); + lenient().when(metrics.getBytesRead()).thenReturn(mock(Counter.class)); + lenient().when(metrics.getLinesRead()).thenReturn(mock(Counter.class)); + lenient().when(metrics.getEventsEmitted()).thenReturn(mock(Counter.class)); + lenient().when(metrics.getBackpressureTimer()).thenReturn(mock(Timer.class)); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + + Path testFile = tempDir.resolve("active-test.log"); + Files.writeString(testFile, "data\n"); + + BasicFileAttributes attrs = mock(BasicFileAttributes.class); + when(attrs.fileKey()).thenReturn("inode-keep-active"); + when(attrs.creationTime()).thenReturn(FileTime.from(Instant.EPOCH)); + when(fileOps.readAttributes(testFile)).thenReturn(attrs); + lenient().when(fileOps.size(testFile)).thenReturn((long) "data\n".length()); + + CountDownLatch 
latch = new CountDownLatch(1); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenAnswer(inv -> { + latch.await(); + return RotationResult.NO_ROTATION; + }); + + EventBuilder mockBuilder = mock(EventBuilder.class); + Event mockEvent = mock(Event.class); + lenient().when(eventFactory.eventBuilder(EventBuilder.class)).thenReturn(mockBuilder); + lenient().when(mockBuilder.withEventType(any())).thenReturn(mockBuilder); + lenient().when(mockBuilder.withData(any(Map.class))).thenReturn(mockBuilder); + lenient().when(mockBuilder.build()).thenReturn(mockEvent); + + FileReaderPool pool = new FileReaderPool( + checkpointRegistry, metrics, 10, 2, + Duration.ofHours(1), createReaderContext()); + + FileIdentity identity = FileIdentity.from(testFile, fileOps, 1024); + pool.addFile(identity, testFile); + + await().atMost(2, TimeUnit.SECONDS).until(() -> pool.getActiveReaderCount() > 0); + + assertThat(pool.getActiveReaderCount(), equalTo(1)); + + pool.closeInactiveReaders(); + + assertThat(pool.getActiveReaderCount(), equalTo(1)); + + latch.countDown(); + pool.shutdown(); + } + + @Test + void onReaderComplete_with_deleted_rotation_marks_completed_and_processes_pending() throws Exception { + Path testFile = tempDir.resolve("deleted-rotate.log"); + Files.writeString(testFile, "line1\n"); + + Counter filesOpened = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter bytesRead = mock(Counter.class); + Counter linesRead = mock(Counter.class); + Counter eventsEmitted = mock(Counter.class); + Timer backpressureTimer = mock(Timer.class); + lenient().when(metrics.getFilesOpened()).thenReturn(filesOpened); + lenient().when(metrics.getFilesClosed()).thenReturn(filesClosed); + lenient().when(metrics.getBytesRead()).thenReturn(bytesRead); + lenient().when(metrics.getLinesRead()).thenReturn(linesRead); + lenient().when(metrics.getEventsEmitted()).thenReturn(eventsEmitted); + 
lenient().when(metrics.getBackpressureTimer()).thenReturn(backpressureTimer); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + + BasicFileAttributes attrs = mock(BasicFileAttributes.class); + when(attrs.fileKey()).thenReturn("inode-del"); + when(attrs.creationTime()).thenReturn(FileTime.from(Instant.EPOCH)); + when(fileOps.readAttributes(testFile)).thenReturn(attrs); + when(fileOps.size(testFile)).thenReturn(6L); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + lenient().when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.DELETED); + + EventBuilder mockBuilder = mock(EventBuilder.class); + Event mockEvent = mock(Event.class); + lenient().when(eventFactory.eventBuilder(EventBuilder.class)).thenReturn(mockBuilder); + lenient().when(mockBuilder.withEventType(any())).thenReturn(mockBuilder); + lenient().when(mockBuilder.withData(any(Map.class))).thenReturn(mockBuilder); + lenient().when(mockBuilder.build()).thenReturn(mockEvent); + + FileReaderPool pool = createPool(10, 2); + when(checkpointRegistry.getOrCreate(anyString())).thenReturn(new CheckpointEntry()); + + FileIdentity identity = FileIdentity.from(testFile, fileOps, 1024); + + Path pendingFile = tempDir.resolve("pending.log"); + Files.writeString(pendingFile, "pending\n"); + BasicFileAttributes pendingAttrs = mock(BasicFileAttributes.class); + when(pendingAttrs.fileKey()).thenReturn("inode-pending"); + when(pendingAttrs.creationTime()).thenReturn(FileTime.from(Instant.EPOCH)); + lenient().when(fileOps.readAttributes(pendingFile)).thenReturn(pendingAttrs); + lenient().when(fileOps.size(pendingFile)).thenReturn(8L); + lenient().when(fileOps.openReadChannel(pendingFile)).thenReturn( + FileChannel.open(pendingFile, StandardOpenOption.READ)); + FileIdentity pendingIdentity = FileIdentity.from(pendingFile, fileOps, 1024); + + FileReaderPool 
limitedPool = new FileReaderPool( + checkpointRegistry, metrics, 1, 2, + Duration.ofMinutes(30), createReaderContext()); + + limitedPool.addFile(identity, testFile); + limitedPool.addFile(pendingIdentity, pendingFile); + + assertThat(limitedPool.getPendingCount(), equalTo(1)); + + await().atMost(5, TimeUnit.SECONDS).until(() -> limitedPool.getActiveReaderCount() == 0); + + limitedPool.shutdown(); + } +} diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileReaderTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileReaderTest.java new file mode 100644 index 0000000000..f38e62f07f --- /dev/null +++ b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileReaderTest.java @@ -0,0 +1,1461 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import io.micrometer.core.instrument.Counter; +import io.micrometer.core.instrument.Timer; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.dataprepper.model.acknowledgements.AcknowledgementSet; +import org.opensearch.dataprepper.model.acknowledgements.AcknowledgementSetManager; +import org.opensearch.dataprepper.model.buffer.Buffer; +import org.opensearch.dataprepper.model.codec.InputCodec; +import org.opensearch.dataprepper.model.event.Event; +import org.opensearch.dataprepper.model.event.EventBuilder; +import org.opensearch.dataprepper.model.event.EventFactory; +import org.opensearch.dataprepper.model.record.Record; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.time.Duration; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +import java.util.function.Consumer; + +import static org.awaitility.Awaitility.await; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.doAnswer; +import static 
org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.lenient; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +@ExtendWith(MockitoExtension.class) +class FileReaderTest { + + @TempDir + Path tempDir; + + @Mock + private Buffer> buffer; + + @Mock + private EventFactory eventFactory; + + @Mock + private FileSystemOperations fileOps; + + @Mock + private FileMetrics metrics; + + @Mock + private RotationDetector rotationDetector; + + @Mock + private AcknowledgementSetManager acknowledgementSetManager; + + private CheckpointEntry checkpointEntry; + private FileIdentity fileIdentity; + private AtomicBoolean onCompleteCalled; + + @BeforeEach + void setUp() { + checkpointEntry = new CheckpointEntry(); + onCompleteCalled = new AtomicBoolean(false); + } + + private FileReaderContext createContext(final StartPosition startPosition) { + return new FileReaderContext( + buffer, eventFactory, fileOps, metrics, rotationDetector, + acknowledgementSetManager, false, StandardCharsets.UTF_8, + 4096, 1048576, 5000, Duration.ofSeconds(30), + Duration.ofSeconds(30), startPosition, false, + Duration.ofSeconds(30), 1000, + Duration.ofSeconds(5), 3, null, true, null); + } + + private FileReaderContext createContext(final int readBufferSize, final int maxLineLength, + final boolean includeMetadata, final StartPosition startPosition) { + return new FileReaderContext( + buffer, eventFactory, fileOps, metrics, rotationDetector, + acknowledgementSetManager, false, StandardCharsets.UTF_8, + readBufferSize, maxLineLength, 5000, Duration.ofSeconds(30), + Duration.ofSeconds(30), startPosition, includeMetadata, + Duration.ofSeconds(30), 1000, + Duration.ofSeconds(5), 3, null, true, null); + } + + private FileReader createReader(final Path path) { + return createReader(path, 4096, 1048576, false, StartPosition.BEGINNING); + } + + 
private FileReader createReader(final Path path, final int readBufferSize, + final int maxLineLength, final boolean includeMetadata) { + return createReader(path, readBufferSize, maxLineLength, includeMetadata, StartPosition.BEGINNING); + } + + private FileReader createReader(final Path path, final int readBufferSize, + final int maxLineLength, final boolean includeMetadata, + final StartPosition startPosition) { + fileIdentity = mock(FileIdentity.class); + final FileReaderContext context = createContext(readBufferSize, maxLineLength, includeMetadata, startPosition); + return new FileReader(path, fileIdentity, checkpointEntry, context, + () -> onCompleteCalled.set(true)); + } + + private void stubEventFactory() { + when(eventFactory.eventBuilder(EventBuilder.class)).thenAnswer(invocation -> { + EventBuilder mockBuilder = mock(EventBuilder.class); + Event mockEvent = mock(Event.class); + when(mockBuilder.withEventType(any())).thenReturn(mockBuilder); + when(mockBuilder.withData(any(Map.class))).thenReturn(mockBuilder); + when(mockBuilder.build()).thenReturn(mockEvent); + return mockBuilder; + }); + } + + private void lenientStubEventFactory() { + lenient().when(eventFactory.eventBuilder(EventBuilder.class)).thenAnswer(invocation -> { + EventBuilder mockBuilder = mock(EventBuilder.class); + Event mockEvent = mock(Event.class); + lenient().when(mockBuilder.withEventType(any())).thenReturn(mockBuilder); + lenient().when(mockBuilder.withData(any(Map.class))).thenReturn(mockBuilder); + lenient().when(mockBuilder.build()).thenReturn(mockEvent); + return mockBuilder; + }); + } + + private void stubReadMetrics() { + Counter linesRead = mock(Counter.class); + Counter bytesRead = mock(Counter.class); + Counter filesOpened = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter eventsEmitted = mock(Counter.class); + Timer backpressureTimer = mock(Timer.class); + when(metrics.getLinesRead()).thenReturn(linesRead); + 
when(metrics.getBytesRead()).thenReturn(bytesRead); + when(metrics.getFilesOpened()).thenReturn(filesOpened); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + lenient().when(metrics.getEventsEmitted()).thenReturn(eventsEmitted); + lenient().when(metrics.getBackpressureTimer()).thenReturn(backpressureTimer); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + } + + @Test + void run_reads_lines_from_file() throws Exception { + Path testFile = tempDir.resolve("test.log"); + Files.writeString(testFile, "line1\nline2\nline3\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + stubEventFactory(); + + final FileReader reader = createReader(testFile); + reader.run(); + + verify(buffer, times(3)).write(any(Record.class), eq(5000)); + verify(metrics.getLinesRead(), times(3)).increment(); + assertThat(onCompleteCalled.get(), equalTo(true)); + } + + @Test + void run_tracks_read_offset() throws Exception { + Path testFile = tempDir.resolve("offset.log"); + final String content = "hello\nworld\n"; + Files.writeString(testFile, content); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + stubEventFactory(); + + final FileReader reader = createReader(testFile); + reader.run(); + + assertThat(reader.getReadOffset(), equalTo((long) content.getBytes(StandardCharsets.UTF_8).length)); + assertThat(checkpointEntry.getReadOffset(), equalTo((long) content.getBytes(StandardCharsets.UTF_8).length)); + } + + @Test + void run_handles_partial_line_without_trailing_newline() throws Exception { + 
Path testFile = tempDir.resolve("partial.log"); + Files.writeString(testFile, "complete\nno-newline-at-end"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + stubEventFactory(); + + final FileReader reader = createReader(testFile); + reader.run(); + + verify(buffer, times(2)).write(any(Record.class), eq(5000)); + } + + @Test + void run_truncates_line_exceeding_max_line_length() throws Exception { + Path testFile = tempDir.resolve("longline.log"); + final String longContent = "A".repeat(200); + Files.writeString(testFile, longContent); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + Counter linesTruncated = mock(Counter.class); + when(metrics.getLinesTruncated()).thenReturn(linesTruncated); + stubEventFactory(); + + final FileReader reader = createReader(testFile, 4096, 50, false); + reader.run(); + + verify(linesTruncated).increment(); + } + + @Test + void run_truncates_complete_line_exceeding_max_line_length() throws Exception { + Path testFile = tempDir.resolve("longcomplete.log"); + Files.writeString(testFile, "B".repeat(200) + "\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + Counter linesTruncated = mock(Counter.class); + when(metrics.getLinesTruncated()).thenReturn(linesTruncated); + stubEventFactory(); + + final FileReader reader = createReader(testFile, 
4096, 50, false); + reader.run(); + + verify(linesTruncated).increment(); + } + + @Test + void run_retries_on_buffer_back_pressure() throws Exception { + Path testFile = tempDir.resolve("backpressure.log"); + Files.writeString(testFile, "line1\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + Counter writeTimeouts = mock(Counter.class); + when(metrics.getWriteTimeouts()).thenReturn(writeTimeouts); + stubEventFactory(); + + doThrow(new TimeoutException("buffer full")) + .doNothing() + .when(buffer).write(any(Record.class), anyInt()); + + final FileReader reader = createReader(testFile); + reader.run(); + + verify(writeTimeouts).increment(); + verify(buffer, times(2)).write(any(Record.class), eq(5000)); + } + + @Test + void run_handles_deleted_file_via_rotation_detector() throws Exception { + Path testFile = tempDir.resolve("deleted.log"); + Files.writeString(testFile, "data\n"); + + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.DELETED); + + final FileReader reader = createReader(testFile); + reader.run(); + + assertThat(reader.getLastRotationType(), equalTo(RotationType.DELETED)); + verify(buffer, never()).write(any(), anyInt()); + assertThat(onCompleteCalled.get(), equalTo(true)); + } + + @Test + void run_handles_no_such_file_exception_during_read() throws Exception { + Path testFile = tempDir.resolve("gone.log"); + Files.writeString(testFile, "data\n"); + + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + when(fileOps.openReadChannel(testFile)).thenThrow(new NoSuchFileException(testFile.toString())); + Counter filesClosed = mock(Counter.class); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + + final 
FileReader reader = createReader(testFile); + reader.run(); + + assertThat(onCompleteCalled.get(), equalTo(true)); + } + + @Test + void run_handles_copytruncate_rotation() throws Exception { + Path testFile = tempDir.resolve("truncated.log"); + Files.writeString(testFile, "new-data\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + Counter filesRotated = mock(Counter.class); + when(metrics.getFilesRotated()).thenReturn(filesRotated); + Counter truncationEvents = mock(Counter.class); + when(metrics.getTruncationEvents()).thenReturn(truncationEvents); + stubReadMetrics(); + stubEventFactory(); + + fileIdentity = mock(FileIdentity.class); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(new RotationResult(RotationType.COPYTRUNCATE, fileIdentity)); + + checkpointEntry.setReadOffset(500); + final FileReader reader = createReader(testFile); + reader.run(); + + verify(filesRotated).increment(); + assertThat(reader.getLastRotationType(), equalTo(RotationType.COPYTRUNCATE)); + } + + @Test + void run_handles_create_rename_rotation() throws Exception { + Path testFile = tempDir.resolve("renamed.log"); + Files.writeString(testFile, "tail-data\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + Counter filesRotated = mock(Counter.class); + when(metrics.getFilesRotated()).thenReturn(filesRotated); + stubReadMetrics(); + stubEventFactory(); + + FileIdentity newIdentity = mock(FileIdentity.class); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(new RotationResult(RotationType.CREATE_RENAME, newIdentity)); + + final FileReader reader = createReader(testFile); + reader.run(); + + verify(filesRotated).increment(); + assertThat(reader.getLastRotationType(), equalTo(RotationType.CREATE_RENAME)); + } + + @Test + 
void run_resumes_from_checkpoint_offset() throws Exception { + Path testFile = tempDir.resolve("resume.log"); + Files.writeString(testFile, "line1\nline2\nline3\n"); + long offsetAfterFirstLine = "line1\n".getBytes(StandardCharsets.UTF_8).length; + checkpointEntry.setReadOffset(offsetAfterFirstLine); + + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + stubEventFactory(); + + final FileReader reader = createReader(testFile); + reader.run(); + + verify(buffer, times(2)).write(any(Record.class), eq(5000)); + } + + @Test + void run_increments_read_errors_on_io_exception() throws Exception { + Path testFile = tempDir.resolve("ioerror.log"); + Files.writeString(testFile, "data\n"); + + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + when(fileOps.openReadChannel(testFile)).thenThrow(new IOException("disk error")); + Counter readErrors = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + when(metrics.getReadErrors()).thenReturn(readErrors); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + + final FileReader reader = createReader(testFile); + reader.run(); + + verify(readErrors).increment(); + assertThat(onCompleteCalled.get(), equalTo(true)); + } + + @Test + void run_includes_file_metadata_when_enabled() throws Exception { + Path testFile = tempDir.resolve("meta.log"); + Files.writeString(testFile, "data\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + + ArgumentCaptor dataCaptor = ArgumentCaptor.forClass(Map.class); 
+ EventBuilder mockBuilder = mock(EventBuilder.class); + Event mockEvent = mock(Event.class); + when(eventFactory.eventBuilder(EventBuilder.class)).thenReturn(mockBuilder); + when(mockBuilder.withEventType(any())).thenReturn(mockBuilder); + when(mockBuilder.withData(dataCaptor.capture())).thenReturn(mockBuilder); + when(mockBuilder.build()).thenReturn(mockEvent); + + final FileReader reader = createReader(testFile, 4096, 1048576, true); + reader.run(); + + Map capturedData = dataCaptor.getValue(); + assertThat(capturedData.containsKey("file"), equalTo(true)); + @SuppressWarnings("unchecked") + final Map fileMetadata = (Map) capturedData.get("file"); + assertThat(fileMetadata.containsKey("path"), equalTo(true)); + assertThat(fileMetadata.containsKey("name"), equalTo(true)); + assertThat(capturedData.containsKey("offset"), equalTo(true)); + } + + @Test + void getFileIdentity_returns_identity_passed_in_constructor() throws Exception { + Path testFile = tempDir.resolve("identity.log"); + Files.writeString(testFile, ""); + + final FileReader reader = createReader(testFile); + assertThat(reader.getFileIdentity(), equalTo(fileIdentity)); + } + + @Test + void getLastRotationType_defaults_to_no_rotation() throws Exception { + Path testFile = tempDir.resolve("default.log"); + Files.writeString(testFile, ""); + + final FileReader reader = createReader(testFile); + assertThat(reader.getLastRotationType(), equalTo(RotationType.NO_ROTATION)); + } + + @Test + void start_position_end_seeks_to_end_of_file_for_new_files() throws Exception { + Path testFile = tempDir.resolve("startend.log"); + Files.writeString(testFile, "existing-line1\nexisting-line2\n"); + long fileSize = Files.size(testFile); + when(fileOps.size(testFile)).thenReturn(fileSize); + + final FileReader reader = createReader(testFile, 4096, 1048576, false, StartPosition.END); + + assertThat(reader.getReadOffset(), equalTo(fileSize)); + } + + @Test + void start_position_beginning_starts_from_offset_zero_for_new_files() 
throws Exception { + Path testFile = tempDir.resolve("startbegin.log"); + Files.writeString(testFile, "existing-line1\nexisting-line2\n"); + + final FileReader reader = createReader(testFile, 4096, 1048576, false, StartPosition.BEGINNING); + + assertThat(reader.getReadOffset(), equalTo(0L)); + } + + @Test + void start_position_end_does_not_seek_when_checkpoint_exists() throws Exception { + Path testFile = tempDir.resolve("checkpoint-end.log"); + Files.writeString(testFile, "existing-line1\nexisting-line2\n"); + checkpointEntry.setReadOffset(10); + + final FileReader reader = createReader(testFile, 4096, 1048576, false, StartPosition.END); + + assertThat(reader.getReadOffset(), equalTo(10L)); + } + + @Test + void run_increments_events_emitted_counter() throws Exception { + Path testFile = tempDir.resolve("emitted.log"); + Files.writeString(testFile, "line1\nline2\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + stubEventFactory(); + when(fileOps.size(testFile)).thenReturn(Files.size(testFile)); + + Counter eventsEmitted = mock(Counter.class); + when(metrics.getEventsEmitted()).thenReturn(eventsEmitted); + + final FileReader reader = createReader(testFile); + reader.run(); + + verify(eventsEmitted, times(2)).increment(); + } + + private FileReaderContext createContextWithCodec(final InputCodec codec) { + return new FileReaderContext( + buffer, eventFactory, fileOps, metrics, rotationDetector, + acknowledgementSetManager, false, StandardCharsets.UTF_8, + 4096, 1048576, 5000, Duration.ofSeconds(30), + Duration.ofSeconds(30), StartPosition.BEGINNING, false, + Duration.ofSeconds(30), 1000, + Duration.ofSeconds(5), 3, codec, true, null); + } + + private FileReaderContext createContextWithAcknowledgements(final int batchSize, final 
Duration batchTimeout, final int maxRetries) { + return new FileReaderContext( + buffer, eventFactory, fileOps, metrics, rotationDetector, + acknowledgementSetManager, true, StandardCharsets.UTF_8, + 4096, 1048576, 5000, Duration.ofSeconds(30), + Duration.ofSeconds(30), StartPosition.BEGINNING, false, + Duration.ofSeconds(30), batchSize, + batchTimeout, maxRetries, null, true, null); + } + + private FileReader createReaderWithContext(final Path path, final FileReaderContext context) { + fileIdentity = mock(FileIdentity.class); + return new FileReader(path, fileIdentity, checkpointEntry, context, + () -> onCompleteCalled.set(true)); + } + + @Test + void start_position_end_falls_back_to_zero_on_io_exception() throws Exception { + Path testFile = tempDir.resolve("startend-error.log"); + Files.writeString(testFile, "existing data\n"); + when(fileOps.size(testFile)).thenThrow(new IOException("disk error")); + + final FileReader reader = createReader(testFile, 4096, 1048576, false, StartPosition.END); + + assertThat(reader.getReadOffset(), equalTo(0L)); + } + + @Test + void run_increments_read_errors_on_runtime_exception() throws Exception { + Path testFile = tempDir.resolve("runtime-err.log"); + Files.writeString(testFile, "data\n"); + + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenThrow(new RuntimeException("unexpected")); + Counter readErrors = mock(Counter.class); + when(metrics.getReadErrors()).thenReturn(readErrors); + + final FileReader reader = createReader(testFile); + reader.run(); + + verify(readErrors).increment(); + assertThat(onCompleteCalled.get(), equalTo(true)); + } + + @Test + void run_drains_file_on_create_rename_and_handles_no_such_file() throws Exception { + Path testFile = tempDir.resolve("drain-nosuch.log"); + Files.writeString(testFile, "data\n"); + + FileIdentity newIdentity = mock(FileIdentity.class); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(new 
RotationResult(RotationType.CREATE_RENAME, newIdentity)); + when(fileOps.openReadChannel(testFile)).thenThrow(new NoSuchFileException(testFile.toString())); + Counter filesRotated = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + when(metrics.getFilesRotated()).thenReturn(filesRotated); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + + final FileReader reader = createReader(testFile); + reader.run(); + + verify(filesRotated).increment(); + assertThat(reader.getLastRotationType(), equalTo(RotationType.CREATE_RENAME)); + } + + @Test + void run_drains_file_on_create_rename_and_handles_io_exception() throws Exception { + Path testFile = tempDir.resolve("drain-ioerr.log"); + Files.writeString(testFile, "data\n"); + + FileIdentity newIdentity = mock(FileIdentity.class); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(new RotationResult(RotationType.CREATE_RENAME, newIdentity)); + when(fileOps.openReadChannel(testFile)).thenThrow(new IOException("disk error")); + Counter filesRotated = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter readErrors = mock(Counter.class); + when(metrics.getFilesRotated()).thenReturn(filesRotated); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + when(metrics.getReadErrors()).thenReturn(readErrors); + + final FileReader reader = createReader(testFile); + reader.run(); + + verify(readErrors).increment(); + } + + @Test + void run_drain_timeout_logs_data_loss_when_unread_data() throws Exception { + Path testFile = tempDir.resolve("drain-timeout.log"); + Files.writeString(testFile, "A".repeat(10000) + "\n"); + + FileIdentity newIdentity = mock(FileIdentity.class); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(new RotationResult(RotationType.CREATE_RENAME, newIdentity)); + + FileChannel mockChannel = mock(FileChannel.class); + when(fileOps.openReadChannel(testFile)).thenReturn(mockChannel); + 
when(mockChannel.position(anyLong())).thenReturn(mockChannel); + lenient().when(mockChannel.read(any(ByteBuffer.class))).thenAnswer(inv -> { + Thread.sleep(5); + ByteBuffer buf = inv.getArgument(0); + byte[] data = "A".repeat(buf.remaining()).getBytes(); + buf.put(data, 0, Math.min(data.length, buf.remaining())); + return buf.position(); + }); + when(mockChannel.size()).thenReturn(100000L); + + Counter filesRotated = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter bytesRead = mock(Counter.class); + Counter dataLossEvents = mock(Counter.class); + when(metrics.getFilesRotated()).thenReturn(filesRotated); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + when(metrics.getFilesOpened()).thenReturn(mock(Counter.class)); + lenient().when(metrics.getBytesRead()).thenReturn(bytesRead); + when(metrics.getDataLossEvents()).thenReturn(dataLossEvents); + + FileReaderContext context = new FileReaderContext( + buffer, eventFactory, fileOps, metrics, rotationDetector, + acknowledgementSetManager, false, StandardCharsets.UTF_8, + 4096, 1048576, 5000, Duration.ofSeconds(30), + Duration.ofMillis(1), StartPosition.BEGINNING, false, + Duration.ofSeconds(30), 1000, + Duration.ofSeconds(5), 3, null, true, null); + + lenientStubEventFactory(); + Counter linesRead = mock(Counter.class); + Counter eventsEmitted = mock(Counter.class); + lenient().when(metrics.getLinesRead()).thenReturn(linesRead); + lenient().when(metrics.getEventsEmitted()).thenReturn(eventsEmitted); + + fileIdentity = mock(FileIdentity.class); + final FileReader reader = new FileReader(testFile, fileIdentity, checkpointEntry, context, + () -> onCompleteCalled.set(true)); + reader.run(); + + verify(dataLossEvents).increment(); + } + + @Test + void run_max_read_time_reached_breaks_without_data_loss() throws Exception { + Path testFile = tempDir.resolve("maxread.log"); + Files.writeString(testFile, "A".repeat(10000) + "\n"); + + when(rotationDetector.checkRotation(any(), any(), 
any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + + FileChannel mockChannel = mock(FileChannel.class); + when(fileOps.openReadChannel(testFile)).thenReturn(mockChannel); + when(mockChannel.position(anyLong())).thenReturn(mockChannel); + when(mockChannel.read(any(ByteBuffer.class))).thenAnswer(inv -> { + Thread.sleep(5); + ByteBuffer buf = inv.getArgument(0); + byte[] data = "A".repeat(buf.remaining()).getBytes(); + buf.put(data, 0, Math.min(data.length, buf.remaining())); + return buf.position(); + }); + + Counter filesClosed = mock(Counter.class); + Counter bytesRead = mock(Counter.class); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + when(metrics.getFilesOpened()).thenReturn(mock(Counter.class)); + when(metrics.getBytesRead()).thenReturn(bytesRead); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + + stubEventFactory(); + Counter linesRead = mock(Counter.class); + Counter eventsEmitted = mock(Counter.class); + lenient().when(metrics.getLinesRead()).thenReturn(linesRead); + lenient().when(metrics.getEventsEmitted()).thenReturn(eventsEmitted); + + FileReaderContext context = new FileReaderContext( + buffer, eventFactory, fileOps, metrics, rotationDetector, + acknowledgementSetManager, false, StandardCharsets.UTF_8, + 4096, 1048576, 5000, Duration.ofMillis(1), + Duration.ofSeconds(30), StartPosition.BEGINNING, false, + Duration.ofSeconds(30), 1000, + Duration.ofSeconds(5), 3, null, true, null); + + fileIdentity = mock(FileIdentity.class); + final FileReader reader = new FileReader(testFile, fileIdentity, checkpointEntry, context, + () -> onCompleteCalled.set(true)); + reader.run(); + + assertThat(onCompleteCalled.get(), equalTo(true)); + } + + @Test + void run_with_codec_parses_bytes_and_emits_records() throws Exception { + Path testFile = tempDir.resolve("codec.log"); + Files.writeString(testFile, "codec-data\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + 
when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + + Counter bytesRead = mock(Counter.class); + Counter filesOpened = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter linesRead = mock(Counter.class); + Counter eventsEmitted = mock(Counter.class); + Timer backpressureTimer = mock(Timer.class); + when(metrics.getBytesRead()).thenReturn(bytesRead); + when(metrics.getFilesOpened()).thenReturn(filesOpened); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + when(metrics.getLinesRead()).thenReturn(linesRead); + when(metrics.getEventsEmitted()).thenReturn(eventsEmitted); + lenient().when(metrics.getBackpressureTimer()).thenReturn(backpressureTimer); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + + InputCodec mockCodec = mock(InputCodec.class); + doAnswer(inv -> { + Consumer> consumer = inv.getArgument(1); + Event mockEvent = mock(Event.class); + consumer.accept(new Record<>(mockEvent)); + return null; + }).when(mockCodec).parse(any(), any()); + + FileReaderContext context = createContextWithCodec(mockCodec); + final FileReader reader = createReaderWithContext(testFile, context); + reader.run(); + + verify(buffer, atLeastOnce()).write(any(Record.class), eq(5000)); + verify(linesRead, atLeastOnce()).increment(); + } + + @Test + void run_with_codec_handles_parse_io_exception() throws Exception { + Path testFile = tempDir.resolve("codec-error.log"); + Files.writeString(testFile, "bad-data\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + + Counter bytesRead = mock(Counter.class); + Counter filesOpened = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + 
Counter readErrors = mock(Counter.class); + when(metrics.getBytesRead()).thenReturn(bytesRead); + when(metrics.getFilesOpened()).thenReturn(filesOpened); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + when(metrics.getReadErrors()).thenReturn(readErrors); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + + InputCodec mockCodec = mock(InputCodec.class); + doThrow(new IOException("parse error")).when(mockCodec).parse(any(), any()); + + FileReaderContext context = createContextWithCodec(mockCodec); + final FileReader reader = createReaderWithContext(testFile, context); + reader.run(); + + verify(readErrors).increment(); + } + + @Test + void run_codec_record_retries_on_backpressure_and_records_timer() throws Exception { + Path testFile = tempDir.resolve("codec-backpressure.log"); + Files.writeString(testFile, "data\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + + Counter bytesRead = mock(Counter.class); + Counter filesOpened = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter linesRead = mock(Counter.class); + Counter eventsEmitted = mock(Counter.class); + Counter writeTimeouts = mock(Counter.class); + Timer backpressureTimer = mock(Timer.class); + when(metrics.getBytesRead()).thenReturn(bytesRead); + when(metrics.getFilesOpened()).thenReturn(filesOpened); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + when(metrics.getLinesRead()).thenReturn(linesRead); + when(metrics.getEventsEmitted()).thenReturn(eventsEmitted); + when(metrics.getWriteTimeouts()).thenReturn(writeTimeouts); + when(metrics.getBackpressureTimer()).thenReturn(backpressureTimer); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + + doThrow(new TimeoutException("buffer full")) + 
.doNothing() + .when(buffer).write(any(Record.class), anyInt()); + + InputCodec mockCodec = mock(InputCodec.class); + doAnswer(inv -> { + Consumer> consumer = inv.getArgument(1); + Event mockEvent = mock(Event.class); + consumer.accept(new Record<>(mockEvent)); + return null; + }).when(mockCodec).parse(any(), any()); + + FileReaderContext context = createContextWithCodec(mockCodec); + final FileReader reader = createReaderWithContext(testFile, context); + reader.run(); + + verify(writeTimeouts).increment(); + verify(backpressureTimer).record(anyLong(), any(TimeUnit.class)); + } + + @Test + void run_with_acknowledgements_creates_ack_set_and_completes_on_batch_full() throws Exception { + Path testFile = tempDir.resolve("ack.log"); + Files.writeString(testFile, "line1\nline2\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + stubEventFactory(); + when(fileOps.size(testFile)).thenReturn(Files.size(testFile)); + + AcknowledgementSet ackSet = mock(AcknowledgementSet.class); + when(acknowledgementSetManager.create(any(), any(Duration.class))).thenReturn(ackSet); + + FileReaderContext context = createContextWithAcknowledgements(1, Duration.ofSeconds(5), 3); + final FileReader reader = createReaderWithContext(testFile, context); + reader.run(); + + verify(acknowledgementSetManager, atLeastOnce()).create(any(), any(Duration.class)); + verify(ackSet, atLeastOnce()).add(any(Event.class)); + verify(ackSet, atLeastOnce()).complete(); + } + + @Test + void run_with_acknowledgements_batch_timeout_triggers_complete() throws Exception { + Path testFile = tempDir.resolve("ack-timeout.log"); + Files.writeString(testFile, "line1\nline2\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + 
when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + stubEventFactory(); + when(fileOps.size(testFile)).thenReturn(Files.size(testFile)); + + AcknowledgementSet ackSet = mock(AcknowledgementSet.class); + when(acknowledgementSetManager.create(any(), any(Duration.class))).thenReturn(ackSet); + + FileReaderContext context = createContextWithAcknowledgements(10000, Duration.ofMillis(0), 3); + final FileReader reader = createReaderWithContext(testFile, context); + reader.run(); + + verify(ackSet, atLeastOnce()).complete(); + } + + @SuppressWarnings("unchecked") + @Test + void handleAcknowledgement_positive_resets_retry_and_updates_checkpoint() throws Exception { + Path testFile = tempDir.resolve("ack-pos.log"); + Files.writeString(testFile, "line1\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + stubEventFactory(); + when(fileOps.size(testFile)).thenReturn(Files.size(testFile)); + + AcknowledgementSet ackSet = mock(AcknowledgementSet.class); + ArgumentCaptor handlerCaptor = ArgumentCaptor.forClass(Consumer.class); + when(acknowledgementSetManager.create(handlerCaptor.capture(), any(Duration.class))).thenReturn(ackSet); + + FileReaderContext context = createContextWithAcknowledgements(1000, Duration.ofSeconds(5), 3); + final FileReader reader = createReaderWithContext(testFile, context); + reader.run(); + + Consumer handler = handlerCaptor.getValue(); + handler.accept(true); + + assertThat(checkpointEntry.getCommittedOffset(), equalTo(reader.getReadOffset())); + } + + @SuppressWarnings("unchecked") + @Test + void handleAcknowledgement_negative_retries_and_eventually_advances() throws 
Exception { + Path testFile = tempDir.resolve("ack-neg.log"); + Files.writeString(testFile, "line1\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + stubEventFactory(); + when(fileOps.size(testFile)).thenReturn(Files.size(testFile)); + + Counter ackFailures = mock(Counter.class); + when(metrics.getAcknowledgmentFailures()).thenReturn(ackFailures); + + AcknowledgementSet ackSet = mock(AcknowledgementSet.class); + ArgumentCaptor handlerCaptor = ArgumentCaptor.forClass(Consumer.class); + when(acknowledgementSetManager.create(handlerCaptor.capture(), any(Duration.class))).thenReturn(ackSet); + + FileReaderContext context = createContextWithAcknowledgements(1000, Duration.ofSeconds(5), 2); + final FileReader reader = createReaderWithContext(testFile, context); + reader.run(); + + Consumer handler = handlerCaptor.getValue(); + handler.accept(false); + verify(ackFailures, times(1)).increment(); + + handler.accept(false); + verify(ackFailures, times(2)).increment(); + + handler.accept(false); + verify(ackFailures, times(3)).increment(); + assertThat(checkpointEntry.getCommittedOffset(), equalTo(reader.getReadOffset())); + } + + @Test + void run_with_codec_acknowledgements_creates_ack_set() throws Exception { + Path testFile = tempDir.resolve("codec-ack.log"); + Files.writeString(testFile, "data\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + + Counter bytesRead = mock(Counter.class); + Counter filesOpened = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter linesRead = mock(Counter.class); + 
Counter eventsEmitted = mock(Counter.class); + Timer backpressureTimer = mock(Timer.class); + when(metrics.getBytesRead()).thenReturn(bytesRead); + when(metrics.getFilesOpened()).thenReturn(filesOpened); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + when(metrics.getLinesRead()).thenReturn(linesRead); + when(metrics.getEventsEmitted()).thenReturn(eventsEmitted); + lenient().when(metrics.getBackpressureTimer()).thenReturn(backpressureTimer); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + + AcknowledgementSet ackSet = mock(AcknowledgementSet.class); + when(acknowledgementSetManager.create(any(), any(Duration.class))).thenReturn(ackSet); + + InputCodec mockCodec = mock(InputCodec.class); + doAnswer(inv -> { + Consumer> consumer = inv.getArgument(1); + Event mockEvent = mock(Event.class); + consumer.accept(new Record<>(mockEvent)); + return null; + }).when(mockCodec).parse(any(), any()); + + FileReaderContext context = new FileReaderContext( + buffer, eventFactory, fileOps, metrics, rotationDetector, + acknowledgementSetManager, true, StandardCharsets.UTF_8, + 4096, 1048576, 5000, Duration.ofSeconds(30), + Duration.ofSeconds(30), StartPosition.BEGINNING, false, + Duration.ofSeconds(30), 1, Duration.ofSeconds(5), 3, mockCodec, true, null); + + final FileReader reader = createReaderWithContext(testFile, context); + reader.run(); + + verify(ackSet, atLeastOnce()).add(any(Event.class)); + verify(ackSet, atLeastOnce()).complete(); + } + + @Test + void run_update_file_lag_handles_io_exception() throws Exception { + Path testFile = tempDir.resolve("lag-error.log"); + Files.writeString(testFile, "line1\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + stubEventFactory(); + 
when(fileOps.size(testFile)).thenThrow(new IOException("disk error")); + + final FileReader reader = createReader(testFile); + reader.run(); + + assertThat(onCompleteCalled.get(), equalTo(true)); + } + + @Test + void getLastActivityMillis_returns_initial_value() throws Exception { + Path testFile = tempDir.resolve("activity.log"); + Files.writeString(testFile, ""); + long before = System.currentTimeMillis(); + final FileReader reader = createReader(testFile); + long after = System.currentTimeMillis(); + + assertThat(reader.getLastActivityMillis() >= before, equalTo(true)); + assertThat(reader.getLastActivityMillis() <= after, equalTo(true)); + } + + @Test + void run_emitLine_backpressure_records_timer_after_recovery() throws Exception { + Path testFile = tempDir.resolve("bp-timer.log"); + Files.writeString(testFile, "line1\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + stubEventFactory(); + when(fileOps.size(testFile)).thenReturn(Files.size(testFile)); + + Timer backpressureTimer = mock(Timer.class); + when(metrics.getBackpressureTimer()).thenReturn(backpressureTimer); + Counter writeTimeouts = mock(Counter.class); + when(metrics.getWriteTimeouts()).thenReturn(writeTimeouts); + + doThrow(new TimeoutException("buffer full")) + .doNothing() + .when(buffer).write(any(Record.class), anyInt()); + + final FileReader reader = createReader(testFile); + reader.run(); + + verify(backpressureTimer).record(anyLong(), any(TimeUnit.class)); + } + + @Test + void run_codec_backpressure_interrupt_stops_reader() throws Exception { + Path testFile = tempDir.resolve("codec-bp-interrupt.log"); + Files.writeString(testFile, "data\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + 
when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + + Counter bytesRead = mock(Counter.class); + Counter filesOpened = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter writeTimeouts = mock(Counter.class); + when(metrics.getBytesRead()).thenReturn(bytesRead); + when(metrics.getFilesOpened()).thenReturn(filesOpened); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + when(metrics.getWriteTimeouts()).thenReturn(writeTimeouts); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + + doThrow(new TimeoutException("buffer full")) + .when(buffer).write(any(Record.class), anyInt()); + + InputCodec mockCodec = mock(InputCodec.class); + doAnswer(inv -> { + Consumer> consumer = inv.getArgument(1); + Event mockEvent = mock(Event.class); + consumer.accept(new Record<>(mockEvent)); + return null; + }).when(mockCodec).parse(any(), any()); + + FileReaderContext context = createContextWithCodec(mockCodec); + + Thread readerThread = new Thread(() -> { + final FileReader reader = createReaderWithContext(testFile, context); + reader.run(); + }); + readerThread.start(); + await().atMost(2, TimeUnit.SECONDS).until(() -> readerThread.getState() == Thread.State.TIMED_WAITING || readerThread.getState() == Thread.State.WAITING || !readerThread.isAlive()); + readerThread.interrupt(); + readerThread.join(5000); + + assertThat(readerThread.isAlive(), equalTo(false)); + } + + @Test + void run_emitLine_backpressure_interrupt_stops_reader() throws Exception { + Path testFile = tempDir.resolve("bp-interrupt.log"); + Files.writeString(testFile, "line1\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + + 
Counter bytesRead = mock(Counter.class); + Counter filesOpened = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter linesRead = mock(Counter.class); + Counter eventsEmitted = mock(Counter.class); + Counter writeTimeouts = mock(Counter.class); + Timer backpressureTimer = mock(Timer.class); + when(metrics.getBytesRead()).thenReturn(bytesRead); + when(metrics.getFilesOpened()).thenReturn(filesOpened); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + lenient().when(metrics.getLinesRead()).thenReturn(linesRead); + lenient().when(metrics.getEventsEmitted()).thenReturn(eventsEmitted); + when(metrics.getWriteTimeouts()).thenReturn(writeTimeouts); + lenient().when(metrics.getBackpressureTimer()).thenReturn(backpressureTimer); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + stubEventFactory(); + + doThrow(new TimeoutException("buffer full")) + .when(buffer).write(any(Record.class), anyInt()); + + Thread readerThread = new Thread(() -> { + final FileReader reader = createReader(testFile); + reader.run(); + }); + readerThread.start(); + await().atMost(2, TimeUnit.SECONDS).until(() -> readerThread.getState() == Thread.State.TIMED_WAITING || readerThread.getState() == Thread.State.WAITING || !readerThread.isAlive()); + readerThread.interrupt(); + readerThread.join(5000); + + assertThat(readerThread.isAlive(), equalTo(false)); + } + + @Test + void run_drain_timeout_handles_io_exception_on_channel_size() throws Exception { + Path testFile = tempDir.resolve("drain-size-err.log"); + Files.writeString(testFile, "A".repeat(10000) + "\n"); + + FileIdentity newIdentity = mock(FileIdentity.class); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(new RotationResult(RotationType.CREATE_RENAME, newIdentity)); + + FileChannel mockChannel = mock(FileChannel.class); + when(fileOps.openReadChannel(testFile)).thenReturn(mockChannel); + when(mockChannel.position(anyLong())).thenReturn(mockChannel); 
+ when(mockChannel.read(any(ByteBuffer.class))).thenAnswer(inv -> { + Thread.sleep(5); + ByteBuffer buf = inv.getArgument(0); + byte[] data = "A".repeat(buf.remaining()).getBytes(); + buf.put(data, 0, Math.min(data.length, buf.remaining())); + return buf.position(); + }); + when(mockChannel.size()).thenThrow(new IOException("channel closed")); + + Counter filesRotated = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter bytesRead = mock(Counter.class); + Counter dataLossEvents = mock(Counter.class); + when(metrics.getFilesRotated()).thenReturn(filesRotated); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + when(metrics.getFilesOpened()).thenReturn(mock(Counter.class)); + when(metrics.getBytesRead()).thenReturn(bytesRead); + lenient().when(metrics.getDataLossEvents()).thenReturn(dataLossEvents); + + FileReaderContext context = new FileReaderContext( + buffer, eventFactory, fileOps, metrics, rotationDetector, + acknowledgementSetManager, false, StandardCharsets.UTF_8, + 4096, 1048576, 5000, Duration.ofSeconds(30), + Duration.ofMillis(1), StartPosition.BEGINNING, false, + Duration.ofSeconds(30), 1000, + Duration.ofSeconds(5), 3, null, true, null); + + stubEventFactory(); + Counter linesRead = mock(Counter.class); + Counter eventsEmitted = mock(Counter.class); + lenient().when(metrics.getLinesRead()).thenReturn(linesRead); + lenient().when(metrics.getEventsEmitted()).thenReturn(eventsEmitted); + + fileIdentity = mock(FileIdentity.class); + final FileReader reader = new FileReader(testFile, fileIdentity, checkpointEntry, context, + () -> onCompleteCalled.set(true)); + reader.run(); + } + + @Test + void run_updateFileLagBytes_handles_io_exception_on_file_size() throws Exception { + Path testFile = tempDir.resolve("lag-err.log"); + Files.writeString(testFile, "line1\n"); + + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + + FileChannel realChannel = 
FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(fileOps.size(testFile)).thenThrow(new IOException("disk error")); + + stubEventFactory(); + stubReadMetrics(); + + final FileReader reader = createReader(testFile); + reader.run(); + + assertThat(onCompleteCalled.get(), equalTo(true)); + } + + @Test /* NOTE(review): the method name says the ack set is NOT completed, yet the last verification below requires complete() to be called at least once. The arguments 10000 and Duration.ofHours(1) look like a batch size and batch timeout that a single line cannot reach, so the completion presumably happens when the reader finishes - TODO confirm against FileReader and either rename the test or tighten the assertion. */ + void run_with_acks_enabled_batch_not_full_does_not_complete_ack_set() throws Exception { + Path testFile = tempDir.resolve("ack-notfull.log"); + Files.writeString(testFile, "line1\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + stubEventFactory(); + when(fileOps.size(testFile)).thenReturn(Files.size(testFile)); + + AcknowledgementSet ackSet = mock(AcknowledgementSet.class); + when(acknowledgementSetManager.create(any(), any(Duration.class))).thenReturn(ackSet); + + FileReaderContext context = createContextWithAcknowledgements(10000, Duration.ofHours(1), 3); + final FileReader reader = createReaderWithContext(testFile, context); + reader.run(); + + verify(ackSet, atLeastOnce()).add(any(Event.class)); + verify(ackSet, atLeastOnce()).complete(); + } + + @Test + void run_without_acks_does_not_create_ack_set() throws Exception { + Path testFile = tempDir.resolve("no-ack.log"); + Files.writeString(testFile, "line1\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + stubEventFactory(); + when(fileOps.size(testFile)).thenReturn(Files.size(testFile)); + + final FileReader reader = createReader(testFile); + reader.run(); + +
verify(acknowledgementSetManager, never()).create(any(), any(Duration.class)); + } + + @Test + void run_with_acks_enabled_zero_batch_count_does_not_trigger_batch_timeout() throws Exception { + Path testFile = tempDir.resolve("ack-zero-batch.log"); + Files.writeString(testFile, ""); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + + Counter filesOpened = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter bytesRead = mock(Counter.class); + when(metrics.getFilesOpened()).thenReturn(filesOpened); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + lenient().when(metrics.getBytesRead()).thenReturn(bytesRead); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + + FileReaderContext context = createContextWithAcknowledgements(1000, Duration.ofMillis(0), 3); + final FileReader reader = createReaderWithContext(testFile, context); + reader.run(); + + verify(acknowledgementSetManager, never()).create(any(), any(Duration.class)); + } + + @Test + void run_with_codec_acks_disabled_does_not_create_ack_set() throws Exception { + Path testFile = tempDir.resolve("codec-no-ack.log"); + Files.writeString(testFile, "data\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + + Counter bytesRead = mock(Counter.class); + Counter filesOpened = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter linesRead = mock(Counter.class); + Counter eventsEmitted = mock(Counter.class); + Timer backpressureTimer = mock(Timer.class); + when(metrics.getBytesRead()).thenReturn(bytesRead); + 
when(metrics.getFilesOpened()).thenReturn(filesOpened); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + when(metrics.getLinesRead()).thenReturn(linesRead); + when(metrics.getEventsEmitted()).thenReturn(eventsEmitted); + lenient().when(metrics.getBackpressureTimer()).thenReturn(backpressureTimer); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + + InputCodec mockCodec = mock(InputCodec.class); + doAnswer(inv -> { + Consumer> consumer = inv.getArgument(1); + Event mockEvent = mock(Event.class); + consumer.accept(new Record<>(mockEvent)); + return null; + }).when(mockCodec).parse(any(), any()); + + FileReaderContext context = createContextWithCodec(mockCodec); + final FileReader reader = createReaderWithContext(testFile, context); + reader.run(); + + verify(acknowledgementSetManager, never()).create(any(), any(Duration.class)); + } + + @Test + void run_emitLine_thread_interrupted_during_backpressure_exits() throws Exception { + Path testFile = tempDir.resolve("bp-interrupt-line.log"); + Files.writeString(testFile, "line1\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + + Counter bytesRead = mock(Counter.class); + Counter filesOpened = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter writeTimeouts = mock(Counter.class); + when(metrics.getBytesRead()).thenReturn(bytesRead); + when(metrics.getFilesOpened()).thenReturn(filesOpened); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + when(metrics.getWriteTimeouts()).thenReturn(writeTimeouts); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + stubEventFactory(); + + doThrow(new TimeoutException("buffer full")) + .when(buffer).write(any(Record.class), anyInt()); + + Thread readerThread = new Thread(() -> { + 
final FileReader reader = createReader(testFile); + reader.run(); + }); + readerThread.start(); + await().atMost(2, TimeUnit.SECONDS).until(() -> readerThread.getState() == Thread.State.TIMED_WAITING || readerThread.getState() == Thread.State.WAITING || !readerThread.isAlive()); + readerThread.interrupt(); + readerThread.join(5000); + + assertThat(readerThread.isAlive(), equalTo(false)); + } + + @Test + void run_codec_thread_interrupted_during_backpressure_exits() throws Exception { + Path testFile = tempDir.resolve("codec-bp-int.log"); + Files.writeString(testFile, "data\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + + Counter bytesRead = mock(Counter.class); + Counter filesOpened = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter writeTimeouts = mock(Counter.class); + when(metrics.getBytesRead()).thenReturn(bytesRead); + when(metrics.getFilesOpened()).thenReturn(filesOpened); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + when(metrics.getWriteTimeouts()).thenReturn(writeTimeouts); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + + doThrow(new TimeoutException("buffer full")) + .when(buffer).write(any(Record.class), anyInt()); + + InputCodec mockCodec = mock(InputCodec.class); + doAnswer(inv -> { + Consumer> consumer = inv.getArgument(1); + Event mockEvent = mock(Event.class); + consumer.accept(new Record<>(mockEvent)); + return null; + }).when(mockCodec).parse(any(), any()); + + FileReaderContext context = createContextWithCodec(mockCodec); + + Thread readerThread = new Thread(() -> { + final FileReader reader = createReaderWithContext(testFile, context); + reader.run(); + }); + readerThread.start(); + await().atMost(2, TimeUnit.SECONDS).until(() -> readerThread.getState() == 
Thread.State.TIMED_WAITING || readerThread.getState() == Thread.State.WAITING || !readerThread.isAlive()); + readerThread.interrupt(); + readerThread.join(5000); + + assertThat(readerThread.isAlive(), equalTo(false)); + } + + @Test + void run_multi_byte_character_split_across_reads_triggers_decoder_carryover() throws Exception { + Path testFile = tempDir.resolve("multibyte.log"); + String multiByteContent = "\u00E9\u00E9\u00E9\u00E9\n"; + Files.writeString(testFile, multiByteContent); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + stubEventFactory(); + + final FileReader reader = createReader(testFile, 3, 1048576, false); + reader.run(); + + verify(buffer, atLeastOnce()).write(any(Record.class), eq(5000)); + } + + @Test + void run_skippingToNewline_skips_remainder_after_max_line_truncation() throws Exception { + Path testFile = tempDir.resolve("skip-newline.log"); + String longLine = "A".repeat(60) + "\nsecond\n"; + Files.writeString(testFile, longLine); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + stubReadMetrics(); + Counter linesTruncated = mock(Counter.class); + when(metrics.getLinesTruncated()).thenReturn(linesTruncated); + stubEventFactory(); + + final FileReader reader = createReader(testFile, 20, 10, false); + reader.run(); + + verify(linesTruncated, atLeastOnce()).increment(); + verify(buffer, atLeastOnce()).write(any(Record.class), eq(5000)); + } + + @Test + void run_backpressure_retry_timeout_exceeded_logs_data_loss() throws Exception { + Path testFile = tempDir.resolve("bp-timeout.log"); + 
Files.writeString(testFile, "line1\n"); + FileChannel realChannel = FileChannel.open(testFile, StandardOpenOption.READ); + when(fileOps.openReadChannel(testFile)).thenReturn(realChannel); + when(rotationDetector.checkRotation(any(), any(), any(long.class))) + .thenReturn(RotationResult.NO_ROTATION); + + Counter bytesRead = mock(Counter.class); + Counter filesOpened = mock(Counter.class); + Counter filesClosed = mock(Counter.class); + Counter writeTimeouts = mock(Counter.class); + Counter dataLossEvents = mock(Counter.class); + when(metrics.getBytesRead()).thenReturn(bytesRead); + when(metrics.getFilesOpened()).thenReturn(filesOpened); + when(metrics.getFilesClosed()).thenReturn(filesClosed); + when(metrics.getWriteTimeouts()).thenReturn(writeTimeouts); + when(metrics.getDataLossEvents()).thenReturn(dataLossEvents); + lenient().when(metrics.getFileLagBytes()).thenReturn(new AtomicLong(0)); + stubEventFactory(); + + doThrow(new TimeoutException("buffer full")) + .when(buffer).write(any(Record.class), anyInt()); + + FileReaderContext context = new FileReaderContext( + buffer, eventFactory, fileOps, metrics, rotationDetector, + acknowledgementSetManager, false, StandardCharsets.UTF_8, + 4096, 1048576, 5000, Duration.ofMillis(100), + Duration.ofSeconds(30), StartPosition.BEGINNING, false, + Duration.ofMillis(100), 1000, + Duration.ofSeconds(5), 3, null, true, null); + + fileIdentity = mock(FileIdentity.class); + final FileReader reader = new FileReader(testFile, fileIdentity, checkpointEntry, context, + () -> onCompleteCalled.set(true)); + reader.run(); + + verify(dataLossEvents).increment(); + assertThat(onCompleteCalled.get(), equalTo(true)); + } +} diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileSourceConfigTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileSourceConfigTest.java new file mode 100644 index 0000000000..649b191e93 --- /dev/null +++ 
b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileSourceConfigTest.java @@ -0,0 +1,244 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.ValueSource; +import org.opensearch.dataprepper.model.configuration.PluginModel; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.time.Duration; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.hasSize; +import static org.junit.jupiter.api.Assertions.assertThrows; + +class FileSourceConfigTest { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @ParameterizedTest + @ValueSource(strings = {FileSourceConfig.EVENT_TYPE, FileSourceConfig.DEFAULT_TYPE}) + void codeRequiresRecordTypeEvent_returns_true_if_no_codec(final String recordType) { + final Map fileConfigMap = Map.of(FileSourceConfig.ATTRIBUTE_TYPE, recordType); + final FileSourceConfig objectUnderTest = OBJECT_MAPPER.convertValue(fileConfigMap, FileSourceConfig.class); + + assertThat(objectUnderTest.codeRequiresRecordTypeEvent(), equalTo(true)); + } + + @ParameterizedTest + @CsvSource({ + FileSourceConfig.EVENT_TYPE + ",true", + FileSourceConfig.DEFAULT_TYPE + ",false" + }) + void 
codeRequiresRecordTypeEvent_returns_expected_value_when_there_is_a_codec(final String recordType, final boolean expected) { + final Map fileConfigMap = Map.of( + FileSourceConfig.ATTRIBUTE_TYPE, recordType, + "codec", new PluginModel("fake_codec", Collections.emptyMap()) + ); + final FileSourceConfig objectUnderTest = OBJECT_MAPPER.convertValue(fileConfigMap, FileSourceConfig.class); + + assertThat(objectUnderTest.codeRequiresRecordTypeEvent(), equalTo(expected)); + } + + @Test + void tail_defaults_to_false() { + final Map configMap = Map.of("path", "/tmp/test.log"); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + assertThat(config.isTail(), equalTo(false)); + } + + @Test + void tail_can_be_set_to_true() { + final Map configMap = Map.of("path", "/tmp/test.log", "tail", true); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + assertThat(config.isTail(), equalTo(true)); + } + + @Test + void paths_defaults_to_empty_list() { + final Map configMap = Map.of("path", "/tmp/test.log"); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + assertThat(config.getPaths(), empty()); + } + + @Test + void paths_returns_configured_values() { + final Map configMap = Map.of("paths", List.of("/var/log/*.log", "/tmp/*.log")); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + assertThat(config.getPaths(), hasSize(2)); + } + + @Test + void getAllPaths_merges_path_and_paths() { + final Map configMap = Map.of( + "path", "/tmp/single.log", + "paths", List.of("/var/log/*.log") + ); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + assertThat(config.getAllPaths(), hasSize(2)); + assertThat(config.getAllPaths(), containsInAnyOrder("/var/log/*.log", "/tmp/single.log")); + } + + @Test + void getAllPaths_deduplicates_when_path_is_in_paths() { + 
final Map configMap = Map.of( + "path", "/var/log/*.log", + "paths", List.of("/var/log/*.log") + ); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + assertThat(config.getAllPaths(), hasSize(1)); + } + + @Test + void validate_succeeds_with_path_when_tail_false() { + final Map configMap = Map.of("path", "/tmp/test.log"); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + config.validate(); + } + + @Test + void validate_fails_without_any_path() { + final Map configMap = Map.of("format", "plain", "record_type", "string"); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + assertThrows(IllegalArgumentException.class, config::validate); + } + + @Test + void validate_succeeds_with_paths_when_tail_false() { + final Map configMap = Map.of("paths", List.of("/var/log/*.log")); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + config.validate(); + } + + @Test + void validate_succeeds_with_paths_when_tail_true() { + final Map configMap = Map.of("tail", true, "paths", List.of("/var/log/*.log")); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + config.validate(); + } + + @Test + void validate_succeeds_with_path_when_tail_true() { + final Map configMap = Map.of("tail", true, "path", "/tmp/test.log"); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + config.validate(); + } + + @Test + void validate_fails_without_any_path_when_tail_true() { + final Map configMap = Map.of("tail", true); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + assertThrows(IllegalArgumentException.class, config::validate); + } + + @Test + void default_config_returns_expected_values() { + final Map configMap = Map.of("path", "/tmp/test.log"); + final 
FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + assertThat(config.getStartPosition(), equalTo(StartPosition.BEGINNING)); + assertThat(config.getPollInterval(), equalTo(Duration.ofSeconds(1))); + assertThat(config.getEncoding(), equalTo("UTF-8")); + assertThat(config.getReadBufferSize(), equalTo(65536)); + assertThat(config.getMaxActiveFiles(), equalTo(1000)); + assertThat(config.getReaderThreads(), equalTo(4)); + assertThat(config.getMaxReadTimePerFile(), equalTo(Duration.ofSeconds(5))); + assertThat(config.getRotateWait(), equalTo(Duration.ofSeconds(5))); + assertThat(config.getRotationDrainTimeout(), equalTo(Duration.ofSeconds(30))); + assertThat(config.getCheckpointFile(), nullValue()); + assertThat(config.getCheckpointInterval(), equalTo(Duration.ofSeconds(5))); + assertThat(config.getCheckpointCleanupAfter(), equalTo(Duration.ofDays(7))); + assertThat(config.getFingerprintBytes(), equalTo(1024)); + assertThat(config.getCloseInactive(), equalTo(Duration.ofMinutes(30))); + assertThat(config.isCloseRemoved(), equalTo(true)); + assertThat(config.getBatchSize(), equalTo(100)); + assertThat(config.getBatchTimeout(), equalTo(Duration.ofSeconds(5))); + assertThat(config.getAcknowledgmentTimeout(), equalTo(Duration.ofSeconds(60))); + assertThat(config.getMaxAcknowledgmentRetries(), equalTo(3)); + assertThat(config.isIncludeFileMetadata(), equalTo(true)); + assertThat(config.getMaxLineLength(), equalTo(1048576)); + } + + @Test + void exclude_paths_defaults_to_empty_list() { + final Map configMap = Map.of("path", "/tmp/test.log"); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + assertThat(config.getExcludePaths(), empty()); + } + + @Test + void exclude_paths_returns_configured_values() { + final Map configMap = Map.of( + "path", "/tmp/test.log", + "exclude_paths", List.of("/tmp/exclude*.log") + ); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, 
FileSourceConfig.class); + + assertThat(config.getExcludePaths(), hasSize(1)); + } + + @Test + void getAllPaths_with_null_filePathToRead_returns_only_paths() { + final Map configMap = Map.of( + "paths", List.of("/var/log/*.log") + ); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + assertThat(config.getAllPaths(), hasSize(1)); + assertThat(config.getAllPaths(), containsInAnyOrder("/var/log/*.log")); + } + + @Test + void validate_fails_when_tail_true_and_filePathToRead_is_empty_and_paths_is_null() { + final Map configMap = Map.of("tail", true, "path", ""); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + assertThrows(IllegalArgumentException.class, config::validate); + } + + @Test + void validate_fails_when_start_position_end_and_tail_false() { + final Map configMap = Map.of( + "path", "/tmp/test.log", + "start_position", "end" + ); + final FileSourceConfig config = OBJECT_MAPPER.convertValue(configMap, FileSourceConfig.class); + + assertThrows(IllegalArgumentException.class, config::validate); + } + + @Test + void getFormat_throws_when_format_is_null() { + assertThrows(IllegalArgumentException.class, () -> FileFormat.fromString(null)); + } +} diff --git a/data-prepper-plugins/common/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileSourceTests.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileSourceTests.java similarity index 54% rename from data-prepper-plugins/common/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileSourceTests.java rename to data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileSourceTests.java index 1a6b20a23a..81c53a52cf 100644 --- a/data-prepper-plugins/common/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileSourceTests.java +++ 
b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/FileSourceTests.java @@ -1,6 +1,11 @@ /* * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * */ package org.opensearch.dataprepper.plugins.source.file; @@ -18,6 +23,7 @@ import org.opensearch.dataprepper.event.TestEventFactory; import org.opensearch.dataprepper.metrics.PluginMetrics; import org.opensearch.dataprepper.model.buffer.Buffer; +import org.opensearch.dataprepper.model.acknowledgements.AcknowledgementSetManager; import org.opensearch.dataprepper.model.codec.DecompressionEngine; import org.opensearch.dataprepper.model.codec.InputCodec; import org.opensearch.dataprepper.model.configuration.PipelineDescription; @@ -30,10 +36,7 @@ import org.opensearch.dataprepper.plugins.buffer.blockingbuffer.BlockingBuffer; import org.opensearch.dataprepper.plugins.buffer.blockingbuffer.BlockingBufferConfig; import org.opensearch.dataprepper.plugins.codec.CompressionOption; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; @@ -41,6 +44,7 @@ import java.util.List; import java.util.Map; import java.util.UUID; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.function.Consumer; @@ -50,22 +54,25 @@ import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.CoreMatchers.notNullValue; import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; import 
static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.after; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; @ExtendWith(MockitoExtension.class) public class FileSourceTests { - private static final Logger LOG = LoggerFactory.getLogger(FileSourceTests.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private static final TypeReference> MAP_TYPE_REFERENCE = new TypeReference>() { }; private static final String TEST_FILE_PATH_PLAIN = "src/test/resources/test-file-source-plain.tst"; + private static final String MESSAGE_KEY = "message"; private FileSourceConfig fileSourceConfig; @@ -90,16 +97,9 @@ void setUp() { private FileSource createObjectUnderTest() { fileSourceConfig = OBJECT_MAPPER.convertValue(pluginSettings, FileSourceConfig.class); - return new FileSource(fileSourceConfig, pluginMetrics, pluginFactory, TestEventFactory.getTestEventFactory()); + return new FileSource(fileSourceConfig, pluginMetrics, pluginFactory, TestEventFactory.getTestEventFactory(), null); } - /** - * Variant of creatgeObjectUnderTest that uses mocks for the configuration instead of object mapper, so we can - * pass concrete mocks to the FileSource through the FileSourceConfig. 
- * @param codec the codec to use in the configuration - * @param engine the {@link DecompressionEngine} to use in the configuration - * @return - */ private FileSource createObjectUnderTest(PluginModel codec, DecompressionEngine engine) { FileSourceConfig fileSourceConfig = mock(FileSourceConfig.class); @@ -115,7 +115,7 @@ private FileSource createObjectUnderTest(PluginModel codec, DecompressionEngine when(fileSourceConfig.getCompression()).thenReturn(compressionOption); } - return new FileSource(fileSourceConfig, pluginMetrics, pluginFactory, TestEventFactory.getTestEventFactory()); + return new FileSource(fileSourceConfig, pluginMetrics, pluginFactory, TestEventFactory.getTestEventFactory(), null); } @Nested @@ -140,30 +140,27 @@ public void setup() throws JsonProcessingException { expectedEventsJson = new ArrayList<>(); expectedEventsInvalidJson = new ArrayList<>(); - // plain final String expectedPlainFirstLine = "THIS IS A PLAINTEXT LINE"; final String expectedPlainSecondLine = "THIS IS ANOTHER PLAINTEXT LINE"; - final Record firstEventPlain = createRecordEventWithKeyValuePair(FileSource.MESSAGE_KEY, expectedPlainFirstLine); - final Record secondEventPlain = createRecordEventWithKeyValuePair(FileSource.MESSAGE_KEY, expectedPlainSecondLine); + final Record firstEventPlain = createRecordEventWithKeyValuePair(MESSAGE_KEY, expectedPlainFirstLine); + final Record secondEventPlain = createRecordEventWithKeyValuePair(MESSAGE_KEY, expectedPlainSecondLine); expectedEventsPlain.add(firstEventPlain); expectedEventsPlain.add(secondEventPlain); - //json final Record firstEventJson = createRecordEventWithKeyValuePair("test_key", "test_value"); final Record secondEventJson = createRecordEventWithKeyValuePair("second_test_key", "second_test_value"); expectedEventsJson.add(firstEventJson); expectedEventsJson.add(secondEventJson); - // invalid json final String expectedInvalidJsonFirstLine = "{\"test_key: test_value\"}"; final String expectedInvalidJsonSecondLine = 
"{\"second_test_key\": \"second_test_value\""; - final Record firstEventInvalidJson = createRecordEventWithKeyValuePair(FileSource.MESSAGE_KEY, expectedInvalidJsonFirstLine); - final Record secondEventInvalidJson = createRecordEventWithKeyValuePair(FileSource.MESSAGE_KEY, expectedInvalidJsonSecondLine); + final Record firstEventInvalidJson = createRecordEventWithKeyValuePair(MESSAGE_KEY, expectedInvalidJsonFirstLine); + final Record secondEventInvalidJson = createRecordEventWithKeyValuePair(MESSAGE_KEY, expectedInvalidJsonSecondLine); expectedEventsInvalidJson.add(firstEventInvalidJson); expectedEventsInvalidJson.add(secondEventInvalidJson); @@ -184,33 +181,37 @@ private BlockingBuffer> getBuffer() throws JsonProcessingExceptio } @Test - public void testFileSourceWithEmptyFilePathDoesNotWriteToBuffer() throws InterruptedException { - buffer = mock(Buffer.class); + public void testFileSourceWithEmptyFilePathThrowsValidationError() { pluginSettings.put(FileSourceConfig.ATTRIBUTE_PATH, ""); - fileSource = createObjectUnderTest(); - fileSource.start(buffer); - Thread.sleep(500); - verifyNoInteractions(buffer); + assertThrows(IllegalArgumentException.class, () -> createObjectUnderTest()); } @Test - public void testFileSourceWithNonexistentFilePathDoesNotWriteToBuffer() throws InterruptedException { + public void testFileSourceWithNonexistentFilePathDoesNotWriteToBuffer() throws TimeoutException { buffer = mock(Buffer.class); pluginSettings.put(FileSourceConfig.ATTRIBUTE_PATH, FILE_DOES_NOT_EXIST); fileSource = createObjectUnderTest(); fileSource.start(buffer); - Thread.sleep(500); - verifyNoInteractions(buffer); + verify(buffer, after(500).never()).write(any(Record.class), anyInt()); } @Test public void testFileSourceWithNullFilePathThrowsNullPointerException() { pluginSettings.put(FileSourceConfig.ATTRIBUTE_PATH, null); - assertThrows(NullPointerException.class, FileSourceTests.this::createObjectUnderTest); + assertThrows(IllegalArgumentException.class, 
FileSourceTests.this::createObjectUnderTest); } @Test - public void testFileWithPlainTextAddsEventsToBufferCorrectly() { + public void testStopBeforeStartPreventsProcessing() throws TimeoutException { + buffer = mock(Buffer.class); + fileSource = createObjectUnderTest(); + fileSource.stop(); + fileSource.start(buffer); + verify(buffer, after(500).never()).write(any(Record.class), anyInt()); + } + + @Test + public void testFileWithPlainTextAddsEventsToBufferCorrectly() throws JsonProcessingException { fileSource = createObjectUnderTest(); fileSource.start(buffer); @@ -221,7 +222,7 @@ public void testFileWithPlainTextAddsEventsToBufferCorrectly() { } @Test - public void testFileWithJSONAddsEventsToBufferCorrectly() { + public void testFileWithJSONAddsEventsToBufferCorrectly() throws JsonProcessingException { pluginSettings.put(FileSourceConfig.ATTRIBUTE_PATH, TEST_FILE_PATH_JSON); pluginSettings.put(FileSourceConfig.ATTRIBUTE_FORMAT, "json"); @@ -235,7 +236,7 @@ public void testFileWithJSONAddsEventsToBufferCorrectly() { } @Test - public void testFileWithInvalidJSONAddsEventsToBufferAsPlainText() { + public void testFileWithInvalidJSONAddsEventsToBufferAsPlainText() throws JsonProcessingException { pluginSettings.put(FileSourceConfig.ATTRIBUTE_PATH, TEST_FILE_PATH_INVALID_JSON); pluginSettings.put(FileSourceConfig.ATTRIBUTE_FORMAT, "json"); fileSource = createObjectUnderTest(); @@ -273,7 +274,7 @@ public void testNonSupportedFileTypeThrowsIllegalArgumentException() { assertThrows(IllegalArgumentException.class, FileSourceTests.this::createObjectUnderTest); } - void assertExpectedRecordsAreEqual(final List> expectedEvents, final List> actualEvents) { + void assertExpectedRecordsAreEqual(final List> expectedEvents, final List> actualEvents) throws JsonProcessingException { for (int i = 0; i < expectedEvents.size(); i++) { assertThat(actualEvents.get(i), notNullValue()); assertThat(actualEvents.get(i).getData(), notNullValue()); @@ -281,16 +282,12 @@ void 
assertExpectedRecordsAreEqual(final List> expectedEvents, fi } } - void assertEventRecordsAreEqual(final Record first, final Record second) { - try { - final Event firstEvent = (Event) first.getData(); - final Event secondEvent = (Event) second.getData(); - final Map recordMapFirst = OBJECT_MAPPER.readValue(firstEvent.toJsonString(), MAP_TYPE_REFERENCE); - final Map recordMapSecond = OBJECT_MAPPER.readValue(secondEvent.toJsonString(), MAP_TYPE_REFERENCE); - assertThat(recordMapFirst, is(equalTo(recordMapSecond))); - } catch (JsonProcessingException e) { - LOG.error("Unable to parse Event as JSON"); - } + void assertEventRecordsAreEqual(final Record first, final Record second) throws JsonProcessingException { + final Event firstEvent = (Event) first.getData(); + final Event secondEvent = (Event) second.getData(); + final Map recordMapFirst = OBJECT_MAPPER.readValue(firstEvent.toJsonString(), MAP_TYPE_REFERENCE); + final Map recordMapSecond = OBJECT_MAPPER.readValue(secondEvent.toJsonString(), MAP_TYPE_REFERENCE); + assertThat(recordMapFirst, is(equalTo(recordMapSecond))); } private Record createRecordEventWithKeyValuePair(final String key, final String value) { @@ -313,33 +310,25 @@ class WithCodec { @Mock private Buffer buffer; - @Mock - private DecompressionEngine decompressionEngine; - @BeforeEach void setUp() { + pluginMetrics = PluginMetrics.fromNames("file", "test-codec-pipeline"); + Map codecConfiguration = Map.of(UUID.randomUUID().toString(), UUID.randomUUID().toString()); Map> codecSettings = Map.of("fake_codec", codecConfiguration); pluginSettings.put("codec", codecSettings); + pluginSettings.put(FileSourceConfig.ATTRIBUTE_TYPE, FileSourceConfig.EVENT_TYPE); when(pluginFactory.loadPlugin(eq(InputCodec.class), any(PluginSetting.class))) .thenReturn(inputCodec); } @Test - void start_will_parse_codec_with_correct_inputStream() throws IOException { - final FileInputStream decompressedStream = new FileInputStream(TEST_FILE_PATH_PLAIN); - DecompressionEngine 
mockEngine = mock(DecompressionEngine.class); - when(mockEngine.createInputStream(any(InputStream.class))).thenReturn(decompressedStream); - - PluginModel fakeCodec = mock(PluginModel.class); - when(fakeCodec.getPluginName()).thenReturn("fake_codec"); - when(fakeCodec.getPluginSettings()).thenReturn(Map.of()); - - createObjectUnderTest(fakeCodec, mockEngine).start(buffer); + void start_will_parse_codec_with_inputStream() throws IOException { + createObjectUnderTest().start(buffer); - await().atMost(2, TimeUnit.SECONDS) - .untilAsserted(() -> verify(inputCodec).parse(eq(decompressedStream), any(Consumer.class))); + await().atMost(5, TimeUnit.SECONDS) + .untilAsserted(() -> verify(inputCodec).parse(any(InputStream.class), any(Consumer.class))); } @Test @@ -348,7 +337,7 @@ void start_will_parse_codec_with_a_Consumer_that_writes_to_the_buffer() throws I final ArgumentCaptor consumerArgumentCaptor = ArgumentCaptor.forClass(Consumer.class); - await().atMost(2, TimeUnit.SECONDS) + await().atMost(5, TimeUnit.SECONDS) .untilAsserted(() -> verify(inputCodec).parse(any(InputStream.class), any(Consumer.class))); verify(inputCodec).parse(any(InputStream.class), consumerArgumentCaptor.capture()); @@ -362,20 +351,201 @@ void start_will_parse_codec_with_a_Consumer_that_writes_to_the_buffer() throws I } @Test - void start_will_throw_exception_if_codec_throws() throws IOException, TimeoutException, InterruptedException { - - final IOException mockedException = mock(IOException.class); - doThrow(mockedException) + void start_will_not_crash_if_codec_throws() throws IOException { + doThrow(new IOException("parse failed")) .when(inputCodec).parse(any(InputStream.class), any(Consumer.class)); FileSource objectUnderTest = createObjectUnderTest(); - objectUnderTest.start(buffer); - Thread.sleep(2_000); + await().atMost(5, TimeUnit.SECONDS) + .untilAsserted(() -> verify(inputCodec).parse(any(InputStream.class), any(Consumer.class))); + } + + @Test + void 
start_codec_consumer_wraps_timeout_exception() throws IOException, TimeoutException { + doThrow(new TimeoutException("buffer full")) + .when(buffer).write(any(Record.class), eq(FileSourceConfig.DEFAULT_TIMEOUT)); + + createObjectUnderTest().start(buffer); + + final ArgumentCaptor consumerArgumentCaptor = ArgumentCaptor.forClass(Consumer.class); + + await().atMost(5, TimeUnit.SECONDS) + .untilAsserted(() -> verify(inputCodec).parse(any(InputStream.class), any(Consumer.class))); + + verify(inputCodec).parse(any(InputStream.class), consumerArgumentCaptor.capture()); + + final Consumer> actualConsumer = consumerArgumentCaptor.getValue(); + final Record record = mock(Record.class); + + assertThrows(RuntimeException.class, () -> actualConsumer.accept(record)); + } + } + + @Nested + class TailMode { + + @Mock + private Buffer> buffer; + + @Test + void start_in_tail_mode_invokes_startTailing() throws Exception { + pluginSettings.put("tail", true); + pluginSettings.put("paths", List.of("/tmp/nonexistent-test-glob-*.log")); + pluginSettings.remove(FileSourceConfig.ATTRIBUTE_PATH); + pluginSettings.put(FileSourceConfig.ATTRIBUTE_PATH, "/tmp/nonexistent-test-glob-single.log"); + + FileSource fileSource = createObjectUnderTest(); + fileSource.start(buffer); + fileSource.stop(); + } + + @Test + void start_in_tail_mode_with_codec_invokes_startTailing() throws Exception { + pluginSettings.put("tail", true); + pluginSettings.put("paths", List.of("/tmp/nonexistent-codec-glob-*.log")); + pluginSettings.put(FileSourceConfig.ATTRIBUTE_PATH, "/tmp/nonexistent-codec-glob-single.log"); + + Map codecConfiguration = Map.of(UUID.randomUUID().toString(), UUID.randomUUID().toString()); + Map> codecSettings = Map.of("fake_codec", codecConfiguration); + pluginSettings.put("codec", codecSettings); + + InputCodec mockCodec = mock(InputCodec.class); + when(pluginFactory.loadPlugin(eq(InputCodec.class), any(PluginSetting.class))) + .thenReturn(mockCodec); + + FileSource fileSource = 
createObjectUnderTest(); + fileSource.start(buffer); + fileSource.stop(); + } + + @Test + void start_in_tail_mode_with_high_ratio_logs_warning() throws Exception { + pluginSettings.put("tail", true); + pluginSettings.put("paths", List.of("/tmp/nonexistent-ratio-*.log")); + pluginSettings.put(FileSourceConfig.ATTRIBUTE_PATH, "/tmp/nonexistent-ratio-single.log"); + pluginSettings.put("max_active_files", 1000); + pluginSettings.put("reader_threads", 1); + + FileSource fileSource = createObjectUnderTest(); + fileSource.start(buffer); + fileSource.stop(); + } + + @Test + void start_in_tail_mode_with_checkpoint_path() throws Exception { + pluginSettings.put("tail", true); + pluginSettings.put("paths", List.of("/tmp/nonexistent-cp-*.log")); + pluginSettings.put(FileSourceConfig.ATTRIBUTE_PATH, "/tmp/nonexistent-cp-single.log"); + pluginSettings.put("checkpoint_file", "/tmp/test-checkpoint-" + UUID.randomUUID() + ".json"); + + FileSource fileSource = createObjectUnderTest(); + fileSource.start(buffer); + fileSource.stop(); + } + + @Test + void stop_before_start_does_not_throw() { + FileSource fileSource = createObjectUnderTest(); + assertDoesNotThrow(fileSource::stop); + } + + @Test + void start_in_tail_mode_rethrows_runtime_exception_from_startTailing() { + pluginSettings.put("tail", true); + pluginSettings.put("paths", List.of("/tmp/nonexistent-err-*.log")); + pluginSettings.put(FileSourceConfig.ATTRIBUTE_PATH, "/tmp/nonexistent-err-single.log"); + pluginSettings.put("fingerprint_bytes", 0); + + FileSource fileSource = createObjectUnderTest(); + assertThrows(IllegalArgumentException.class, () -> fileSource.start(buffer)); + } + + @Test + void stop_after_classic_start_joins_thread() throws Exception { + FileSource fileSource = createObjectUnderTest(); + fileSource.start(buffer); + fileSource.stop(); + } - verifyNoInteractions(buffer); + @Test + void stop_with_interrupt_during_join() throws Exception { + buffer = mock(Buffer.class); + final CountDownLatch writeStarted = 
new CountDownLatch(1); + doAnswer(inv -> { + writeStarted.countDown(); + Thread.sleep(5000); + return null; + }).when(buffer).write(any(Record.class), eq(FileSourceConfig.DEFAULT_TIMEOUT)); + + FileSource fileSource = createObjectUnderTest(); + fileSource.start(buffer); + writeStarted.await(2, TimeUnit.SECONDS); + + Thread stopThread = new Thread(() -> { + Thread.currentThread().interrupt(); + fileSource.stop(); + }); + stopThread.start(); + stopThread.join(5000); } + @Test + void areAcknowledgementsEnabled_returns_false_by_default() { + FileSource fileSource = createObjectUnderTest(); + assertThat(fileSource.areAcknowledgementsEnabled(), equalTo(false)); + } + + @Test + void areAcknowledgementsEnabled_returns_true_when_configured() { + pluginSettings.put("acknowledgments", true); + fileSourceConfig = OBJECT_MAPPER.convertValue(pluginSettings, FileSourceConfig.class); + AcknowledgementSetManager mockAckManager = mock(AcknowledgementSetManager.class); + FileSource fileSource = new FileSource(fileSourceConfig, pluginMetrics, pluginFactory, + TestEventFactory.getTestEventFactory(), mockAckManager); + assertThat(fileSource.areAcknowledgementsEnabled(), equalTo(true)); + } + + @Test + void start_in_tail_mode_with_safe_ratio_does_not_warn() throws Exception { + pluginSettings.put("tail", true); + pluginSettings.put("paths", List.of("/tmp/nonexistent-ratio-safe-*.log")); + pluginSettings.put(FileSourceConfig.ATTRIBUTE_PATH, "/tmp/nonexistent-ratio-safe-single.log"); + pluginSettings.put("max_active_files", 100); + pluginSettings.put("reader_threads", 2); + + FileSource fileSource = createObjectUnderTest(); + fileSource.start(buffer); + fileSource.stop(); + } + + @Test + void stop_mid_read_stops_processing_lines() throws Exception { + pluginSettings.put(FileSourceConfig.ATTRIBUTE_TYPE, FileSourceConfig.EVENT_TYPE); + pluginSettings.put(FileSourceConfig.ATTRIBUTE_PATH, TEST_FILE_PATH_PLAIN); + + buffer = mock(Buffer.class); + final CountDownLatch writeStarted = new 
CountDownLatch(1); + doAnswer(inv -> { + writeStarted.countDown(); + Thread.sleep(2000); + return null; + }).when(buffer).write(any(Record.class), eq(FileSourceConfig.DEFAULT_TIMEOUT)); + + FileSource fileSource = createObjectUnderTest(); + fileSource.start(buffer); + writeStarted.await(2, TimeUnit.SECONDS); + fileSource.stop(); + } + + @Test + void writeLineAsEventOrString_with_non_matching_type_does_not_write() throws Exception { + pluginSettings.put(FileSourceConfig.ATTRIBUTE_TYPE, FileSourceConfig.EVENT_TYPE); + pluginSettings.put(FileSourceConfig.ATTRIBUTE_PATH, TEST_FILE_PATH_PLAIN); + FileSource fileSource = createObjectUnderTest(); + fileSource.start(buffer); + fileSource.stop(); + } } } diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/GlobPathResolverTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/GlobPathResolverTest.java new file mode 100644 index 0000000000..7bdd1d2b22 --- /dev/null +++ b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/GlobPathResolverTest.java @@ -0,0 +1,259 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.FileVisitResult; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.attribute.BasicFileAttributes; +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.notNullValue; +import static org.junit.jupiter.api.Assertions.assertThrows; + +class GlobPathResolverTest { + + @TempDir + Path tempDir; + + @BeforeEach + void setUp() throws IOException { + Files.createDirectories(tempDir.resolve("subdir")); + Files.createFile(tempDir.resolve("app.log")); + Files.createFile(tempDir.resolve("error.log")); + Files.createFile(tempDir.resolve("app.txt")); + Files.createFile(tempDir.resolve("subdir/nested.log")); + } + + @Test + void resolve_matches_glob_pattern_for_log_files() { + final GlobPathResolver resolver = new GlobPathResolver( + List.of(tempDir.toString() + "/*.log"), + Collections.emptyList()); + + final Set result = resolver.resolve(); + + assertThat(result, hasSize(2)); + assertThat(result, hasItem(tempDir.resolve("app.log").toAbsolutePath().normalize())); + assertThat(result, hasItem(tempDir.resolve("error.log").toAbsolutePath().normalize())); + } + + @Test + void resolve_excludes_files_matching_exclude_patterns() { + final GlobPathResolver resolver = new GlobPathResolver( + List.of(tempDir.toString() + "/*.log"), + List.of(tempDir.toString() + "/error.*")); + + final Set 
result = resolver.resolve(); + + assertThat(result, hasSize(1)); + assertThat(result, hasItem(tempDir.resolve("app.log").toAbsolutePath().normalize())); + } + + @Test + void resolve_matches_recursive_double_star_pattern() { + final GlobPathResolver resolver = new GlobPathResolver( + List.of(tempDir.toString() + "/" + "**/*.log"), + Collections.emptyList()); + + final Set result = resolver.resolve(); + + assertThat(result, hasItem(tempDir.resolve("subdir/nested.log").toAbsolutePath().normalize())); + } + + @Test + void resolve_returns_empty_set_when_no_files_match() { + final GlobPathResolver resolver = new GlobPathResolver( + List.of(tempDir.toString() + "/*.csv"), + Collections.emptyList()); + + final Set result = resolver.resolve(); + + assertThat(result, empty()); + } + + @Test + void matches_returns_true_for_matching_path() { + final GlobPathResolver resolver = new GlobPathResolver( + List.of(tempDir.toString() + "/*.log"), + Collections.emptyList()); + + assertThat(resolver.matches(tempDir.resolve("app.log")), equalTo(true)); + } + + @Test + void matches_returns_false_for_non_matching_path() { + final GlobPathResolver resolver = new GlobPathResolver( + List.of(tempDir.toString() + "/*.log"), + Collections.emptyList()); + + assertThat(resolver.matches(tempDir.resolve("app.txt")), equalTo(false)); + } + + @Test + void matches_returns_false_for_excluded_path() { + final GlobPathResolver resolver = new GlobPathResolver( + List.of(tempDir.toString() + "/*.log"), + List.of(tempDir.toString() + "/error.*")); + + assertThat(resolver.matches(tempDir.resolve("error.log")), equalTo(false)); + } + + @Test + void getWatchDirectories_returns_base_directories() { + final GlobPathResolver resolver = new GlobPathResolver( + List.of(tempDir.toString() + "/*.log"), + Collections.emptyList()); + + final Set watchDirs = resolver.getWatchDirectories(); + + assertThat(watchDirs, hasSize(1)); + assertThat(watchDirs, hasItem(tempDir.toAbsolutePath().normalize())); + } + + @Test + void 
extractBaseDirectory_stops_at_first_wildcard() { + final Path baseDir = GlobPathResolver.extractBaseDirectory(tempDir.toString() + "/logs/*.log"); + final Path expected = tempDir.resolve("logs").toAbsolutePath().normalize(); + final Path expectedParent = expected.getParent(); + + assertThat(baseDir, notNullValue()); + assertThat(baseDir.toString().startsWith(tempDir.toAbsolutePath().normalize().toString()), equalTo(true)); + } + + @Test + void constructor_throws_on_invalid_glob_pattern() { + assertThrows(IllegalArgumentException.class, () -> + new GlobPathResolver(List.of(tempDir.toString() + "/[invalid"), Collections.emptyList())); + } + + @Test + void resolve_handles_null_exclude_patterns() { + final GlobPathResolver resolver = new GlobPathResolver( + List.of(tempDir.toString() + "/*.log"), + null); + + final Set result = resolver.resolve(); + + assertThat(result, hasSize(2)); + } + + @Test + void resolve_handles_multiple_include_patterns() { + final GlobPathResolver resolver = new GlobPathResolver( + List.of(tempDir.toString() + "/*.log", tempDir.toString() + "/*.txt"), + Collections.emptyList()); + + final Set result = resolver.resolve(); + + assertThat(result, hasSize(greaterThanOrEqualTo(3))); + } + + @Test + void getWatchDirectories_returns_multiple_directories_for_multiple_patterns() { + final GlobPathResolver resolver = new GlobPathResolver( + List.of(tempDir.toString() + "/*.log", tempDir.toString() + "/subdir/*.log"), + Collections.emptyList()); + + final Set watchDirs = resolver.getWatchDirectories(); + + assertThat(watchDirs.size(), greaterThanOrEqualTo(1)); + } + + @Test + void resolve_returns_empty_set_for_nonexistent_base_directory() { + Path nonexistent = tempDir.resolve("nonexistent-subdir"); + final GlobPathResolver resolver = new GlobPathResolver( + List.of(nonexistent.toString() + "/*.log"), + Collections.emptyList()); + + final Set result = resolver.resolve(); + + assertThat(result, empty()); + } + + @Test + void 
resolve_warns_when_base_directory_does_not_exist() { + String nonexistentPath = "/nonexistent-dir-" + System.nanoTime() + "/sub/deep/*.log"; + final GlobPathResolver resolver = new GlobPathResolver( + List.of(nonexistentPath), + Collections.emptyList()); + + final Set result = resolver.resolve(); + + assertThat(result, empty()); + } + + @Test + void resolve_handles_visitFileFailed_gracefully() throws IOException { + Path unreadableDir = tempDir.resolve("unreadable"); + Files.createDirectory(unreadableDir); + Files.createFile(unreadableDir.resolve("secret.log")); + unreadableDir.toFile().setReadable(false); + + final GlobPathResolver resolver = new GlobPathResolver( + List.of(tempDir.toString() + "/" + "**/*.log"), + Collections.emptyList()); + + final Set result = resolver.resolve(); + + unreadableDir.toFile().setReadable(true); + + assertThat(result, not(hasItem(unreadableDir.resolve("secret.log").toAbsolutePath().normalize()))); + } + + @Test + void walkDirectory_handles_ioException_from_walkFileTree() throws IOException { + Path dir = tempDir.resolve("walk-test"); + Files.createDirectory(dir); + Files.createFile(dir.resolve("file.log")); + + final GlobPathResolver resolver = new GlobPathResolver( + List.of(dir.toString() + "/*.log"), + Collections.emptyList()); + + SimpleFileVisitor throwingVisitor = new SimpleFileVisitor<>() { + @Override + public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException { + throw new IOException("simulated walk error"); + } + }; + + resolver.walkDirectory(dir, throwingVisitor); + } + + @Test + void extractBaseDirectory_with_no_separator_in_pattern() { + final Path baseDir = GlobPathResolver.extractBaseDirectory("/*.log"); + assertThat(baseDir, notNullValue()); + } + + @Test + void extractBaseDirectory_with_nonexistent_path_returns_parent_as_fallback() { + final Path baseDir = GlobPathResolver.extractBaseDirectory("/nonexistent-test-dir-" + System.nanoTime() + "/data.log"); + assertThat(baseDir, 
notNullValue()); + } +} diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/PendingFileTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/PendingFileTest.java new file mode 100644 index 0000000000..0b2654244f --- /dev/null +++ b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/PendingFileTest.java @@ -0,0 +1,83 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.nio.file.Path; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.notNullValue; +import static org.junit.jupiter.api.Assertions.assertThrows; + +@ExtendWith(MockitoExtension.class) +class PendingFileTest { + + @Mock + private FileIdentity fileIdentity; + + @Test + void constructorSetsFileIdentity() { + final Path path = Path.of("/var/log/test.log"); + final PendingFile pendingFile = new PendingFile(fileIdentity, path); + + assertThat(pendingFile.getFileIdentity(), equalTo(fileIdentity)); + } + + @Test + void constructorSetsPath() { + final Path path = Path.of("/var/log/test.log"); + final PendingFile pendingFile = new PendingFile(fileIdentity, path); + + assertThat(pendingFile.getPath(), equalTo(path)); + } + + @Test 
+ void constructorSetsEnqueuedTimeMillis() { + final long before = System.currentTimeMillis(); + final PendingFile pendingFile = new PendingFile(fileIdentity, Path.of("/tmp/file.log")); + final long after = System.currentTimeMillis(); + + assertThat(pendingFile.getEnqueuedTimeMillis(), greaterThan(0L)); + assertThat(pendingFile.getEnqueuedTimeMillis(), greaterThanOrEqualTo(before)); + assertThat(pendingFile.getEnqueuedTimeMillis(), lessThanOrEqualTo(after)); + } + + @Test + void constructorThrowsWhenFileIdentityIsNull() { + assertThrows(NullPointerException.class, () -> new PendingFile(null, Path.of("/tmp/test.log"))); + } + + @Test + void constructorThrowsWhenPathIsNull() { + assertThrows(NullPointerException.class, () -> new PendingFile(fileIdentity, null)); + } + + @Test + void toStringContainsPathAndIdentity() { + final Path path = Path.of("/var/log/app.log"); + final PendingFile pendingFile = new PendingFile(fileIdentity, path); + + final String result = pendingFile.toString(); + + assertThat(result, notNullValue()); + assertThat(result, containsString("path=")); + assertThat(result, containsString("identity=")); + } +} diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/RecordTypeTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/RecordTypeTest.java new file mode 100644 index 0000000000..fd4376f431 --- /dev/null +++ b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/RecordTypeTest.java @@ -0,0 +1,55 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.jupiter.api.Assertions.assertThrows; + +class RecordTypeTest { + + @Test + void fromString_returns_string() { + assertThat(RecordType.fromString("string"), equalTo(RecordType.STRING)); + } + + @Test + void fromString_returns_event() { + assertThat(RecordType.fromString("event"), equalTo(RecordType.EVENT)); + } + + @Test + void fromString_is_case_insensitive() { + assertThat(RecordType.fromString("STRING"), equalTo(RecordType.STRING)); + assertThat(RecordType.fromString("EVENT"), equalTo(RecordType.EVENT)); + } + + @ParameterizedTest + @ValueSource(strings = {"invalid", "record", ""}) + void fromString_throws_for_invalid_value(final String value) { + assertThrows(IllegalArgumentException.class, () -> RecordType.fromString(value)); + } + + @Test + void fromString_with_null_throws_IllegalArgumentException() { + assertThrows(IllegalArgumentException.class, () -> RecordType.fromString(null)); + } + + @Test + void toString_returns_name() { + assertThat(RecordType.STRING.toString(), equalTo("string")); + assertThat(RecordType.EVENT.toString(), equalTo("event")); + } +} diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/RotationDetectorTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/RotationDetectorTest.java new file mode 100644 index 0000000000..af5b467a8e --- /dev/null +++ b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/RotationDetectorTest.java @@ -0,0 +1,292 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors 
require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.nio.file.attribute.BasicFileAttributes; +import java.nio.file.attribute.FileTime; +import java.time.Instant; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.nullValue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.lenient; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.junit.jupiter.api.Assertions.assertThrows; + +@ExtendWith(MockitoExtension.class) +class RotationDetectorTest { + + private static final int FINGERPRINT_BYTES = 256; + + @TempDir + Path tempDir; + + @Mock + private FileSystemOperations fileOps; + + private RotationDetector rotationDetector; + + @BeforeEach + void setUp() { + rotationDetector = new RotationDetector(fileOps, FINGERPRINT_BYTES); + lenient().when(fileOps.exists(any())).thenReturn(true); + } + + @Test + void getFingerprintBytesReturnsConfiguredValue() { + assertThat(rotationDetector.getFingerprintBytes(), equalTo(FINGERPRINT_BYTES)); + } + + @Test + void noRotationWhenIdentityMatchesAndSizeNotShrunk() throws IOException { + final Path testFile = tempDir.resolve("app.log"); + Files.writeString(testFile, "log data"); + + final BasicFileAttributes attrs = mock(BasicFileAttributes.class); + 
when(attrs.fileKey()).thenReturn("inode-42"); + when(attrs.creationTime()).thenReturn(FileTime.from(Instant.EPOCH)); + when(fileOps.readAttributes(testFile)).thenReturn(attrs); + when(fileOps.size(testFile)).thenReturn(100L); + + final FileIdentity knownIdentity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES); + + final RotationResult result = rotationDetector.checkRotation(testFile, knownIdentity, 50L); + + assertThat(result.getRotationType(), equalTo(RotationType.NO_ROTATION)); + assertThat(result.getNewFileIdentity(), nullValue()); + } + + @Test + void createRenameRotationWhenIdentityChanges() throws IOException { + final Path testFile = tempDir.resolve("app.log"); + Files.writeString(testFile, "original"); + + final BasicFileAttributes oldAttrs = mock(BasicFileAttributes.class); + when(oldAttrs.fileKey()).thenReturn("inode-1"); + when(oldAttrs.creationTime()).thenReturn(FileTime.from(Instant.EPOCH)); + when(fileOps.readAttributes(testFile)).thenReturn(oldAttrs); + final FileIdentity knownIdentity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES); + + final BasicFileAttributes newAttrs = mock(BasicFileAttributes.class); + when(newAttrs.fileKey()).thenReturn("inode-2"); + when(newAttrs.creationTime()).thenReturn(FileTime.from(Instant.EPOCH)); + when(fileOps.readAttributes(testFile)).thenReturn(newAttrs); + + final RotationResult result = rotationDetector.checkRotation(testFile, knownIdentity, 100L); + + assertThat(result.getRotationType(), equalTo(RotationType.CREATE_RENAME)); + assertThat(result.getNewFileIdentity(), notNullValue()); + } + + @Test + void copytruncateRotationWhenFileSizeShrinks() throws IOException { + final Path testFile = tempDir.resolve("app.log"); + Files.writeString(testFile, "data"); + + final BasicFileAttributes attrs = mock(BasicFileAttributes.class); + when(attrs.fileKey()).thenReturn("inode-5"); + when(attrs.creationTime()).thenReturn(FileTime.from(Instant.EPOCH)); + 
when(fileOps.readAttributes(testFile)).thenReturn(attrs); + + final FileIdentity knownIdentity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES); + + when(fileOps.size(testFile)).thenReturn(10L); + + final RotationResult result = rotationDetector.checkRotation(testFile, knownIdentity, 500L); + + assertThat(result.getRotationType(), equalTo(RotationType.COPYTRUNCATE)); + assertThat(result.getNewFileIdentity(), notNullValue()); + } + + @Test + void deletedFileWhenWrappedNoSuchFileExceptionFromIdentityResolution() throws IOException { + final Path testFile = tempDir.resolve("gone.log"); + when(fileOps.exists(testFile)).thenReturn(false); + + final FileIdentity knownIdentity = mock(FileIdentity.class); + + final RotationResult result = rotationDetector.checkRotation(testFile, knownIdentity, 100L); + + assertThat(result.getRotationType(), equalTo(RotationType.DELETED)); + } + + @Test + void deletedFileWhenSizeCheckThrowsNoSuchFileException() throws IOException { + final Path testFile = tempDir.resolve("removed.log"); + Files.writeString(testFile, "temp"); + + final BasicFileAttributes attrs = mock(BasicFileAttributes.class); + when(attrs.fileKey()).thenReturn("inode-10"); + when(attrs.creationTime()).thenReturn(FileTime.from(Instant.EPOCH)); + when(fileOps.readAttributes(testFile)).thenReturn(attrs); + + final FileIdentity knownIdentity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES); + + when(fileOps.size(testFile)).thenThrow(new NoSuchFileException(testFile.toString())); + + final RotationResult result = rotationDetector.checkRotation(testFile, knownIdentity, 100L); + + assertThat(result.getRotationType(), equalTo(RotationType.DELETED)); + } + + @Test + void noRotationWhenSizeCheckThrowsGenericIOException() throws IOException { + final Path testFile = tempDir.resolve("error.log"); + Files.writeString(testFile, "content"); + + final BasicFileAttributes attrs = mock(BasicFileAttributes.class); + when(attrs.fileKey()).thenReturn("inode-20"); + 
when(attrs.creationTime()).thenReturn(FileTime.from(Instant.EPOCH)); + when(fileOps.readAttributes(testFile)).thenReturn(attrs); + + final FileIdentity knownIdentity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES); + + when(fileOps.size(testFile)).thenThrow(new IOException("disk error")); + + final RotationResult result = rotationDetector.checkRotation(testFile, knownIdentity, 100L); + + assertThat(result.getRotationType(), equalTo(RotationType.NO_ROTATION)); + } + + @Test + void noRotationWhenReadAttributesThrowsGenericRuntimeException() throws IOException { + final Path testFile = tempDir.resolve("runtime-err.log"); + + when(fileOps.readAttributes(testFile)).thenThrow(new RuntimeException("unexpected")); + + final FileIdentity knownIdentity = mock(FileIdentity.class); + + final RotationResult result = rotationDetector.checkRotation(testFile, knownIdentity, 100L); + + assertThat(result.getRotationType(), equalTo(RotationType.NO_ROTATION)); + } + + @Test + void deletedFileWhenWrappedNoSuchFileExceptionInCause() throws IOException { + final Path testFile = tempDir.resolve("wrapped.log"); + when(fileOps.exists(testFile)).thenReturn(false); + + final FileIdentity knownIdentity = mock(FileIdentity.class); + + final RotationResult result = rotationDetector.checkRotation(testFile, knownIdentity, 100L); + + assertThat(result.getRotationType(), equalTo(RotationType.DELETED)); + } + + @Test + void noRotationWhenSizeEqualsCurrentOffset() throws IOException { + final Path testFile = tempDir.resolve("exact.log"); + Files.writeString(testFile, "data"); + + final BasicFileAttributes attrs = mock(BasicFileAttributes.class); + when(attrs.fileKey()).thenReturn("inode-30"); + when(attrs.creationTime()).thenReturn(FileTime.from(Instant.EPOCH)); + when(fileOps.readAttributes(testFile)).thenReturn(attrs); + + final FileIdentity knownIdentity = FileIdentity.from(testFile, fileOps, FINGERPRINT_BYTES); + + when(fileOps.size(testFile)).thenReturn(100L); + + final RotationResult 
result = rotationDetector.checkRotation(testFile, knownIdentity, 100L); + + assertThat(result.getRotationType(), equalTo(RotationType.NO_ROTATION)); + } + + @Test + void deletedFileWhenDeeplyNestedNoSuchFileException() throws IOException { + final Path testFile = tempDir.resolve("deep-nested.log"); + when(fileOps.exists(testFile)).thenReturn(false); + + final FileIdentity knownIdentity = mock(FileIdentity.class); + + final RotationResult result = rotationDetector.checkRotation(testFile, knownIdentity, 100L); + + assertThat(result.getRotationType(), equalTo(RotationType.DELETED)); + } + + @Test + void isCausedByNoSuchFile_returns_true_when_exception_itself_is_NoSuchFileException() { + final NoSuchFileException noSuchFile = new NoSuchFileException("test.log"); + + assertThat(rotationDetector.isCausedByNoSuchFile(noSuchFile), equalTo(true)); + } + + @Test + void constructorThrowsIllegalArgumentExceptionWhenFingerprintBytesIsZero() { + assertThrows(IllegalArgumentException.class, + () -> new RotationDetector(fileOps, 0)); + } + + @Test + void constructorThrowsIllegalArgumentExceptionWhenFingerprintBytesIsNegative() { + assertThrows(IllegalArgumentException.class, + () -> new RotationDetector(fileOps, -5)); + } + + @Test + void checkRotationReturnsDeletedWhenFileDoesNotExist() { + final Path testFile = tempDir.resolve("not-exists.log"); + when(fileOps.exists(testFile)).thenReturn(false); + + final FileIdentity knownIdentity = mock(FileIdentity.class); + + final RotationResult result = rotationDetector.checkRotation(testFile, knownIdentity, 100L); + + assertThat(result.getRotationType(), equalTo(RotationType.DELETED)); + } + + @Test + void isCausedByNoSuchFile_returns_true_when_deeply_nested_in_cause_chain() { + final NoSuchFileException noSuchFile = new NoSuchFileException("deep.log"); + final RuntimeException mid = new RuntimeException("mid", noSuchFile); + final RuntimeException outer = new RuntimeException("outer", mid); + + 
assertThat(rotationDetector.isCausedByNoSuchFile(outer), equalTo(true)); + } + + @Test + void isCausedByNoSuchFile_returns_false_when_no_NoSuchFileException_in_chain() { + final IOException ioException = new IOException("generic"); + final RuntimeException outer = new RuntimeException("outer", ioException); + + assertThat(rotationDetector.isCausedByNoSuchFile(outer), equalTo(false)); + } + + @Test + void checkRotationReturnsDeletedWhenRuntimeExceptionWrapsNoSuchFileExceptionDeeply() throws IOException { + final Path testFile = tempDir.resolve("deep-cause.log"); + + final NoSuchFileException noSuchFile = new NoSuchFileException(testFile.toString()); + final RuntimeException mid = new RuntimeException("mid", noSuchFile); + final RuntimeException outer = new RuntimeException("outer", mid); + when(fileOps.readAttributes(testFile)).thenThrow(outer); + + final FileIdentity knownIdentity = mock(FileIdentity.class); + + final RotationResult result = rotationDetector.checkRotation(testFile, knownIdentity, 100L); + + assertThat(result.getRotationType(), equalTo(RotationType.DELETED)); + } +} diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/RotationResultTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/RotationResultTest.java new file mode 100644 index 0000000000..5412db72be --- /dev/null +++ b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/RotationResultTest.java @@ -0,0 +1,93 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.nullValue; + +@ExtendWith(MockitoExtension.class) +class RotationResultTest { + + @Mock + private FileIdentity fileIdentity; + + @Test + void noRotationConstantHasCorrectType() { + assertThat(RotationResult.NO_ROTATION.getRotationType(), equalTo(RotationType.NO_ROTATION)); + } + + @Test + void noRotationConstantHasNullNewFileIdentity() { + assertThat(RotationResult.NO_ROTATION.getNewFileIdentity(), nullValue()); + } + + @Test + void deletedConstantHasCorrectType() { + assertThat(RotationResult.DELETED.getRotationType(), equalTo(RotationType.DELETED)); + } + + @Test + void deletedConstantHasNullNewFileIdentity() { + assertThat(RotationResult.DELETED.getNewFileIdentity(), nullValue()); + } + + @Test + void constructorSetsRotationType() { + final RotationResult result = new RotationResult(RotationType.CREATE_RENAME, fileIdentity); + + assertThat(result.getRotationType(), equalTo(RotationType.CREATE_RENAME)); + } + + @Test + void constructorSetsNewFileIdentity() { + final RotationResult result = new RotationResult(RotationType.COPYTRUNCATE, fileIdentity); + + assertThat(result.getNewFileIdentity(), equalTo(fileIdentity)); + } + + @Test + void constructorAllowsNullNewFileIdentity() { + final RotationResult result = new RotationResult(RotationType.NO_ROTATION, null); + + assertThat(result.getNewFileIdentity(), nullValue()); + } + + @Test + void toStringContainsRotationType() { + final RotationResult result = new RotationResult(RotationType.CREATE_RENAME, fileIdentity); + + 
assertThat(result.toString(), notNullValue()); + assertThat(result.toString(), containsString("CREATE_RENAME")); + } + + @Test + void toStringContainsNewIdentityWhenPresent() { + final RotationResult result = new RotationResult(RotationType.CREATE_RENAME, fileIdentity); + + assertThat(result.toString(), containsString("newIdentity=")); + } + + @Test + void toStringOmitsNewIdentityWhenNull() { + final RotationResult result = new RotationResult(RotationType.NO_ROTATION, null); + + final String str = result.toString(); + assertThat(str.contains("newIdentity="), equalTo(false)); + } +} diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/RotationTypeTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/RotationTypeTest.java new file mode 100644 index 0000000000..a1fd41785d --- /dev/null +++ b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/RotationTypeTest.java @@ -0,0 +1,52 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import org.junit.jupiter.api.Test; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.notNullValue; + +class RotationTypeTest { + + @Test + void noRotationEnumValueExists() { + assertThat(RotationType.valueOf("NO_ROTATION"), equalTo(RotationType.NO_ROTATION)); + } + + @Test + void createRenameEnumValueExists() { + assertThat(RotationType.valueOf("CREATE_RENAME"), equalTo(RotationType.CREATE_RENAME)); + } + + @Test + void copytruncateEnumValueExists() { + assertThat(RotationType.valueOf("COPYTRUNCATE"), equalTo(RotationType.COPYTRUNCATE)); + } + + @Test + void deletedEnumValueExists() { + assertThat(RotationType.valueOf("DELETED"), equalTo(RotationType.DELETED)); + } + + @Test + void valuesContainsFourEntries() { + assertThat(RotationType.values().length, equalTo(4)); + } + + @Test + void allValuesAreNotNull() { + for (final RotationType type : RotationType.values()) { + assertThat(type, notNullValue()); + } + } +} diff --git a/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/StartPositionTest.java b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/StartPositionTest.java new file mode 100644 index 0000000000..6358ee0750 --- /dev/null +++ b/data-prepper-plugins/file-source/src/test/java/org/opensearch/dataprepper/plugins/source/file/StartPositionTest.java @@ -0,0 +1,55 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ * + */ + +package org.opensearch.dataprepper.plugins.source.file; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.jupiter.api.Assertions.assertThrows; + +class StartPositionTest { + + @Test + void fromString_returns_beginning_for_beginning() { + assertThat(StartPosition.fromString("beginning"), equalTo(StartPosition.BEGINNING)); + } + + @Test + void fromString_returns_end_for_end() { + assertThat(StartPosition.fromString("end"), equalTo(StartPosition.END)); + } + + @Test + void fromString_is_case_insensitive() { + assertThat(StartPosition.fromString("BEGINNING"), equalTo(StartPosition.BEGINNING)); + assertThat(StartPosition.fromString("END"), equalTo(StartPosition.END)); + } + + @ParameterizedTest + @ValueSource(strings = {"invalid", "start", "middle", ""}) + void fromString_throws_for_invalid_value(final String value) { + assertThrows(IllegalArgumentException.class, () -> StartPosition.fromString(value)); + } + + @Test + void toString_returns_name() { + assertThat(StartPosition.BEGINNING.toString(), equalTo("beginning")); + assertThat(StartPosition.END.toString(), equalTo("end")); + } + + @Test + void fromString_with_null_throws_IllegalArgumentException() { + assertThrows(IllegalArgumentException.class, () -> StartPosition.fromString(null)); + } +} diff --git a/data-prepper-plugins/file-source/src/test/resources/test-file-source-invalid-json.tst b/data-prepper-plugins/file-source/src/test/resources/test-file-source-invalid-json.tst new file mode 100644 index 0000000000..5cdfe97552 --- /dev/null +++ b/data-prepper-plugins/file-source/src/test/resources/test-file-source-invalid-json.tst @@ -0,0 +1,2 @@ +{"test_key: test_value"} +{"second_test_key": "second_test_value" \ No newline at end of file diff --git 
a/data-prepper-plugins/file-source/src/test/resources/test-file-source-json.tst b/data-prepper-plugins/file-source/src/test/resources/test-file-source-json.tst new file mode 100644 index 0000000000..0d0d8037ab --- /dev/null +++ b/data-prepper-plugins/file-source/src/test/resources/test-file-source-json.tst @@ -0,0 +1,2 @@ +{"test_key": "test_value"} +{"second_test_key": "second_test_value"} \ No newline at end of file diff --git a/data-prepper-plugins/file-source/src/test/resources/test-file-source-plain.tst b/data-prepper-plugins/file-source/src/test/resources/test-file-source-plain.tst new file mode 100644 index 0000000000..226beb530d --- /dev/null +++ b/data-prepper-plugins/file-source/src/test/resources/test-file-source-plain.tst @@ -0,0 +1,2 @@ +THIS IS A PLAINTEXT LINE +THIS IS ANOTHER PLAINTEXT LINE \ No newline at end of file diff --git a/settings.gradle b/settings.gradle index 99988e8249..12fc4bdc29 100644 --- a/settings.gradle +++ b/settings.gradle @@ -191,6 +191,7 @@ include 'data-prepper-plugins:dissect-processor' include 'data-prepper-plugins:dynamodb-source' include 'data-prepper-plugins:decompress-processor' include 'data-prepper-plugins:split-event-processor' +include 'data-prepper-plugins:file-source' include 'data-prepper-plugins:flatten-processor' include 'data-prepper-plugins:mongodb' include 'data-prepper-plugins:rds-source'