diff --git a/data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/DatabaseSourceIdentification.java b/data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/DatabaseSourceIdentification.java index afca3ed5d4..f3bb720645 100644 --- a/data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/DatabaseSourceIdentification.java +++ b/data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/DatabaseSourceIdentification.java @@ -50,14 +50,21 @@ public static boolean isS3Uri(final String uriString) { * @return boolean */ public static boolean isURL(final String input) { + if (input == null) { + return false; + } try { final URI uri = new URI(input); final URL url = new URL(input); + final String host = uri.getHost(); + final String urlHost = url.getHost(); return !input.endsWith(MANIFEST_ENDPOINT_PATH) && - !uri.getHost().contains("geoip.maps.opensearch") && - uri.getHost().equals("download.maxmind.com") && + host != null && + !host.contains("geoip.maps.opensearch") && + host.equals("download.maxmind.com") && uri.getScheme() != null && - !Pattern.matches(S3_DOMAIN_PATTERN, url.getHost()) && + urlHost != null && + !Pattern.matches(S3_DOMAIN_PATTERN, urlHost) && (uri.getScheme().equals("http") || uri.getScheme().equals("https")); } catch (URISyntaxException | MalformedURLException e) { return false; @@ -70,6 +77,9 @@ public static boolean isURL(final String input) { * @return boolean */ public static boolean isFilePath(final String input) { + if (input == null) { + return false; + } final File file = new File(input); return file.exists() && file.isFile(); } @@ -80,10 +90,13 @@ public static boolean isFilePath(final String input) { * @return boolean */ public static boolean isCDNEndpoint(final String input) { + if (input == null) { + return false; + } if (input.endsWith(MANIFEST_ENDPOINT_PATH)) { try { final URI uri = new URI(input); - return uri.getScheme().equals("http") || uri.getScheme().equals("https"); + return uri.getScheme() != null && (uri.getScheme().equals("http") || uri.getScheme().equals("https")); } catch (final URISyntaxException e) { return false; } diff --git a/data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/MaxMindDatabaseConfig.java b/data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/MaxMindDatabaseConfig.java index 5eb7ebf0af..b83c996f63 100644 --- a/data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/MaxMindDatabaseConfig.java +++ b/data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/MaxMindDatabaseConfig.java @@ -7,13 +7,12 @@ import com.fasterxml.jackson.annotation.JsonProperty; import jakarta.validation.constraints.AssertTrue; -import org.opensearch.dataprepper.plugins.geoip.extension.databasedownload.DBSourceOptions; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +@ValidDatabasePaths public class MaxMindDatabaseConfig { static final String DEFAULT_CITY_ENDPOINT = "https://geoip.maps.opensearch.org/v1/mmdb/geolite2-city/manifest.json"; static final String DEFAULT_COUNTRY_ENDPOINT = "https://geoip.maps.opensearch.org/v1/mmdb/geolite2-country/manifest.json"; @@ -40,12 +39,12 @@ public boolean isDatabasesValid() { return enterpriseDatabase == null || (cityDatabase == null && countryDatabase == null && asnDatabase == null); } - @AssertTrue(message = "database_paths should be S3 URI or HTTP endpoint or local directory") public boolean isPathsValid() { - final List databasePaths = new ArrayList<>(getDatabasePaths().values()); + return getDatabasePathValidationErrors().isEmpty(); + } - final DBSourceOptions dbSourceOptions = DatabaseSourceIdentification.getDatabasePathType(databasePaths); - return dbSourceOptions != null; + List getDatabasePathValidationErrors() { + return ValidDatabasePathsValidator.getDatabasePathValidationErrors(getDatabasePaths()); } public Map getDatabasePaths() { diff --git a/data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/ValidDatabasePaths.java b/data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/ValidDatabasePaths.java new file mode 100644 index 0000000000..2ca196476b --- /dev/null +++ b/data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/ValidDatabasePaths.java @@ -0,0 +1,27 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.dataprepper.plugins.geoip.extension; + +import jakarta.validation.Constraint; +import jakarta.validation.Payload; + +import java.lang.annotation.Documented; +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Documented +@Constraint(validatedBy = ValidDatabasePathsValidator.class) +@Target({ElementType.TYPE}) +@Retention(RetentionPolicy.RUNTIME) +public @interface ValidDatabasePaths { + String message() default "database paths must be valid"; + + Class[] groups() default {}; + + Class[] payload() default {}; +} diff --git a/data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/ValidDatabasePathsValidator.java b/data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/ValidDatabasePathsValidator.java new file mode 100644 index 0000000000..db900e1051 --- /dev/null +++ b/data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/ValidDatabasePathsValidator.java @@ -0,0 +1,152 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.dataprepper.plugins.geoip.extension; + +import jakarta.validation.ConstraintValidator; +import jakarta.validation.ConstraintValidatorContext; +import org.opensearch.dataprepper.plugins.geoip.extension.databasedownload.DBSourceOptions; + +import java.io.File; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class ValidDatabasePathsValidator implements ConstraintValidator { + + @Override + public boolean isValid(final MaxMindDatabaseConfig value, final ConstraintValidatorContext context) { + if (value == null) { + return true; + } + + final List validationErrors = value.getDatabasePathValidationErrors(); + if (validationErrors.isEmpty()) { + return true; + } + + context.disableDefaultConstraintViolation(); + validationErrors.forEach(validationError -> + context.buildConstraintViolationWithTemplate(validationError) + .addConstraintViolation()); + return false; + } + + static List getDatabasePathValidationErrors(final Map databasePaths) { + final List validationResults = new ArrayList<>(); + final List validationErrors = new ArrayList<>(); + + databasePaths.forEach((databaseName, databasePath) -> { + final DBSourceOptions sourceType = getDatabasePathSourceType(databasePath); + if (sourceType == null) { + validationErrors.add(getInvalidDatabasePathMessage(databaseName, databasePath)); + } + validationResults.add(new PathValidationResult(databasePath, sourceType)); + }); + + if (validationErrors.isEmpty()) { + validationErrors.addAll(getMixedSourceTypeMessages(validationResults)); + } + + return validationErrors; + } + + private static DBSourceOptions getDatabasePathSourceType(final String databasePath) { + if (databasePath == null || databasePath.trim().isEmpty()) { + return null; + } + if (DatabaseSourceIdentification.isFilePath(databasePath)) { + return DBSourceOptions.PATH; + } + if (DatabaseSourceIdentification.isCDNEndpoint(databasePath)) { + return DBSourceOptions.HTTP_MANIFEST; + } + if (DatabaseSourceIdentification.isURL(databasePath)) { + return DBSourceOptions.URL; + } + if (DatabaseSourceIdentification.isS3Uri(databasePath)) { + return DBSourceOptions.S3; + } + return null; + } + + private static List getMixedSourceTypeMessages(final List validationResults) { + final Set sourceTypes = new LinkedHashSet<>(); + validationResults.forEach(validationResult -> sourceTypes.add(validationResult.sourceType)); + + if (sourceTypes.size() <= 1) { + return List.of(); + } + + final List validationErrors = new ArrayList<>(); + validationResults.forEach(validationResult -> + validationErrors.add("Mixed database path source types are not supported. Found " + + getSourceTypeDescription(validationResult.sourceType) + ": " + validationResult.databasePath)); + return validationErrors; + } + + private static String getInvalidDatabasePathMessage(final String databaseName, final String databasePath) { + if (databasePath == null) { + return "Database path must not be null: " + databaseName; + } + if (databasePath.trim().isEmpty()) { + return "Database path must not be blank: " + databaseName; + } + + final File databaseFile = new File(databasePath); + if (databaseFile.exists()) { + if (databaseFile.isDirectory()) { + return "Directory provided, but a file is required: " + databasePath; + } + return "Path is not a regular file: " + databasePath; + } + + final String uriScheme = getUriScheme(databasePath); + if (uriScheme == null) { + return "Path does not exist: " + databasePath; + } + if (uriScheme.equalsIgnoreCase("http") || uriScheme.equalsIgnoreCase("https")) { + return "HTTP endpoint must be a MaxMind download URL or manifest endpoint: " + databasePath; + } + return "Unsupported URI scheme for database path: " + databasePath; + } + + private static String getUriScheme(final String databasePath) { + try { + return new URI(databasePath).getScheme(); + } catch (final URISyntaxException e) { + return null; + } + } + + private static String getSourceTypeDescription(final DBSourceOptions sourceType) { + switch (sourceType) { + case PATH: + return "local file path"; + case HTTP_MANIFEST: + return "HTTP manifest endpoint"; + case URL: + return "MaxMind download URL"; + case S3: + return "S3 URI"; + default: + throw new IllegalArgumentException("Unsupported database path source type: " + sourceType); + } + } + + private static class PathValidationResult { + private final String databasePath; + private final DBSourceOptions sourceType; + + private PathValidationResult(final String databasePath, final DBSourceOptions sourceType) { + this.databasePath = databasePath; + this.sourceType = sourceType; + } + } +} diff --git a/data-prepper-plugins/geoip-processor/src/test/java/org/opensearch/dataprepper/plugins/geoip/extension/MaxMindDatabaseConfigTest.java b/data-prepper-plugins/geoip-processor/src/test/java/org/opensearch/dataprepper/plugins/geoip/extension/MaxMindDatabaseConfigTest.java index 72ce2f2474..0e730549d5 100644 --- a/data-prepper-plugins/geoip-processor/src/test/java/org/opensearch/dataprepper/plugins/geoip/extension/MaxMindDatabaseConfigTest.java +++ b/data-prepper-plugins/geoip-processor/src/test/java/org/opensearch/dataprepper/plugins/geoip/extension/MaxMindDatabaseConfigTest.java @@ -5,13 +5,25 @@ package org.opensearch.dataprepper.plugins.geoip.extension; +import jakarta.validation.ConstraintViolation; +import jakarta.validation.Validation; +import jakarta.validation.Validator; +import org.hibernate.validator.messageinterpolation.ParameterMessageInterpolator; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; import org.opensearch.dataprepper.test.helper.ReflectivelySetField; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Set; +import java.util.stream.Collectors; + import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; import static org.opensearch.dataprepper.plugins.geoip.extension.MaxMindDatabaseConfig.DEFAULT_ASN_ENDPOINT; import static org.opensearch.dataprepper.plugins.geoip.extension.MaxMindDatabaseConfig.DEFAULT_CITY_ENDPOINT; @@ -23,9 +35,16 @@ class MaxMindDatabaseConfigTest { private MaxMindDatabaseConfig maxMindDatabaseConfig; + private Validator validator; + @BeforeEach void setup() { maxMindDatabaseConfig = new MaxMindDatabaseConfig(); + validator = Validation.byDefaultProvider() + .configure() + .messageInterpolator(new ParameterMessageInterpolator()) + .buildValidatorFactory() + .getValidator(); } @Test @@ -94,4 +113,55 @@ void test_isPathsValid(final String path, final boolean result) throws NoSuchFie assertThat(maxMindDatabaseConfig.isPathsValid(), equalTo(result)); } -} \ No newline at end of file + @Test + void validate_should_include_path_specific_message_when_path_does_not_exist(@TempDir final Path tempDirectory) + throws NoSuchFieldException, IllegalAccessException { + final Path missingDatabase = tempDirectory.resolve("geoip.mmdb"); + ReflectivelySetField.setField( + MaxMindDatabaseConfig.class, maxMindDatabaseConfig, "asnDatabase", missingDatabase.toString()); + + assertThat(getValidationMessages(), containsInAnyOrder("Path does not exist: " + missingDatabase)); + } + + @Test + void validate_should_include_path_specific_message_when_directory_is_configured(@TempDir final Path tempDirectory) + throws NoSuchFieldException, IllegalAccessException { + ReflectivelySetField.setField( + MaxMindDatabaseConfig.class, maxMindDatabaseConfig, "asnDatabase", tempDirectory.toString()); + + assertThat(getValidationMessages(), + containsInAnyOrder("Directory provided, but a file is required: " + tempDirectory)); + } + + @Test + void validate_should_include_path_specific_message_when_http_endpoint_is_not_supported() + throws NoSuchFieldException, IllegalAccessException { + ReflectivelySetField.setField( + MaxMindDatabaseConfig.class, maxMindDatabaseConfig, "asnDatabase", "https://example.com/"); + + assertThat(getValidationMessages(), containsInAnyOrder( + "HTTP endpoint must be a MaxMind download URL or manifest endpoint: https://example.com/")); + } + + @Test + void validate_should_include_path_specific_messages_when_source_types_are_mixed(@TempDir final Path tempDirectory) + throws NoSuchFieldException, IllegalAccessException, IOException { + final Path cityDatabase = Files.createFile(tempDirectory.resolve("GeoLite2-City.mmdb")); + final String s3DatabasePath = "s3://geoip/GeoLite2-ASN.mmdb"; + ReflectivelySetField.setField( + MaxMindDatabaseConfig.class, maxMindDatabaseConfig, "cityDatabase", cityDatabase.toString()); + ReflectivelySetField.setField( + MaxMindDatabaseConfig.class, maxMindDatabaseConfig, "asnDatabase", s3DatabasePath); + + assertThat(getValidationMessages(), containsInAnyOrder( + "Mixed database path source types are not supported. Found local file path: " + cityDatabase, + "Mixed database path source types are not supported. Found S3 URI: " + s3DatabasePath)); + } + + private Set getValidationMessages() { + return validator.validate(maxMindDatabaseConfig).stream() + .map(ConstraintViolation::getMessage) + .collect(Collectors.toSet()); + } + +}