Skip to content

Commit f8efcd9

Browse files
Sungyoun-Kim and Codex (AI-generated) committed
Add detailed GeoIP database path validation
Signed-off-by: Peter Kim <peter@rtzr.ai>
Co-authored-by: Codex (AI-generated) <codex@rtzr.ai>
1 parent 506a04f commit f8efcd9

5 files changed

Lines changed: 272 additions & 11 deletions

File tree

data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/DatabaseSourceIdentification.java

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,21 @@ public static boolean isS3Uri(final String uriString) {
5050
* @return boolean
5151
*/
5252
public static boolean isURL(final String input) {
53+
if (input == null) {
54+
return false;
55+
}
5356
try {
5457
final URI uri = new URI(input);
5558
final URL url = new URL(input);
59+
final String host = uri.getHost();
60+
final String urlHost = url.getHost();
5661
return !input.endsWith(MANIFEST_ENDPOINT_PATH) &&
57-
!uri.getHost().contains("geoip.maps.opensearch") &&
58-
uri.getHost().equals("download.maxmind.com") &&
62+
host != null &&
63+
!host.contains("geoip.maps.opensearch") &&
64+
host.equals("download.maxmind.com") &&
5965
uri.getScheme() != null &&
60-
!Pattern.matches(S3_DOMAIN_PATTERN, url.getHost()) &&
66+
urlHost != null &&
67+
!Pattern.matches(S3_DOMAIN_PATTERN, urlHost) &&
6168
(uri.getScheme().equals("http") || uri.getScheme().equals("https"));
6269
} catch (URISyntaxException | MalformedURLException e) {
6370
return false;
@@ -70,6 +77,9 @@ public static boolean isURL(final String input) {
7077
* @return boolean
7178
*/
7279
public static boolean isFilePath(final String input) {
80+
if (input == null) {
81+
return false;
82+
}
7383
final File file = new File(input);
7484
return file.exists() && file.isFile();
7585
}
@@ -80,10 +90,13 @@ public static boolean isFilePath(final String input) {
8090
* @return boolean
8191
*/
8292
public static boolean isCDNEndpoint(final String input) {
93+
if (input == null) {
94+
return false;
95+
}
8396
if (input.endsWith(MANIFEST_ENDPOINT_PATH)) {
8497
try {
8598
final URI uri = new URI(input);
86-
return uri.getScheme().equals("http") || uri.getScheme().equals("https");
99+
return uri.getScheme() != null && (uri.getScheme().equals("http") || uri.getScheme().equals("https"));
87100
} catch (final URISyntaxException e) {
88101
return false;
89102
}

data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/MaxMindDatabaseConfig.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,12 @@
77

88
import com.fasterxml.jackson.annotation.JsonProperty;
99
import jakarta.validation.constraints.AssertTrue;
10-
import org.opensearch.dataprepper.plugins.geoip.extension.databasedownload.DBSourceOptions;
1110

12-
import java.util.ArrayList;
1311
import java.util.HashMap;
1412
import java.util.List;
1513
import java.util.Map;
1614

15+
@ValidDatabasePaths
1716
public class MaxMindDatabaseConfig {
1817
static final String DEFAULT_CITY_ENDPOINT = "https://geoip.maps.opensearch.org/v1/mmdb/geolite2-city/manifest.json";
1918
static final String DEFAULT_COUNTRY_ENDPOINT = "https://geoip.maps.opensearch.org/v1/mmdb/geolite2-country/manifest.json";
@@ -40,12 +39,12 @@ public boolean isDatabasesValid() {
4039
return enterpriseDatabase == null || (cityDatabase == null && countryDatabase == null && asnDatabase == null);
4140
}
4241

43-
@AssertTrue(message = "database_paths should be S3 URI or HTTP endpoint or local directory")
4442
public boolean isPathsValid() {
45-
final List<String> databasePaths = new ArrayList<>(getDatabasePaths().values());
43+
return getDatabasePathValidationErrors().isEmpty();
44+
}
4645

47-
final DBSourceOptions dbSourceOptions = DatabaseSourceIdentification.getDatabasePathType(databasePaths);
48-
return dbSourceOptions != null;
46+
List<String> getDatabasePathValidationErrors() {
47+
return ValidDatabasePathsValidator.getDatabasePathValidationErrors(getDatabasePaths());
4948
}
5049

5150
public Map<String, String> getDatabasePaths() {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.dataprepper.plugins.geoip.extension;
7+
8+
import jakarta.validation.Constraint;
9+
import jakarta.validation.Payload;
10+
11+
import java.lang.annotation.Documented;
12+
import java.lang.annotation.ElementType;
13+
import java.lang.annotation.Retention;
14+
import java.lang.annotation.RetentionPolicy;
15+
import java.lang.annotation.Target;
16+
17+
@Documented
18+
@Constraint(validatedBy = ValidDatabasePathsValidator.class)
19+
@Target({ElementType.TYPE})
20+
@Retention(RetentionPolicy.RUNTIME)
21+
public @interface ValidDatabasePaths {
22+
String message() default "database paths must be valid";
23+
24+
Class<?>[] groups() default {};
25+
26+
Class<? extends Payload>[] payload() default {};
27+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.dataprepper.plugins.geoip.extension;
7+
8+
import jakarta.validation.ConstraintValidator;
9+
import jakarta.validation.ConstraintValidatorContext;
10+
import org.opensearch.dataprepper.plugins.geoip.extension.databasedownload.DBSourceOptions;
11+
12+
import java.io.File;
13+
import java.net.URI;
14+
import java.net.URISyntaxException;
15+
import java.util.ArrayList;
16+
import java.util.LinkedHashSet;
17+
import java.util.List;
18+
import java.util.Map;
19+
import java.util.Set;
20+
21+
public class ValidDatabasePathsValidator implements ConstraintValidator<ValidDatabasePaths, MaxMindDatabaseConfig> {
22+
23+
@Override
24+
public boolean isValid(final MaxMindDatabaseConfig value, final ConstraintValidatorContext context) {
25+
if (value == null) {
26+
return true;
27+
}
28+
29+
final List<String> validationErrors = value.getDatabasePathValidationErrors();
30+
if (validationErrors.isEmpty()) {
31+
return true;
32+
}
33+
34+
context.disableDefaultConstraintViolation();
35+
validationErrors.forEach(validationError ->
36+
context.buildConstraintViolationWithTemplate(validationError)
37+
.addConstraintViolation());
38+
return false;
39+
}
40+
41+
static List<String> getDatabasePathValidationErrors(final Map<String, String> databasePaths) {
42+
final List<PathValidationResult> validationResults = new ArrayList<>();
43+
final List<String> validationErrors = new ArrayList<>();
44+
45+
databasePaths.forEach((databaseName, databasePath) -> {
46+
final DBSourceOptions sourceType = getDatabasePathSourceType(databasePath);
47+
if (sourceType == null) {
48+
validationErrors.add(getInvalidDatabasePathMessage(databaseName, databasePath));
49+
}
50+
validationResults.add(new PathValidationResult(databasePath, sourceType));
51+
});
52+
53+
if (validationErrors.isEmpty()) {
54+
validationErrors.addAll(getMixedSourceTypeMessages(validationResults));
55+
}
56+
57+
return validationErrors;
58+
}
59+
60+
private static DBSourceOptions getDatabasePathSourceType(final String databasePath) {
61+
if (databasePath == null || databasePath.trim().isEmpty()) {
62+
return null;
63+
}
64+
if (DatabaseSourceIdentification.isFilePath(databasePath)) {
65+
return DBSourceOptions.PATH;
66+
}
67+
if (DatabaseSourceIdentification.isCDNEndpoint(databasePath)) {
68+
return DBSourceOptions.HTTP_MANIFEST;
69+
}
70+
if (DatabaseSourceIdentification.isURL(databasePath)) {
71+
return DBSourceOptions.URL;
72+
}
73+
if (DatabaseSourceIdentification.isS3Uri(databasePath)) {
74+
return DBSourceOptions.S3;
75+
}
76+
return null;
77+
}
78+
79+
private static List<String> getMixedSourceTypeMessages(final List<PathValidationResult> validationResults) {
80+
final Set<DBSourceOptions> sourceTypes = new LinkedHashSet<>();
81+
validationResults.forEach(validationResult -> sourceTypes.add(validationResult.sourceType));
82+
83+
if (sourceTypes.size() <= 1) {
84+
return List.of();
85+
}
86+
87+
final List<String> validationErrors = new ArrayList<>();
88+
validationResults.forEach(validationResult ->
89+
validationErrors.add("Mixed database path source types are not supported. Found "
90+
+ getSourceTypeDescription(validationResult.sourceType) + ": " + validationResult.databasePath));
91+
return validationErrors;
92+
}
93+
94+
private static String getInvalidDatabasePathMessage(final String databaseName, final String databasePath) {
95+
if (databasePath == null) {
96+
return "Database path must not be null: " + databaseName;
97+
}
98+
if (databasePath.trim().isEmpty()) {
99+
return "Database path must not be blank: " + databaseName;
100+
}
101+
102+
final File databaseFile = new File(databasePath);
103+
if (databaseFile.exists()) {
104+
if (databaseFile.isDirectory()) {
105+
return "Directory provided, but a file is required: " + databasePath;
106+
}
107+
return "Path is not a regular file: " + databasePath;
108+
}
109+
110+
final String uriScheme = getUriScheme(databasePath);
111+
if (uriScheme == null) {
112+
return "Path does not exist: " + databasePath;
113+
}
114+
if (uriScheme.equalsIgnoreCase("http") || uriScheme.equalsIgnoreCase("https")) {
115+
return "HTTP endpoint must be a MaxMind download URL or manifest endpoint: " + databasePath;
116+
}
117+
return "Unsupported URI scheme for database path: " + databasePath;
118+
}
119+
120+
private static String getUriScheme(final String databasePath) {
121+
try {
122+
return new URI(databasePath).getScheme();
123+
} catch (final URISyntaxException e) {
124+
return null;
125+
}
126+
}
127+
128+
private static String getSourceTypeDescription(final DBSourceOptions sourceType) {
129+
switch (sourceType) {
130+
case PATH:
131+
return "local file path";
132+
case HTTP_MANIFEST:
133+
return "HTTP manifest endpoint";
134+
case URL:
135+
return "MaxMind download URL";
136+
case S3:
137+
return "S3 URI";
138+
default:
139+
throw new IllegalArgumentException("Unsupported database path source type: " + sourceType);
140+
}
141+
}
142+
143+
private static class PathValidationResult {
144+
private final String databasePath;
145+
private final DBSourceOptions sourceType;
146+
147+
private PathValidationResult(final String databasePath, final DBSourceOptions sourceType) {
148+
this.databasePath = databasePath;
149+
this.sourceType = sourceType;
150+
}
151+
}
152+
}

data-prepper-plugins/geoip-processor/src/test/java/org/opensearch/dataprepper/plugins/geoip/extension/MaxMindDatabaseConfigTest.java

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,25 @@
55

66
package org.opensearch.dataprepper.plugins.geoip.extension;
77

8+
import jakarta.validation.ConstraintViolation;
9+
import jakarta.validation.Validation;
10+
import jakarta.validation.Validator;
11+
import org.hibernate.validator.messageinterpolation.ParameterMessageInterpolator;
812
import org.junit.jupiter.api.BeforeEach;
913
import org.junit.jupiter.api.Test;
14+
import org.junit.jupiter.api.io.TempDir;
1015
import org.junit.jupiter.params.ParameterizedTest;
1116
import org.junit.jupiter.params.provider.CsvSource;
1217
import org.opensearch.dataprepper.test.helper.ReflectivelySetField;
1318

19+
import java.io.IOException;
20+
import java.nio.file.Files;
21+
import java.nio.file.Path;
22+
import java.util.Set;
23+
import java.util.stream.Collectors;
24+
1425
import static org.hamcrest.MatcherAssert.assertThat;
26+
import static org.hamcrest.Matchers.containsInAnyOrder;
1527
import static org.hamcrest.Matchers.equalTo;
1628
import static org.opensearch.dataprepper.plugins.geoip.extension.MaxMindDatabaseConfig.DEFAULT_ASN_ENDPOINT;
1729
import static org.opensearch.dataprepper.plugins.geoip.extension.MaxMindDatabaseConfig.DEFAULT_CITY_ENDPOINT;
@@ -23,9 +35,16 @@
2335

2436
class MaxMindDatabaseConfigTest {
2537
private MaxMindDatabaseConfig maxMindDatabaseConfig;
38+
private Validator validator;
39+
2640
@BeforeEach
2741
void setup() {
2842
maxMindDatabaseConfig = new MaxMindDatabaseConfig();
43+
validator = Validation.byDefaultProvider()
44+
.configure()
45+
.messageInterpolator(new ParameterMessageInterpolator())
46+
.buildValidatorFactory()
47+
.getValidator();
2948
}
3049

3150
@Test
@@ -94,4 +113,55 @@ void test_isPathsValid(final String path, final boolean result) throws NoSuchFie
94113
assertThat(maxMindDatabaseConfig.isPathsValid(), equalTo(result));
95114
}
96115

97-
}
116+
@Test
117+
void validate_should_include_path_specific_message_when_path_does_not_exist(@TempDir final Path tempDirectory)
118+
throws NoSuchFieldException, IllegalAccessException {
119+
final Path missingDatabase = tempDirectory.resolve("geoip.mmdb");
120+
ReflectivelySetField.setField(
121+
MaxMindDatabaseConfig.class, maxMindDatabaseConfig, "asnDatabase", missingDatabase.toString());
122+
123+
assertThat(getValidationMessages(), containsInAnyOrder("Path does not exist: " + missingDatabase));
124+
}
125+
126+
@Test
127+
void validate_should_include_path_specific_message_when_directory_is_configured(@TempDir final Path tempDirectory)
128+
throws NoSuchFieldException, IllegalAccessException {
129+
ReflectivelySetField.setField(
130+
MaxMindDatabaseConfig.class, maxMindDatabaseConfig, "asnDatabase", tempDirectory.toString());
131+
132+
assertThat(getValidationMessages(),
133+
containsInAnyOrder("Directory provided, but a file is required: " + tempDirectory));
134+
}
135+
136+
@Test
137+
void validate_should_include_path_specific_message_when_http_endpoint_is_not_supported()
138+
throws NoSuchFieldException, IllegalAccessException {
139+
ReflectivelySetField.setField(
140+
MaxMindDatabaseConfig.class, maxMindDatabaseConfig, "asnDatabase", "https://example.com/");
141+
142+
assertThat(getValidationMessages(), containsInAnyOrder(
143+
"HTTP endpoint must be a MaxMind download URL or manifest endpoint: https://example.com/"));
144+
}
145+
146+
@Test
147+
void validate_should_include_path_specific_messages_when_source_types_are_mixed(@TempDir final Path tempDirectory)
148+
throws NoSuchFieldException, IllegalAccessException, IOException {
149+
final Path cityDatabase = Files.createFile(tempDirectory.resolve("GeoLite2-City.mmdb"));
150+
final String s3DatabasePath = "s3://geoip/GeoLite2-ASN.mmdb";
151+
ReflectivelySetField.setField(
152+
MaxMindDatabaseConfig.class, maxMindDatabaseConfig, "cityDatabase", cityDatabase.toString());
153+
ReflectivelySetField.setField(
154+
MaxMindDatabaseConfig.class, maxMindDatabaseConfig, "asnDatabase", s3DatabasePath);
155+
156+
assertThat(getValidationMessages(), containsInAnyOrder(
157+
"Mixed database path source types are not supported. Found local file path: " + cityDatabase,
158+
"Mixed database path source types are not supported. Found S3 URI: " + s3DatabasePath));
159+
}
160+
161+
private Set<String> getValidationMessages() {
162+
return validator.validate(maxMindDatabaseConfig).stream()
163+
.map(ConstraintViolation::getMessage)
164+
.collect(Collectors.toSet());
165+
}
166+
167+
}

0 commit comments

Comments
 (0)