Skip to content

Commit c4f5d9d

Browse files
Sungyoun-Kim and Codex (AI-generated) committed
Add detailed GeoIP database path validation
Signed-off-by: Peter Kim <peter@rtzr.ai>
Co-authored-by: Codex (AI-generated) <codex@rtzr.ai>
1 parent 506a04f commit c4f5d9d

5 files changed

Lines changed: 267 additions & 11 deletions

File tree

data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/DatabaseSourceIdentification.java

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,21 @@ public static boolean isS3Uri(final String uriString) {
5050
* @return boolean
5151
*/
5252
public static boolean isURL(final String input) {
53+
if (input == null) {
54+
return false;
55+
}
5356
try {
5457
final URI uri = new URI(input);
5558
final URL url = new URL(input);
59+
final String host = uri.getHost();
60+
final String urlHost = url.getHost();
5661
return !input.endsWith(MANIFEST_ENDPOINT_PATH) &&
57-
!uri.getHost().contains("geoip.maps.opensearch") &&
58-
uri.getHost().equals("download.maxmind.com") &&
62+
host != null &&
63+
!host.contains("geoip.maps.opensearch") &&
64+
host.equals("download.maxmind.com") &&
5965
uri.getScheme() != null &&
60-
!Pattern.matches(S3_DOMAIN_PATTERN, url.getHost()) &&
66+
urlHost != null &&
67+
!Pattern.matches(S3_DOMAIN_PATTERN, urlHost) &&
6168
(uri.getScheme().equals("http") || uri.getScheme().equals("https"));
6269
} catch (URISyntaxException | MalformedURLException e) {
6370
return false;
@@ -70,6 +77,9 @@ public static boolean isURL(final String input) {
7077
* @return boolean
7178
*/
7279
public static boolean isFilePath(final String input) {
80+
if (input == null) {
81+
return false;
82+
}
7383
final File file = new File(input);
7484
return file.exists() && file.isFile();
7585
}
@@ -80,10 +90,13 @@ public static boolean isFilePath(final String input) {
8090
* @return boolean
8191
*/
8292
public static boolean isCDNEndpoint(final String input) {
93+
if (input == null) {
94+
return false;
95+
}
8396
if (input.endsWith(MANIFEST_ENDPOINT_PATH)) {
8497
try {
8598
final URI uri = new URI(input);
86-
return uri.getScheme().equals("http") || uri.getScheme().equals("https");
99+
return uri.getScheme() != null && (uri.getScheme().equals("http") || uri.getScheme().equals("https"));
87100
} catch (final URISyntaxException e) {
88101
return false;
89102
}

data-prepper-plugins/geoip-processor/src/main/java/org/opensearch/dataprepper/plugins/geoip/extension/MaxMindDatabaseConfig.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,12 @@
77

88
import com.fasterxml.jackson.annotation.JsonProperty;
99
import jakarta.validation.constraints.AssertTrue;
10-
import org.opensearch.dataprepper.plugins.geoip.extension.databasedownload.DBSourceOptions;
1110

12-
import java.util.ArrayList;
1311
import java.util.HashMap;
1412
import java.util.List;
1513
import java.util.Map;
1614

15+
@ValidDatabasePaths
1716
public class MaxMindDatabaseConfig {
1817
static final String DEFAULT_CITY_ENDPOINT = "https://geoip.maps.opensearch.org/v1/mmdb/geolite2-city/manifest.json";
1918
static final String DEFAULT_COUNTRY_ENDPOINT = "https://geoip.maps.opensearch.org/v1/mmdb/geolite2-country/manifest.json";
@@ -40,12 +39,12 @@ public boolean isDatabasesValid() {
4039
return enterpriseDatabase == null || (cityDatabase == null && countryDatabase == null && asnDatabase == null);
4140
}
4241

43-
@AssertTrue(message = "database_paths should be S3 URI or HTTP endpoint or local directory")
4442
public boolean isPathsValid() {
45-
final List<String> databasePaths = new ArrayList<>(getDatabasePaths().values());
43+
return getDatabasePathValidationErrors().isEmpty();
44+
}
4645

47-
final DBSourceOptions dbSourceOptions = DatabaseSourceIdentification.getDatabasePathType(databasePaths);
48-
return dbSourceOptions != null;
46+
List<String> getDatabasePathValidationErrors() {
47+
return ValidDatabasePathsValidator.getDatabasePathValidationErrors(getDatabasePaths());
4948
}
5049

5150
public Map<String, String> getDatabasePaths() {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.dataprepper.plugins.geoip.extension;
7+
8+
import jakarta.validation.Constraint;
9+
import jakarta.validation.Payload;
10+
11+
import java.lang.annotation.Documented;
12+
import java.lang.annotation.ElementType;
13+
import java.lang.annotation.Retention;
14+
import java.lang.annotation.RetentionPolicy;
15+
import java.lang.annotation.Target;
16+
17+
@Documented
18+
@Constraint(validatedBy = ValidDatabasePathsValidator.class)
19+
@Target({ElementType.TYPE})
20+
@Retention(RetentionPolicy.RUNTIME)
21+
public @interface ValidDatabasePaths {
22+
String message() default "database paths must be valid";
23+
24+
Class<?>[] groups() default {};
25+
26+
Class<? extends Payload>[] payload() default {};
27+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.dataprepper.plugins.geoip.extension;
7+
8+
import jakarta.validation.ConstraintValidator;
9+
import jakarta.validation.ConstraintValidatorContext;
10+
import org.opensearch.dataprepper.plugins.geoip.extension.databasedownload.DBSourceOptions;
11+
12+
import java.io.File;
13+
import java.net.URI;
14+
import java.net.URISyntaxException;
15+
import java.util.ArrayList;
16+
import java.util.LinkedHashSet;
17+
import java.util.List;
18+
import java.util.Map;
19+
import java.util.Set;
20+
21+
public class ValidDatabasePathsValidator implements ConstraintValidator<ValidDatabasePaths, MaxMindDatabaseConfig> {
22+
23+
@Override
24+
public boolean isValid(final MaxMindDatabaseConfig value, final ConstraintValidatorContext context) {
25+
if (value == null) {
26+
return true;
27+
}
28+
29+
final List<String> validationErrors = value.getDatabasePathValidationErrors();
30+
if (validationErrors.isEmpty()) {
31+
return true;
32+
}
33+
34+
context.disableDefaultConstraintViolation();
35+
validationErrors.forEach(validationError ->
36+
context.buildConstraintViolationWithTemplate(validationError)
37+
.addConstraintViolation());
38+
return false;
39+
}
40+
41+
static List<String> getDatabasePathValidationErrors(final Map<String, String> databasePaths) {
42+
final List<PathValidationResult> validationResults = new ArrayList<>();
43+
final List<String> validationErrors = new ArrayList<>();
44+
45+
databasePaths.forEach((databaseName, databasePath) -> {
46+
final DBSourceOptions sourceType = getDatabasePathSourceType(databasePath);
47+
if (sourceType == null) {
48+
validationErrors.add(getInvalidDatabasePathMessage(databaseName, databasePath));
49+
}
50+
validationResults.add(new PathValidationResult(databasePath, sourceType));
51+
});
52+
53+
if (validationErrors.isEmpty()) {
54+
validationErrors.addAll(getMixedSourceTypeMessages(validationResults));
55+
}
56+
57+
return validationErrors;
58+
}
59+
60+
private static DBSourceOptions getDatabasePathSourceType(final String databasePath) {
61+
if (databasePath == null || databasePath.trim().isEmpty()) {
62+
return null;
63+
}
64+
if (DatabaseSourceIdentification.isFilePath(databasePath)) {
65+
return DBSourceOptions.PATH;
66+
}
67+
if (DatabaseSourceIdentification.isCDNEndpoint(databasePath)) {
68+
return DBSourceOptions.HTTP_MANIFEST;
69+
}
70+
if (DatabaseSourceIdentification.isURL(databasePath)) {
71+
return DBSourceOptions.URL;
72+
}
73+
if (DatabaseSourceIdentification.isS3Uri(databasePath)) {
74+
return DBSourceOptions.S3;
75+
}
76+
return null;
77+
}
78+
79+
private static List<String> getMixedSourceTypeMessages(final List<PathValidationResult> validationResults) {
80+
final Set<DBSourceOptions> sourceTypes = new LinkedHashSet<>();
81+
validationResults.forEach(validationResult -> sourceTypes.add(validationResult.sourceType));
82+
83+
if (sourceTypes.size() <= 1) {
84+
return List.of();
85+
}
86+
87+
final List<String> validationErrors = new ArrayList<>();
88+
validationResults.forEach(validationResult ->
89+
validationErrors.add("Mixed database path source types are not supported. Found "
90+
+ getSourceTypeDescription(validationResult.sourceType) + ": " + validationResult.databasePath));
91+
return validationErrors;
92+
}
93+
94+
private static String getInvalidDatabasePathMessage(final String databaseName, final String databasePath) {
95+
if (databasePath == null) {
96+
return "Database path must not be null: " + databaseName;
97+
}
98+
if (databasePath.trim().isEmpty()) {
99+
return "Database path must not be blank: " + databaseName;
100+
}
101+
102+
final File databaseFile = new File(databasePath);
103+
if (databaseFile.exists()) {
104+
if (databaseFile.isDirectory()) {
105+
return "Directory provided, but a file is required: " + databasePath;
106+
}
107+
return "Path is not a regular file: " + databasePath;
108+
}
109+
110+
final String uriScheme = getUriScheme(databasePath);
111+
if (uriScheme == null) {
112+
return "Path does not exist: " + databasePath;
113+
}
114+
if (uriScheme.equalsIgnoreCase("http") || uriScheme.equalsIgnoreCase("https")) {
115+
return "HTTP endpoint must be a MaxMind download URL or manifest endpoint: " + databasePath;
116+
}
117+
return "Unsupported URI scheme for database path: " + databasePath;
118+
}
119+
120+
private static String getUriScheme(final String databasePath) {
121+
try {
122+
return new URI(databasePath).getScheme();
123+
} catch (final URISyntaxException e) {
124+
return null;
125+
}
126+
}
127+
128+
private static String getSourceTypeDescription(final DBSourceOptions sourceType) {
129+
switch (sourceType) {
130+
case PATH:
131+
return "local file path";
132+
case HTTP_MANIFEST:
133+
return "HTTP manifest endpoint";
134+
case URL:
135+
return "MaxMind download URL";
136+
case S3:
137+
return "S3 URI";
138+
default:
139+
throw new IllegalArgumentException("Unsupported database path source type: " + sourceType);
140+
}
141+
}
142+
143+
private static class PathValidationResult {
144+
private final String databasePath;
145+
private final DBSourceOptions sourceType;
146+
147+
private PathValidationResult(final String databasePath, final DBSourceOptions sourceType) {
148+
this.databasePath = databasePath;
149+
this.sourceType = sourceType;
150+
}
151+
}
152+
}

data-prepper-plugins/geoip-processor/src/test/java/org/opensearch/dataprepper/plugins/geoip/extension/MaxMindDatabaseConfigTest.java

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,24 @@
55

66
package org.opensearch.dataprepper.plugins.geoip.extension;
77

8+
import jakarta.validation.ConstraintViolation;
9+
import jakarta.validation.Validation;
10+
import jakarta.validation.Validator;
811
import org.junit.jupiter.api.BeforeEach;
912
import org.junit.jupiter.api.Test;
13+
import org.junit.jupiter.api.io.TempDir;
1014
import org.junit.jupiter.params.ParameterizedTest;
1115
import org.junit.jupiter.params.provider.CsvSource;
1216
import org.opensearch.dataprepper.test.helper.ReflectivelySetField;
1317

18+
import java.io.IOException;
19+
import java.nio.file.Files;
20+
import java.nio.file.Path;
21+
import java.util.Set;
22+
import java.util.stream.Collectors;
23+
1424
import static org.hamcrest.MatcherAssert.assertThat;
25+
import static org.hamcrest.Matchers.containsInAnyOrder;
1526
import static org.hamcrest.Matchers.equalTo;
1627
import static org.opensearch.dataprepper.plugins.geoip.extension.MaxMindDatabaseConfig.DEFAULT_ASN_ENDPOINT;
1728
import static org.opensearch.dataprepper.plugins.geoip.extension.MaxMindDatabaseConfig.DEFAULT_CITY_ENDPOINT;
@@ -23,9 +34,12 @@
2334

2435
class MaxMindDatabaseConfigTest {
2536
private MaxMindDatabaseConfig maxMindDatabaseConfig;
37+
private Validator validator;
38+
2639
@BeforeEach
2740
void setup() {
2841
maxMindDatabaseConfig = new MaxMindDatabaseConfig();
42+
validator = Validation.buildDefaultValidatorFactory().getValidator();
2943
}
3044

3145
@Test
@@ -94,4 +108,55 @@ void test_isPathsValid(final String path, final boolean result) throws NoSuchFie
94108
assertThat(maxMindDatabaseConfig.isPathsValid(), equalTo(result));
95109
}
96110

97-
}
111+
@Test
112+
void validate_should_include_path_specific_message_when_path_does_not_exist(@TempDir final Path tempDirectory)
113+
throws NoSuchFieldException, IllegalAccessException {
114+
final Path missingDatabase = tempDirectory.resolve("geoip.mmdb");
115+
ReflectivelySetField.setField(
116+
MaxMindDatabaseConfig.class, maxMindDatabaseConfig, "asnDatabase", missingDatabase.toString());
117+
118+
assertThat(getValidationMessages(), containsInAnyOrder("Path does not exist: " + missingDatabase));
119+
}
120+
121+
@Test
122+
void validate_should_include_path_specific_message_when_directory_is_configured(@TempDir final Path tempDirectory)
123+
throws NoSuchFieldException, IllegalAccessException {
124+
ReflectivelySetField.setField(
125+
MaxMindDatabaseConfig.class, maxMindDatabaseConfig, "asnDatabase", tempDirectory.toString());
126+
127+
assertThat(getValidationMessages(),
128+
containsInAnyOrder("Directory provided, but a file is required: " + tempDirectory));
129+
}
130+
131+
@Test
132+
void validate_should_include_path_specific_message_when_http_endpoint_is_not_supported()
133+
throws NoSuchFieldException, IllegalAccessException {
134+
ReflectivelySetField.setField(
135+
MaxMindDatabaseConfig.class, maxMindDatabaseConfig, "asnDatabase", "https://example.com/");
136+
137+
assertThat(getValidationMessages(), containsInAnyOrder(
138+
"HTTP endpoint must be a MaxMind download URL or manifest endpoint: https://example.com/"));
139+
}
140+
141+
@Test
142+
void validate_should_include_path_specific_messages_when_source_types_are_mixed(@TempDir final Path tempDirectory)
143+
throws NoSuchFieldException, IllegalAccessException, IOException {
144+
final Path cityDatabase = Files.createFile(tempDirectory.resolve("GeoLite2-City.mmdb"));
145+
final String s3DatabasePath = "s3://geoip/GeoLite2-ASN.mmdb";
146+
ReflectivelySetField.setField(
147+
MaxMindDatabaseConfig.class, maxMindDatabaseConfig, "cityDatabase", cityDatabase.toString());
148+
ReflectivelySetField.setField(
149+
MaxMindDatabaseConfig.class, maxMindDatabaseConfig, "asnDatabase", s3DatabasePath);
150+
151+
assertThat(getValidationMessages(), containsInAnyOrder(
152+
"Mixed database path source types are not supported. Found local file path: " + cityDatabase,
153+
"Mixed database path source types are not supported. Found S3 URI: " + s3DatabasePath));
154+
}
155+
156+
private Set<String> getValidationMessages() {
157+
return validator.validate(maxMindDatabaseConfig).stream()
158+
.map(ConstraintViolation::getMessage)
159+
.collect(Collectors.toSet());
160+
}
161+
162+
}

0 commit comments

Comments
 (0)