Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ public void executePartition(SaasWorkerProgressState state,
.withData(t)
.build())
.map(Record::new)
.peek(record -> record.getData().getMetadata().setAttribute(SPACE, space.toLowerCase()))
.collect(Collectors.toList());

try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@
import java.util.Queue;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static org.opensearch.dataprepper.plugins.source.confluence.configuration.NameConfig.VALID_SPACE_KEY_REGEX;
import static org.opensearch.dataprepper.plugins.source.confluence.utils.Constants.LAST_MODIFIED;
import static org.opensearch.dataprepper.plugins.source.confluence.utils.CqlConstants.CLOSING_ROUND_BRACKET;
import static org.opensearch.dataprepper.plugins.source.confluence.utils.CqlConstants.CONTENT_TYPE_IN;
Expand Down Expand Up @@ -238,20 +238,19 @@ private void validateSpaceFilters(ConfluenceSourceConfig configuration) {
List<String> badFilters = new ArrayList<>();
Set<String> includedSpaces = new HashSet<>();
List<String> includedAndExcludedSpaces = new ArrayList<>();
Pattern regex = Pattern.compile("[^A-Z0-9]");
ConfluenceConfigHelper.getSpacesNameIncludeFilter(configuration).forEach(spaceFilter -> {
Matcher matcher = regex.matcher(spaceFilter);
Matcher matcher = VALID_SPACE_KEY_REGEX.matcher(spaceFilter);
includedSpaces.add(spaceFilter);
if (matcher.find() || spaceFilter.length() <= 1 || spaceFilter.length() > 100) {
if (!matcher.find() || spaceFilter.length() <= 1 || spaceFilter.length() > 100) {
badFilters.add(spaceFilter);
}
});
ConfluenceConfigHelper.getSpacesNameExcludeFilter(configuration).forEach(spaceFilter -> {
Matcher matcher = regex.matcher(spaceFilter);
Matcher matcher = VALID_SPACE_KEY_REGEX.matcher(spaceFilter);
if (includedSpaces.contains(spaceFilter)) {
includedAndExcludedSpaces.add(spaceFilter);
}
if (matcher.find() || spaceFilter.length() <= 1 || spaceFilter.length() > 100) {
if (!matcher.find() || spaceFilter.length() <= 1 || spaceFilter.length() > 100) {
badFilters.add(spaceFilter);
}
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
@Getter
public class NameConfig {

Pattern projectKeysRegex = Pattern.compile("^[A-Z0-9]+$");
public static final Pattern VALID_SPACE_KEY_REGEX = Pattern.compile("^[A-Za-z0-9]+$");

@JsonProperty("include")
@Size(max = 100, message = "Space name type filter should not be more than 100")
Expand All @@ -40,7 +40,7 @@ boolean isValidSpaceKeys() {

boolean checkGivenListForRegex(List<String> list) {
for (String value : list) {
if (value != null && !projectKeysRegex.matcher(value).matches()) {
if (value == null || !VALID_SPACE_KEY_REGEX.matcher(value).matches()) {
return false;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,43 @@
package org.opensearch.dataprepper.plugins.source.confluence.configuration;

import com.fasterxml.jackson.annotation.JsonProperty;
import jakarta.validation.constraints.AssertTrue;
import jakarta.validation.constraints.Size;
import lombok.Getter;
import org.opensearch.dataprepper.plugins.source.confluence.utils.ConfluenceContentType;

import java.util.ArrayList;
import java.util.List;

@Getter
public class PageTypeConfig {

@JsonProperty("include")
@Size(max = 1000, message = "Page type filter should not be more than 1000")
private List<String> include = new ArrayList<>();
List<String> include = new ArrayList<>();

@JsonProperty("exclude")
@Size(max = 1000, message = "Page type filter should not be more than 1000")
private List<String> exclude = new ArrayList<>();
List<String> exclude = new ArrayList<>();

@AssertTrue(message = "Confluence PageType should be one of [page, blogpost, comment, attachment]")
boolean isValidPageType() {
return checkGivenListForValidPageTypes(include)
&& checkGivenListForValidPageTypes(exclude)
&& noOverlapBetweenIncludeAndExclude();

@oeyh oeyh Apr 16, 2025

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: this noOverlapBetweenIncludeAndExclude() doesn't seem necessary here since there's a separate validation for it just below this. It also doesn't match the error message here.

}

@AssertTrue(message = "There should be no overlap between include and exclude values under PageType filter")
boolean noOverlapBetweenIncludeAndExclude() {
return include.stream().noneMatch(exclude::contains);
}

boolean checkGivenListForValidPageTypes(List<String> list) {
for (String value : list) {
if (ConfluenceContentType.fromString(value) == null) {
return false;
}
}
return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@

@AllArgsConstructor
public enum ConfluenceContentType {
SPACE("SPACE"),
PAGE("PAGE"),
BLOGPOST("BLOGPOST"),
COMMENT("COMMENT"),
ATTACHMENT("ATTACHMENT");
SPACE("space"),
PAGE("page"),
BLOGPOST("blogpost"),
COMMENT("comment"),
ATTACHMENT("attachment");

@Getter
private final String type;

public static ConfluenceContentType fromString(String value) {
for (ConfluenceContentType contentType : ConfluenceContentType.values()) {
if (contentType.type.equalsIgnoreCase(value)) {
if (contentType.type.equals(value)) {
return contentType;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public class ConfluenceNextLinkValidator {
"start", Pattern.compile("^\\d+$"),
"startAt", Pattern.compile("^\\d+$"),
"maxResults", Pattern.compile("^\\d+$"),
"cql", Pattern.compile("^[\\w\\s=\"()><%\\-.:]+$")
"cql", Pattern.compile("^[\\w\\s=\"()><%\\-.:,]+$")
);

public static String validateAndSanitizeURL(String urlString) throws MalformedURLException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ void testListItems() {
void testExecutePartition() throws Exception {
ConfluenceClient confluenceClient = new ConfluenceClient(confluenceService, confluenceIterator, executorServiceProvider, confluenceSourceConfig);
Map<String, Object> keyAttributes = new HashMap<>();
keyAttributes.put("project", "test");
keyAttributes.put("space", "test");
when(saasWorkerProgressState.getKeyAttributes()).thenReturn(keyAttributes);
List<String> itemIds = new ArrayList<>();
itemIds.add(null);
Expand Down Expand Up @@ -127,7 +127,7 @@ void testExecutePartitionError() throws Exception {
void bufferWriteRuntimeTest() throws Exception {
ConfluenceClient confluenceClient = new ConfluenceClient(confluenceService, confluenceIterator, executorServiceProvider, confluenceSourceConfig);
Map<String, Object> keyAttributes = new HashMap<>();
keyAttributes.put("project", "test");
keyAttributes.put("space", "test");
when(saasWorkerProgressState.getKeyAttributes()).thenReturn(keyAttributes);
List<String> itemIds = List.of("ID1", "ID2", "ID3", "ID4");
when(saasWorkerProgressState.getItemIds()).thenReturn(itemIds);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ void testConfluenceServiceInitialization() throws JsonProcessingException {
public void testGetPages() throws JsonProcessingException {
List<String> contentType = new ArrayList<>();
List<String> spaceKey = new ArrayList<>();
contentType.add("PAGE");
contentType.add("page");
spaceKey.add("KAN");
ConfluenceSourceConfig confluenceSourceConfig = createConfluenceConfiguration(BASIC, contentType, spaceKey);
ConfluenceService confluenceService = spy(new ConfluenceService(confluenceSourceConfig, confluenceRestClient, pluginMetrics));
Expand Down Expand Up @@ -220,7 +220,7 @@ public void testGetPages() throws JsonProcessingException {
public void buildIssueItemInfoMultipleFutureThreads() throws JsonProcessingException {
List<String> pageType = new ArrayList<>();
List<String> spaceKey = new ArrayList<>();
pageType.add("PAGE");
pageType.add("page");
ConfluenceSourceConfig confluenceSourceConfig = createConfluenceConfiguration(BASIC, pageType, spaceKey);
ConfluenceService confluenceService = spy(new ConfluenceService(confluenceSourceConfig, confluenceRestClient, pluginMetrics));
List<ConfluenceItem> mockIssues = new ArrayList<>();
Expand All @@ -247,7 +247,7 @@ public void buildIssueItemInfoMultipleFutureThreads() throws JsonProcessingExcep
public void testBadSpaceKeys() throws JsonProcessingException {
List<String> pageType = new ArrayList<>();
List<String> spaceKey = new ArrayList<>();
pageType.add("PAGE");
pageType.add("page");
spaceKey.add("Bad Project Key");
spaceKey.add("A");
spaceKey.add("!@#$");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ void testValidSpaceKeys_WithValidInput() {
nameConfig.include.add("ABC123");
nameConfig.include.add("XYZ789");
nameConfig.exclude.add("TEST123");
nameConfig.exclude.add("Test123");

assertTrue(nameConfig.isValidSpaceKeys());
}
Expand All @@ -41,6 +42,8 @@ void testValidSpaceKeys_WithInvalidInclude() {
NameConfig nameConfig = new NameConfig();
nameConfig.include.add("ABC-123"); // Contains invalid character
nameConfig.exclude.add("TEST123");
nameConfig.exclude.add("Test123");
nameConfig.exclude.add("<<space-key>>");

assertFalse(nameConfig.isValidSpaceKeys());
}
Expand All @@ -49,7 +52,8 @@ void testValidSpaceKeys_WithInvalidInclude() {
void testValidSpaceKeys_WithInvalidExclude() {
NameConfig nameConfig = new NameConfig();
nameConfig.include.add("ABC123");
nameConfig.exclude.add("TEST@123"); // Contains invalid character
nameConfig.exclude.add("TEST@123");
nameConfig.exclude.add("Test@123");// Contains invalid character

assertFalse(nameConfig.isValidSpaceKeys());
}
Expand All @@ -60,7 +64,7 @@ void testCheckGivenListForRegex_WithNullValue() {
List<String> testList = new ArrayList<>();
testList.add(null);

assertTrue(nameConfig.checkGivenListForRegex(testList));
assertFalse(nameConfig.checkGivenListForRegex(testList));
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*
*/
package org.opensearch.dataprepper.plugins.source.confluence.configuration;


import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

class PageTypeConfigTest {

@Test
void testValidPageType_WithValidInput() {
PageTypeConfig pageTypeConfig = new PageTypeConfig();
pageTypeConfig.include.add("page");
pageTypeConfig.include.add("comment");
pageTypeConfig.exclude.add("blogpost");

assertTrue(pageTypeConfig.isValidPageType());
}

@Test
void testValidPageType_WithOverlappingInput() {
PageTypeConfig pageTypeConfig = new PageTypeConfig();
pageTypeConfig.include.add("page");
pageTypeConfig.include.add("comment");
pageTypeConfig.exclude.add("page");
pageTypeConfig.exclude.add("blogpost");

assertFalse(pageTypeConfig.isValidPageType());
}

@Test
void testValidPageType_WithNoOverlappingInput_but_null_include() {
PageTypeConfig pageTypeConfig = new PageTypeConfig();
pageTypeConfig.exclude.add("page");
pageTypeConfig.exclude.add("blogpost");
assertTrue(pageTypeConfig.isValidPageType());
}

@Test
void testValidPageType_WithNoOverlappingInput_but_null_exclude() {
PageTypeConfig pageTypeConfig = new PageTypeConfig();
pageTypeConfig.include.add("page");
pageTypeConfig.include.add("blogpost");
assertTrue(pageTypeConfig.isValidPageType());
}

@Test
void testValidSpaceKeys_WithEmptyLists() {
PageTypeConfig pageTypeConfig = new PageTypeConfig();
assertTrue(pageTypeConfig.isValidPageType());
}

@Test
void testValidSpaceKeys_WithInvalidInclude() {
PageTypeConfig pageTypeConfig = new PageTypeConfig();
pageTypeConfig.include.add("BlogPost"); // Contains invalid character
pageTypeConfig.exclude.add("Page");
pageTypeConfig.exclude.add("page");
pageTypeConfig.include.add("<<page>>");

assertFalse(pageTypeConfig.isValidPageType());
}

@Test
void testValidSpaceKeys_WithInvalidExclude() {
PageTypeConfig pageTypeConfig = new PageTypeConfig();
pageTypeConfig.include.add("page");
pageTypeConfig.exclude.add("BlogPost");
pageTypeConfig.exclude.add("Test@123");// Contains invalid character

assertFalse(pageTypeConfig.isValidPageType());
}

@Test
void testCheckGivenListForRegex_WithNullValue() {
PageTypeConfig pageTypeConfig = new PageTypeConfig();
pageTypeConfig.include.add(null);
assertFalse(pageTypeConfig.isValidPageType());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ void testEnumConstants() {

@Test
void testTypeValues() {
assertEquals("SPACE", ConfluenceContentType.SPACE.getType());
assertEquals("PAGE", ConfluenceContentType.PAGE.getType());
assertEquals("COMMENT", ConfluenceContentType.COMMENT.getType());
assertEquals("ATTACHMENT", ConfluenceContentType.ATTACHMENT.getType());
assertEquals("BLOGPOST", ConfluenceContentType.BLOGPOST.getType());
assertEquals("space", ConfluenceContentType.SPACE.getType());
assertEquals("page", ConfluenceContentType.PAGE.getType());
assertEquals("comment", ConfluenceContentType.COMMENT.getType());
assertEquals("attachment", ConfluenceContentType.ATTACHMENT.getType());
assertEquals("blogpost", ConfluenceContentType.BLOGPOST.getType());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,21 @@

class ConfluenceNextLinkValidatorTest {

@Test
void testValidURL_with_multile_filter_values_in_cql() throws Exception {
String validUrl = "https://hostname/wiki/rest/api/content/search?next=true&cursor=_f_NTA%3D_sa_WzE3Mzk4MTM4MzIwMDAsIlx0NTAzOTcyOTMgciFcImAoQz47PVtPZSlHR2lDcEgqIGNwIl0%3D&expand=all,space,history.lastUpdated&limit=50&start=50&startAt=0&cql=lastModified%3E%221969-12-31%2016:00%22%20AND%20space%20in%20(%22SD%22)%20AND%20type%20in%20(%22page%22,%22blogpost%22,%22comment%22)%20order%20by%20lastModified";
String sanitized = ConfluenceNextLinkValidator.validateAndSanitizeURL(validUrl);
assertNotNull(sanitized);
assertTrue(sanitized.contains("next=true"));
assertTrue(sanitized.contains("limit=50"));
assertTrue(sanitized.contains("start=50"));
assertTrue(sanitized.contains("cql="));
assertTrue(sanitized.contains("cursor=_f_NTA%3D_sa_WzE3Mzk4MTM4MzIwMDAsIlx0NTAzOTcyOTMgciFcImAoQz47PVtPZSlHR2lDcEgqIGNwIl0"));
assertTrue(sanitized.contains("expand=all%2Cspace%2Chistory.lastUpdated"));
assertTrue(sanitized.contains("cql=lastModified%3E%221969-12-31+16%3A00%22+AND+space+in+%28%22SD%22%29+AND+type+in+%28%22page%22%2C%22blogpost%22%2C%22comment%22%29+order+by+lastModified"));

}

@Test
void testValidURL() throws Exception {
String validUrl = "http://hostname/rest/api/content/search?next=true&limit=25&start=25";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ public void executePartition(SaasWorkerProgressState state,
.withData(t)
.build())
.map(Record::new)
.peek(record -> record.getData().getMetadata().setAttribute(PROJECT, project.toLowerCase()))
.collect(Collectors.toList());

try {
Expand Down