Skip to content

Commit 327a638

Browse files
authored
Extracting Confluence page history (#5456)
* Extracting page history Signed-off-by: Santhosh Gandhe <1909520+san81@users.noreply.github.com>
1 parent be45f3a commit 327a638

17 files changed

Lines changed: 364 additions & 20 deletions

File tree

data-prepper-plugins/saas-source-plugins/atlassian-commons/src/main/java/org/opensearch/dataprepper/plugins/source/atlassian/rest/AtlassianRestClient.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ protected <T> ResponseEntity<T> invokeRestApi(URI uri, Class<T> responseType) th
5050
} catch (HttpClientErrorException ex) {
5151
HttpStatus statusCode = ex.getStatusCode();
5252
String statusMessage = ex.getMessage();
53-
log.error("An exception has occurred while getting response from Jira search API {}", ex.getMessage());
53+
log.error("An exception has occurred while getting response from search API {}", ex.getMessage());
5454
if (statusCode == HttpStatus.FORBIDDEN) {
5555
throw new UnauthorizedException(statusMessage);
5656
} else if (statusCode == HttpStatus.UNAUTHORIZED) {

data-prepper-plugins/saas-source-plugins/confluence-source/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ dependencies {
2323

2424
testImplementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.13.4'
2525
testImplementation project(path: ':data-prepper-test-common')
26+
testImplementation 'com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.13.0'
2627

2728
implementation(libs.spring.context) {
2829
exclude group: 'commons-logging', module: 'commons-logging'

data-prepper-plugins/saas-source-plugins/confluence-source/src/main/java/org/opensearch/dataprepper/plugins/source/confluence/ConfluenceService.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
import static org.opensearch.dataprepper.plugins.source.confluence.utils.CqlConstants.CONTENT_TYPE_IN;
4343
import static org.opensearch.dataprepper.plugins.source.confluence.utils.CqlConstants.CONTENT_TYPE_NOT_IN;
4444
import static org.opensearch.dataprepper.plugins.source.confluence.utils.CqlConstants.DELIMITER;
45-
import static org.opensearch.dataprepper.plugins.source.confluence.utils.CqlConstants.GREATER_THAN_EQUALS;
45+
import static org.opensearch.dataprepper.plugins.source.confluence.utils.CqlConstants.GREATER_THAN;
4646
import static org.opensearch.dataprepper.plugins.source.confluence.utils.CqlConstants.PREFIX;
4747
import static org.opensearch.dataprepper.plugins.source.confluence.utils.CqlConstants.SPACE_IN;
4848
import static org.opensearch.dataprepper.plugins.source.confluence.utils.CqlConstants.SPACE_NOT_IN;
@@ -148,7 +148,7 @@ private StringBuilder createContentFilterCriteria(ConfluenceSourceConfig configu
148148

149149
String formattedTimeStamp = LocalDateTime.ofInstant(ts, ZoneId.systemDefault())
150150
.format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm"));
151-
StringBuilder cQl = new StringBuilder(LAST_MODIFIED + GREATER_THAN_EQUALS + "\"" + formattedTimeStamp + "\"");
151+
StringBuilder cQl = new StringBuilder(LAST_MODIFIED + GREATER_THAN + "\"" + formattedTimeStamp + "\"");
152152
if (!CollectionUtils.isEmpty(ConfluenceConfigHelper.getSpacesNameIncludeFilter(configuration))) {
153153
cQl.append(SPACE_IN).append(ConfluenceConfigHelper.getSpacesNameIncludeFilter(configuration).stream()
154154
.collect(Collectors.joining(DELIMITER, PREFIX, SUFFIX)))
@@ -169,7 +169,7 @@ private StringBuilder createContentFilterCriteria(ConfluenceSourceConfig configu
169169
.collect(Collectors.joining(DELIMITER, PREFIX, SUFFIX)))
170170
.append(CLOSING_ROUND_BRACKET);
171171
}
172-
172+
cQl.append(" order by " + LAST_MODIFIED);
173173
log.info("Created content filter criteria ConfluenceQl query: {}", cQl);
174174
return cQl;
175175
}

data-prepper-plugins/saas-source-plugins/confluence-source/src/main/java/org/opensearch/dataprepper/plugins/source/confluence/models/ConfluenceItem.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,23 @@ public class ConfluenceItem {
5252
@JsonProperty("space")
5353
private SpaceItem spaceItem;
5454

55+
@JsonProperty("history")
56+
private ContentHistory history;
57+
5558
@JsonIgnore
5659
public long getCreatedTimeMillis() {
57-
return 0L;
60+
if (history == null) {
61+
return 0L;
62+
}
63+
return history.getCreatedDateInMillis();
5864
}
5965

6066
@JsonIgnore
6167
public long getUpdatedTimeMillis() {
62-
return 0L;
68+
if (history == null) {
69+
return 0L;
70+
}
71+
return history.getLastUpdatedInMillis();
6372
}
6473

6574
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*
5+
* The OpenSearch Contributors require contributions made to
6+
* this file be licensed under the Apache-2.0 license or a
7+
* compatible open source license.
8+
*
9+
*/
10+
package org.opensearch.dataprepper.plugins.source.confluence.models;
11+
12+
import com.fasterxml.jackson.annotation.JsonProperty;
13+
import lombok.Getter;
14+
import lombok.Setter;
15+
16+
import java.time.Instant;
17+
18+
@Setter
19+
@Getter
20+
public class ContentHistory {
21+
22+
// Example format "createdDate": "2025-02-17T23:34:44.633Z"
23+
// Jackson converts to Instant type
24+
@JsonProperty("createdDate")
25+
Instant createdDate;
26+
27+
@JsonProperty("lastUpdated")
28+
LastUpdated lastUpdated;
29+
30+
/**
31+
* @return milliseconds since epoch, or 0 if createdDate is null
32+
*/
33+
public long getCreatedDateInMillis() {
34+
return (createdDate != null) ? createdDate.toEpochMilli() : 0L;
35+
}
36+
37+
public long getLastUpdatedInMillis() {
38+
return (lastUpdated != null && lastUpdated.when != null) ? lastUpdated.when.toEpochMilli() : 0L;
39+
}
40+
41+
@Setter
42+
@Getter
43+
public static class LastUpdated {
44+
// Example format "when": "2025-02-17T23:34:44.633Z"
45+
@JsonProperty("when")
46+
Instant when;
47+
}
48+
49+
50+
}

data-prepper-plugins/saas-source-plugins/confluence-source/src/main/java/org/opensearch/dataprepper/plugins/source/confluence/rest/ConfluenceRestClient.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,11 @@
2323
import org.springframework.web.util.UriComponentsBuilder;
2424

2525
import javax.inject.Named;
26+
import java.net.MalformedURLException;
2627
import java.net.URI;
2728
import java.net.URISyntaxException;
2829

30+
import static org.opensearch.dataprepper.plugins.source.confluence.utils.ConfluenceNextLinkValidator.validateAndSanitizeURL;
2931
import static org.opensearch.dataprepper.plugins.source.confluence.utils.CqlConstants.CQL_FIELD;
3032
import static org.opensearch.dataprepper.plugins.source.confluence.utils.CqlConstants.EXPAND_FIELD;
3133
import static org.opensearch.dataprepper.plugins.source.confluence.utils.CqlConstants.EXPAND_VALUE;
@@ -38,6 +40,7 @@ public class ConfluenceRestClient extends AtlassianRestClient {
3840
public static final String REST_API_FETCH_CONTENT = "wiki/rest/api/content/";
3941
public static final String REST_API_CONTENT_EXPAND_PARAM = "?expand=body.view";
4042
//public static final String REST_API_SPACES = "/rest/api/api/spaces";
43+
public static final String WIKI_PARAM = "wiki";
4144
public static final String FIFTY = "50";
4245
public static final String START_AT = "startAt";
4346
public static final String LIMIT_PARAM = "limit";
@@ -78,8 +81,10 @@ public ConfluenceSearchResults getAllContent(StringBuilder cql, int startAt,
7881
URI uri;
7982
if (null != paginationLinks && null != paginationLinks.getNext()) {
8083
try {
81-
uri = new URI(authConfig.getUrl() + paginationLinks.getNext());
82-
} catch (URISyntaxException e) {
84+
String urlString = authConfig.getUrl() + WIKI_PARAM + paginationLinks.getNext();
85+
urlString = validateAndSanitizeURL(urlString);
86+
uri = new URI(urlString);
87+
} catch (URISyntaxException | MalformedURLException e) {
8388
throw new RuntimeException("Failed to construct pagination url.", e);
8489
}
8590
} else {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package org.opensearch.dataprepper.plugins.source.confluence.utils;
2+
3+
import java.net.MalformedURLException;
4+
import java.net.URL;
5+
import java.net.URLDecoder;
6+
import java.net.URLEncoder;
7+
import java.nio.charset.StandardCharsets;
8+
import java.util.HashMap;
9+
import java.util.Map;
10+
import java.util.regex.Pattern;
11+
import java.util.stream.Collectors;
12+
13+
/**
 * Validates the query string of a Confluence pagination ("next") link against an allow-list.
 *
 * <p>Only the parameters named in {@link #ALLOWED_PARAMS} are kept, and only when their
 * URL-decoded value fully matches the associated pattern. Everything else is dropped.
 */
public class ConfluenceNextLinkValidator {
    // Allowed query parameters, each mapped to the pattern its decoded value must fully match.
    // NOTE(review): the cql pattern does not admit ',' — a multi-value "in (...)" clause would
    // presumably cause the cql parameter to be dropped; confirm against real next links.
    private static final Map<String, Pattern> ALLOWED_PARAMS = Map.of(
            "next", Pattern.compile("^(true|false)$"),
            "cursor", Pattern.compile("^[A-Za-z0-9+/=_%\\-]+$"),
            "expand", Pattern.compile("^[A-Za-z0-9+/=_%\\-.,]+$"),
            "limit", Pattern.compile("^\\d{1,3}$"),
            "start", Pattern.compile("^\\d+$"),
            "startAt", Pattern.compile("^\\d+$"),
            "maxResults", Pattern.compile("^\\d+$"),
            "cql", Pattern.compile("^[\\w\\s=\"()><%\\-.:]+$")
    );

    /**
     * Rebuilds {@code urlString} so that its query contains only allow-listed parameters.
     *
     * <p>A URL without a query is returned unchanged. Otherwise the result is
     * {@code protocol://host[:port]path?params}, where {@code params} holds the surviving
     * parameters in their original order, re-encoded as UTF-8. When a parameter name occurs
     * more than once, the last accepted value is used. Tokens without a {@code '='} and tokens
     * with malformed percent-escapes are dropped rather than failing the whole link.
     *
     * @param urlString absolute URL of the pagination link
     * @return the sanitized URL, or {@code urlString} itself when it has no query
     * @throws MalformedURLException if {@code urlString} cannot be parsed as a URL
     */
    public static String validateAndSanitizeURL(String urlString) throws MalformedURLException {
        URL url = new URL(urlString);
        String query = url.getQuery();

        if (query == null || query.isEmpty()) {
            return urlString;
        }

        // Insertion-ordered so the sanitized link keeps the parameter order of the input.
        // Fully qualified to avoid requiring a new import in this file.
        Map<String, String> validatedParams = new java.util.LinkedHashMap<>();

        for (String pair : query.split("&")) {
            int separator = pair.indexOf('=');
            if (separator < 0) {
                // No '=': substring(0, -1) would throw, and no allowed pattern accepts an
                // empty value anyway, so the token is simply dropped.
                continue;
            }

            String key;
            String value;
            try {
                key = URLDecoder.decode(pair.substring(0, separator), StandardCharsets.UTF_8);
                value = URLDecoder.decode(pair.substring(separator + 1), StandardCharsets.UTF_8);
            } catch (IllegalArgumentException ignored) {
                // Malformed percent-escape (e.g. "%zz"): treat the parameter as invalid.
                continue;
            }

            Pattern allowedValue = ALLOWED_PARAMS.get(key);
            if (allowedValue != null && allowedValue.matcher(value).matches()) {
                validatedParams.put(key, value);
            }
        }

        // Rebuild the URL from its validated parts.
        StringBuilder sanitizedURL = new StringBuilder();
        sanitizedURL.append(url.getProtocol()).append("://").append(url.getHost());
        if (url.getPort() != -1) {
            // getHost() excludes the port; keep an explicit one so the link targets the same server.
            sanitizedURL.append(':').append(url.getPort());
        }
        sanitizedURL.append(url.getPath()).append("?");

        String params = validatedParams.entrySet().stream()
                .map(e -> URLEncoder.encode(e.getKey(), StandardCharsets.UTF_8) + "="
                        + URLEncoder.encode(e.getValue(), StandardCharsets.UTF_8))
                .collect(Collectors.joining("&"));

        sanitizedURL.append(params);

        return sanitizedURL.toString();
    }
}
68+

data-prepper-plugins/saas-source-plugins/confluence-source/src/main/java/org/opensearch/dataprepper/plugins/source/confluence/utils/CqlConstants.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
package org.opensearch.dataprepper.plugins.source.confluence.utils;
1212

1313
public class CqlConstants {
14-
public static final String GREATER_THAN_EQUALS = ">=";
14+
public static final String GREATER_THAN = ">";
1515
public static final String CLOSING_ROUND_BRACKET = ")";
1616

1717
public static final String SPACE_IN = " AND space in (";
@@ -23,5 +23,5 @@ public class CqlConstants {
2323
public static final String CONTENT_TYPE_NOT_IN = " AND type not in (";
2424
public static final String CQL_FIELD = "cql";
2525
public static final String EXPAND_FIELD = "expand";
26-
public static final String EXPAND_VALUE = "all,space";
26+
public static final String EXPAND_VALUE = "all,space,history.lastUpdated";
2727
}

data-prepper-plugins/saas-source-plugins/confluence-source/src/test/java/org/opensearch/dataprepper/plugins/source/confluence/configuration/NameConfigTest.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*
5+
* The OpenSearch Contributors require contributions made to
6+
* this file be licensed under the Apache-2.0 license or a
7+
* compatible open source license.
8+
*
9+
*/
110
package org.opensearch.dataprepper.plugins.source.confluence.configuration;
211

312

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*
5+
* The OpenSearch Contributors require contributions made to
6+
* this file be licensed under the Apache-2.0 license or a
7+
* compatible open source license.
8+
*
9+
*/
10+
package org.opensearch.dataprepper.plugins.source.confluence.models;
11+
12+
import com.fasterxml.jackson.databind.ObjectMapper;
13+
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
14+
import org.junit.jupiter.api.Test;
15+
16+
import java.time.Instant;
17+
18+
import static org.junit.jupiter.api.Assertions.assertEquals;
19+
import static org.junit.jupiter.api.Assertions.assertNull;
20+
import static org.junit.jupiter.api.Assertions.assertThrows;
21+
22+
class ContentHistoryTest {
23+
24+
ObjectMapper objectMapper = new ObjectMapper().registerModule(new JavaTimeModule());
25+
26+
@Test
27+
void testGetCreatedDateInMillis_ValidDate() {
28+
ContentHistory history = new ContentHistory();
29+
history.setCreatedDate(Instant.parse("2025-02-17T23:34:44.633Z"));
30+
31+
long expectedMillis = 1739835284633L; // Pre-calculated value for this timestamp
32+
assertEquals(expectedMillis, history.getCreatedDateInMillis());
33+
}
34+
35+
@Test
36+
void testGetLastModifiedInMillis_ValidDate() {
37+
ContentHistory history = new ContentHistory();
38+
ContentHistory.LastUpdated lastUpdated = new ContentHistory.LastUpdated();
39+
lastUpdated.setWhen(Instant.parse("2025-02-17T23:34:44.633Z"));
40+
history.setLastUpdated(lastUpdated);
41+
long expectedMillis = 1739835284633L; // Pre-calculated value for this timestamp
42+
assertEquals(expectedMillis, history.getLastUpdatedInMillis());
43+
}
44+
45+
@Test
46+
void testGetCreatedDateInMillis_NullDate() {
47+
ContentHistory history = new ContentHistory();
48+
history.setCreatedDate(null);
49+
assertEquals(0L, history.getCreatedDateInMillis());
50+
}
51+
52+
@Test
53+
public void testNullValues() throws Exception {
54+
// Test null value
55+
String json = "{\"createdDate\": null, \"lastUpdated\": { \"when\": null}}";
56+
57+
// Test deserialization of null
58+
ContentHistory deserializedData = objectMapper.readValue(json, ContentHistory.class);
59+
assertNull(deserializedData.getCreatedDate());
60+
assertNull(deserializedData.getLastUpdated().when);
61+
}
62+
63+
@Test
64+
public void testNonNullValues() throws Exception {
65+
// JSON with non-null timestamps
66+
String json = "{\"createdDate\": \"2025-02-23T23:20:20.1234z\", \"lastUpdated\": { \"when\": \"2025-02-24T23:20:20.1234z\"}}";
67+
68+
// Test deserialization of non-null timestamps
69+
ContentHistory deserializedData = objectMapper.readValue(json, ContentHistory.class);
70+
assertEquals(Instant.parse("2025-02-23T23:20:20.123400Z"), deserializedData.getCreatedDate());
71+
assertEquals(Instant.parse("2025-02-24T23:20:20.123400Z"), deserializedData.getLastUpdated().when);
72+
}
73+
74+
@Test
75+
public void testGetCreatedDateInMillis_InvalidDate() {
76+
String invalidJson = "{\"createdDate\":\"invalid-date\"}";
77+
assertThrows(Exception.class, () -> objectMapper.readValue(invalidJson, ContentHistory.class));
78+
}
79+
}
80+

0 commit comments

Comments
 (0)