Skip to content

Commit 24998cf

Browse files
committed
Add utilities to detect and replace broken links V2
1 parent 66b8d6e commit 24998cf

File tree

1 file changed

+25
-18
lines changed

1 file changed

+25
-18
lines changed

application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,13 @@
1717
* Utility class to detect links.
1818
*/
1919
public class LinkDetection {
20-
private static final HttpClient HTTP_CLIENT = HttpClient.newHttpClient();
20+
private static final HttpClient HTTP_CLIENT =
21+
HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NORMAL).build();
2122

22-
private static final Set<LinkFilter> DEFAULT_FILTERS =
23+
/**
24+
* Default filters used when extracting links: skip suppressed URLs and non-http schemes.
25+
*/
26+
private static final Set<LinkFilter> DEFAULT_EXTRACT_FILTERS =
2327
Set.of(LinkFilter.SUPPRESSED, LinkFilter.NON_HTTP_SCHEME);
2428

2529
/**
@@ -78,23 +82,22 @@ public static boolean containsLink(String content) {
7882
* </ul>
7983
*
8084
* <p>
81-
* The method first performs an HTTP {@code HEAD} request and falls back to an HTTP {@code GET}
82-
* request if the {@code HEAD} request indicates a failure.
83-
* </p>
84-
*
85-
* <p>
8685
* Notes:
8786
* <ul>
8887
* <li>Status code {@code 200} is considered valid, even if the response body is empty</li>
8988
* <li>The response body content is not inspected</li>
9089
* </ul>
9190
*
92-
* @param url the URL to check (must be a valid {@link URI})
91+
* @param url the URL to check
9392
* @return a future completing with {@code true} if the link is broken
9493
* @throws IllegalArgumentException if the given URL is not a valid URI
9594
*/
9695

96+
9797
public static CompletableFuture<Boolean> isLinkBroken(String url) {
98+
// Quick check with HEAD first, since it is cheaper than GET. Some servers don't implement
99+
// HEAD properly, so if HEAD indicates a failure,
100+
// fall back to GET to confirm the resource status.
98101
HttpRequest headCheckRequest = HttpRequest.newBuilder(URI.create(url))
99102
.method("HEAD", HttpRequest.BodyPublishers.noBody())
100103
.build();
@@ -104,7 +107,7 @@ public static CompletableFuture<Boolean> isLinkBroken(String url) {
104107
int status = response.statusCode();
105108
return status < 200 || status >= 400;
106109
})
107-
.exceptionally(ignored -> true)
110+
.exceptionally(_ -> true)
108111
.thenCompose(result -> {
109112
if (!result) {
110113
return CompletableFuture.completedFuture(false);
@@ -127,7 +130,7 @@ public static CompletableFuture<Boolean> isLinkBroken(String url) {
127130
*
128131
* <p>
129132
* Example:
130-
*
133+
*
131134
* <pre>{@code
132135
* replaceDeadLinks("""
133136
* Test
@@ -138,19 +141,19 @@ public static CompletableFuture<Boolean> isLinkBroken(String url) {
138141
*
139142
* <p>
140143
* Results in:
141-
*
144+
*
142145
* <pre>{@code
143146
* Test
144147
* broken
145148
* http://workinglink/1
146149
* }</pre>
147150
*
148-
* @param text the input text containing URLs (must not be {@code null})
149-
* @param replacement the string to replace broken links with (must not be {@code null})
151+
* @param text the input text containing URLs
152+
* @param replacement the string to replace broken links with
150153
* @return a future containing the modified text
151154
*/
152155
public static CompletableFuture<String> replaceDeadLinks(String text, String replacement) {
153-
List<String> links = extractLinks(text, DEFAULT_FILTERS);
156+
List<String> links = extractLinks(text, DEFAULT_EXTRACT_FILTERS);
154157

155158
if (links.isEmpty()) {
156159
return CompletableFuture.completedFuture(text);
@@ -164,16 +167,20 @@ public static CompletableFuture<String> replaceDeadLinks(String text, String rep
164167

165168

166169
return CompletableFuture.allOf(deadLinkFutures.toArray(CompletableFuture[]::new))
167-
.thenApply(ignored -> deadLinkFutures.stream()
170+
.thenApply(_ -> deadLinkFutures.stream()
168171
.map(CompletableFuture::join)
169172
.flatMap(Optional::stream)
170173
.toList())
171174
.thenApply(deadLinks -> {
172-
String result = text;
175+
StringBuilder sb = new StringBuilder(text);
173176
for (String deadLink : deadLinks) {
174-
result = result.replace(deadLink, replacement);
177+
int idx = sb.indexOf(deadLink);
178+
while (idx != -1) {
179+
sb.replace(idx, idx + deadLink.length(), replacement);
180+
idx = sb.indexOf(deadLink, idx + replacement.length());
181+
}
175182
}
176-
return result;
183+
return sb.toString();
177184
});
178185
}
179186

0 commit comments

Comments
 (0)