Skip to content

Commit 7d3bf57

Browse files
committed
commit: Broken links resolve requested changes
- Rename replaceDeadLinks to replaceBrokenLinks for consistency
- Use Optional instead of null values in stream processing
- Add convenience overload for extractLinks with default filters
- Update javadocs to be more generic and future-proof
- Move implementation details from javadoc to inline comments
- Replace 'ignored' lambda params with '_'

Resolves the review comments from @Zabuzard
1 parent ef38986 commit 7d3bf57

File tree

1 file changed

+68
-74
lines changed

1 file changed

+68
-74
lines changed

application/src/main/java/org/togetherjava/tjbot/features/utils/LinkDetection.java

Lines changed: 68 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
import java.net.http.HttpRequest;
1010
import java.net.http.HttpResponse;
1111
import java.util.List;
12-
import java.util.Objects;
1312
import java.util.Optional;
1413
import java.util.Set;
1514
import java.util.concurrent.CompletableFuture;
@@ -37,14 +36,8 @@ public class LinkDetection {
3736
* Default filters applied when extracting links from text.
3837
*
3938
* <p>
40-
* These filters intentionally ignore:
41-
* <ul>
42-
* <li>Suppressed links like {@code <https://example.com>}</li>
43-
* <li>Non-HTTP(S) schemes such as {@code ftp://} or {@code file://}</li>
44-
* </ul>
45-
*
46-
* <p>
47-
* This reduces false positives when scanning chat messages or source-code snippets.
39+
* Links to intentionally ignore in order to reduce false positives when scanning chat messages
40+
* or source-code snippets.
4841
*/
4942

5043
private static final Set<LinkFilter> DEFAULT_FILTERS =
@@ -76,15 +69,15 @@ private LinkDetection() {
7669
}
7770

7871
/**
79-
* Extracts HTTP(S) links from the given text.
72+
* Extracts links from the given text.
8073
*
8174
* <p>
8275
* The text is scanned using a URL detector, then filtered and normalized according to the
8376
* provided {@link LinkFilter}s.
8477
*
8578
* <p>
8679
* Example:
87-
*
80+
*
8881
* <pre>{@code
8982
* Set<LinkFilter> filters = Set.of(LinkFilter.SUPPRESSED, LinkFilter.NON_HTTP_SCHEME);
9083
* extractLinks("Visit https://example.com and <ftp://skip.me>", filters)
@@ -98,10 +91,24 @@ private LinkDetection() {
9891

9992
public static List<String> extractLinks(String content, Set<LinkFilter> filter) {
10093
return new UrlDetector(content, UrlDetectorOptions.BRACKET_MATCH).detect()
101-
.stream()
102-
.map(url -> toLink(url, filter))
103-
.flatMap(Optional::stream)
104-
.toList();
94+
.stream()
95+
.map(url -> toLink(url, filter))
96+
.flatMap(Optional::stream)
97+
.toList();
98+
}
99+
100+
/**
101+
* Extracts links from the given text using default filters.
102+
*
103+
* <p>
104+
* This is a convenience method that uses {@link #DEFAULT_FILTERS}.
105+
*
106+
* @param content the text to scan for links
107+
* @return a list of extracted links in the order they appear in the text
108+
* @see #extractLinks(String, Set)
109+
*/
110+
public static List<String> extractLinks(String content) {
111+
return extractLinks(content, DEFAULT_FILTERS);
105112
}
106113

107114
/**
@@ -122,13 +129,6 @@ public static boolean containsLink(String content) {
122129
* Asynchronously checks whether a URL is considered broken.
123130
*
124131
* <p>
125-
* The check is performed in two steps:
126-
* <ol>
127-
* <li>A {@code HEAD} request is sent first (cheap and fast)</li>
128-
* <li>If that fails or returns an error, a {@code GET} request is used as a fallback</li>
129-
* </ol>
130-
*
131-
* <p>
132132
* A link is considered broken if:
133133
* <ul>
134134
* <li>The URL is malformed or unreachable</li>
@@ -146,32 +146,34 @@ public static boolean containsLink(String content) {
146146
*/
147147

148148
public static CompletableFuture<Boolean> isLinkBroken(String url) {
149+
// Try HEAD request first (cheap and fast)
149150
HttpRequest headRequest = HttpRequest.newBuilder(URI.create(url))
150-
.method("HEAD", HttpRequest.BodyPublishers.noBody())
151-
.build();
151+
.method("HEAD", HttpRequest.BodyPublishers.noBody())
152+
.build();
152153

153154
return HTTP_CLIENT.sendAsync(headRequest, HttpResponse.BodyHandlers.discarding())
154-
.thenApply(response -> {
155-
int status = response.statusCode();
156-
// 2xx and 3xx are success, 4xx and 5xx are errors
157-
return status >= 400;
158-
})
159-
.exceptionally(ignored -> true)
160-
.thenCompose(result -> {
161-
if (!Boolean.TRUE.equals(result)) {
162-
return CompletableFuture.completedFuture(false);
163-
}
164-
HttpRequest fallbackGetRequest =
165-
HttpRequest.newBuilder(URI.create(url)).GET().build();
166-
return HTTP_CLIENT
167-
.sendAsync(fallbackGetRequest, HttpResponse.BodyHandlers.discarding())
168-
.thenApply(resp -> resp.statusCode() >= 400)
169-
.exceptionally(ignored -> true);
170-
});
155+
.thenApply(response -> {
156+
int status = response.statusCode();
157+
// 2xx and 3xx are success, 4xx and 5xx are errors
158+
return status >= 400;
159+
})
160+
.exceptionally(_ -> true)
161+
.thenCompose(result -> {
162+
if (!Boolean.TRUE.equals(result)) {
163+
return CompletableFuture.completedFuture(false);
164+
}
165+
// If HEAD fails, fall back to GET request (some servers don't support HEAD)
166+
HttpRequest fallbackGetRequest =
167+
HttpRequest.newBuilder(URI.create(url)).GET().build();
168+
return HTTP_CLIENT
169+
.sendAsync(fallbackGetRequest, HttpResponse.BodyHandlers.discarding())
170+
.thenApply(resp -> resp.statusCode() >= 400)
171+
.exceptionally(_ -> true);
172+
});
171173
}
172174

173175
/**
174-
* Replaces all broken HTTP(S) links in the given text.
176+
* Replaces all broken links in the given text.
175177
*
176178
* <p>
177179
* Each detected link is checked asynchronously using {@link #isLinkBroken(String)}. Only links
@@ -184,9 +186,9 @@ public static CompletableFuture<Boolean> isLinkBroken(String url) {
184186
*
185187
* <p>
186188
* Example:
187-
*
189+
*
188190
* <pre>{@code
189-
* replaceDeadLinks("""
191+
* replaceBrokenLinks("""
190192
* Test
191193
* http://deadlink/1
192194
* http://workinglink/1
@@ -195,7 +197,7 @@ public static CompletableFuture<Boolean> isLinkBroken(String url) {
195197
*
196198
* <p>
197199
* Results in:
198-
*
200+
*
199201
* <pre>{@code
200202
* Test
201203
* (broken link)
@@ -208,47 +210,39 @@ public static CompletableFuture<Boolean> isLinkBroken(String url) {
208210
* text if no broken links were found
209211
*/
210212

211-
212-
public static CompletableFuture<String> replaceDeadLinks(String text, String replacement) {
213+
public static CompletableFuture<String> replaceBrokenLinks(String text, String replacement) {
213214
List<String> links = extractLinks(text, DEFAULT_FILTERS);
214215

215216
if (links.isEmpty()) {
216217
return CompletableFuture.completedFuture(text);
217218
}
218219

219-
List<CompletableFuture<String>> deadLinkFutures = links.stream()
220-
.distinct()
221-
.map(link -> isLinkBroken(link)
222-
.thenApply(isBroken -> Boolean.TRUE.equals(isBroken) ? link : null))
223-
224-
.toList();
220+
List<CompletableFuture<Optional<String>>> brokenLinkFutures = links.stream()
221+
.distinct()
222+
.map(link -> isLinkBroken(link)
223+
.thenApply(isBroken -> Boolean.TRUE.equals(isBroken) ? Optional.of(link) : Optional.<String>empty()))
224+
.toList();
225225

226-
return CompletableFuture.allOf(deadLinkFutures.toArray(new CompletableFuture[0]))
227-
.thenApply(ignored -> deadLinkFutures.stream()
228-
.map(CompletableFuture::join)
229-
.filter(Objects::nonNull)
230-
.toList())
231-
.thenApply(deadLinks -> {
232-
String result = text;
233-
for (String deadLink : deadLinks) {
234-
result = result.replace(deadLink, replacement);
235-
}
236-
return result;
237-
});
226+
return CompletableFuture.allOf(brokenLinkFutures.toArray(new CompletableFuture[0]))
227+
.thenApply(_ -> brokenLinkFutures.stream()
228+
.map(CompletableFuture::join)
229+
.flatMap(Optional::stream)
230+
.toList())
231+
.thenApply(brokenLinks -> {
232+
String result = text;
233+
for (String brokenLink : brokenLinks) {
234+
result = result.replace(brokenLink, replacement);
235+
}
236+
return result;
237+
});
238238
}
239239

240240
/**
241241
* Converts a detected {@link Url} into a normalized link string.
242242
*
243243
* <p>
244-
* Applies the provided {@link LinkFilter}s:
245-
* <ul>
246-
* <li>{@link LinkFilter#SUPPRESSED} - filters URLs wrapped in angle brackets</li>
247-
* <li>{@link LinkFilter#NON_HTTP_SCHEME} - filters non-HTTP(S) schemes</li>
248-
* </ul>
249-
*
250-
* <p>
251-
* Additionally removes trailing punctuation such as commas or periods from the detected URL.
244+
* Applies the provided {@link LinkFilter}s. Additionally removes trailing punctuation such as
245+
* commas or periods from the detected URL.
252246
*
253247
* @param url the detected URL
254248
* @param filter active link filters to apply
@@ -276,4 +270,4 @@ private static Optional<String> toLink(Url url, Set<LinkFilter> filter) {
276270
}
277271
return Optional.of(link);
278272
}
279-
}
273+
}

0 commit comments

Comments
 (0)