1717/**
1818 * Utility methods for working with links inside arbitrary text.
1919 *
20- * <p>This class can:
20+ * <p>
21+ * This class can:
2122 * <ul>
22- * <li>Extract HTTP(S) links from text</li>
23- * <li>Check whether a link is reachable via HTTP</li>
24- * <li>Replace broken links asynchronously</li>
23+ * <li>Extract HTTP(S) links from text</li>
24+ * <li>Check whether a link is reachable via HTTP</li>
25+ * <li>Replace broken links asynchronously</li>
2526 * </ul>
2627 *
27- * <p>It is intentionally stateless and uses asynchronous HTTP requests
28- * to avoid blocking calling threads.
28+ * <p>
29+ * It is intentionally stateless and uses asynchronous HTTP requests to avoid blocking calling
30+ * threads.
2931 */
3032
3133public class LinkDetection {
@@ -34,14 +36,15 @@ public class LinkDetection {
3436 /**
3537 * Default filters applied when extracting links from text.
3638 *
37- * <p>These filters intentionally ignore:
39+ * <p>
40+ * These filters intentionally ignore:
3841 * <ul>
39- * <li>Suppressed links like {@code <https://example.com>}</li>
40- * <li>Non-HTTP(S) schemes such as {@code ftp://} or {@code file://}</li>
42+ * <li>Suppressed links like {@code <https://example.com>}</li>
43+ * <li>Non-HTTP(S) schemes such as {@code ftp://} or {@code file://}</li>
4144 * </ul>
4245 *
43- * <p>This reduces false positives when scanning chat messages
44- * or source-code snippets.
46+ * <p>
47+ * This reduces false positives when scanning chat messages or source-code snippets.
4548 */
4649
4750 private static final Set <LinkFilter > DEFAULT_FILTERS =
@@ -52,17 +55,18 @@ public class LinkDetection {
5255 */
public enum LinkFilter {
    /**
     * Drops URLs that are enclosed in angle brackets, for example
     * {@code <https://example.com>}.
     *
     * <p>
     * Chat platforms commonly use this notation to suppress a link on purpose.
     */
    SUPPRESSED,

    /**
     * Drops URLs whose scheme is neither HTTP nor HTTPS.
     *
     * <p>
     * This guards against false positives such as {@code ftp://}, {@code file://}, or matches
     * that carry no scheme at all.
     */
    NON_HTTP_SCHEME
}
@@ -74,10 +78,13 @@ private LinkDetection() {
7478 /**
7579 * Extracts HTTP(S) links from the given text.
7680 *
77- * <p>The text is scanned using a URL detector, then filtered and normalized
78- * according to the provided {@link LinkFilter}s.
81+ * <p>
82+ * The text is scanned using a URL detector, then filtered and normalized according to the
83+ * provided {@link LinkFilter}s.
7984 *
80- * <p>Example:
85+ * <p>
86+ * Example:
87+ *
8188 * <pre>{@code
8289 * Set<LinkFilter> filters = Set.of(LinkFilter.SUPPRESSED, LinkFilter.NON_HTTP_SCHEME);
8390 * extractLinks("Visit https://example.com and <ftp://skip.me>", filters)
@@ -91,17 +98,17 @@ private LinkDetection() {
9198
9299 public static List <String > extractLinks (String content , Set <LinkFilter > filter ) {
93100 return new UrlDetector (content , UrlDetectorOptions .BRACKET_MATCH ).detect ()
94- .stream ()
95- .map (url -> toLink (url , filter ))
96- .flatMap (Optional ::stream )
97- .toList ();
101+ .stream ()
102+ .map (url -> toLink (url , filter ))
103+ .flatMap (Optional ::stream )
104+ .toList ();
98105 }
99106
100107 /**
101108 * Checks whether the given text contains at least one detectable URL.
102109 *
103- * <p>This method performs a lightweight detection only and does not
104- * apply any {@link LinkFilter}s.
110+ * <p>
111+ * This method performs a lightweight detection only and does not apply any {@link LinkFilter}s.
105112 *
106113 * @param content the text to scan
107114 * @return {@code true} if at least one URL-like pattern is detected
@@ -114,21 +121,24 @@ public static boolean containsLink(String content) {
114121 /**
115122 * Asynchronously checks whether a URL is considered broken.
116123 *
117- * <p>The check is performed in two steps:
124+ * <p>
125+ * The check is performed in two steps:
118126 * <ol>
119- * <li>A {@code HEAD} request is sent first (cheap and fast)</li>
120- * <li>If that fails or returns an error, a {@code GET} request is used as a fallback</li>
127+ * <li>A {@code HEAD} request is sent first (cheap and fast)</li>
128+ * <li>If that fails or returns an error, a {@code GET} request is used as a fallback</li>
121129 * </ol>
122130 *
123- * <p>A link is considered broken if:
131+ * <p>
132+ * A link is considered broken if:
124133 * <ul>
125- * <li>The URL is malformed or unreachable</li>
126- * <li>The HTTP request fails with an exception</li>
127- * <li>The response status code is 4xx (client error) or 5xx (server error)</li>
134+ * <li>The URL is malformed or unreachable</li>
135+ * <li>The HTTP request fails with an exception</li>
136+ * <li>The response status code is 4xx (client error) or 5xx (server error)</li>
128137 * </ul>
129138 *
130- * <p>Successful responses (2xx) and redirects (3xx) are considered valid links.
131- * The response body is never inspected.
139+ * <p>
140+ * Successful responses (2xx) and redirects (3xx) are considered valid links. The response body
141+ * is never inspected.
132142 *
133143 * @param url the URL to check
134144 * @return a {@code CompletableFuture} completing with {@code true} if the link is broken,
@@ -137,48 +147,55 @@ public static boolean containsLink(String content) {
137147
138148 public static CompletableFuture <Boolean > isLinkBroken (String url ) {
139149 HttpRequest headRequest = HttpRequest .newBuilder (URI .create (url ))
140- .method ("HEAD" , HttpRequest .BodyPublishers .noBody ())
141- .build ();
150+ .method ("HEAD" , HttpRequest .BodyPublishers .noBody ())
151+ .build ();
142152
143153 return HTTP_CLIENT .sendAsync (headRequest , HttpResponse .BodyHandlers .discarding ())
144- .thenApply (response -> {
145- int status = response .statusCode ();
146- // 2xx and 3xx are success, 4xx and 5xx are errors
147- return status >= 400 ;
148- })
149- .exceptionally (ignored -> true )
150- .thenCompose (result -> {
151- if (!Boolean .TRUE .equals (result )) {
152- return CompletableFuture .completedFuture (false );
153- }
154- HttpRequest fallbackGetRequest = HttpRequest .newBuilder (URI .create (url )).GET ().build ();
155- return HTTP_CLIENT .sendAsync (fallbackGetRequest , HttpResponse .BodyHandlers .discarding ())
156- .thenApply (resp -> resp .statusCode () >= 400 )
157- .exceptionally (ignored -> true );
158- });
154+ .thenApply (response -> {
155+ int status = response .statusCode ();
156+ // 2xx and 3xx are success, 4xx and 5xx are errors
157+ return status >= 400 ;
158+ })
159+ .exceptionally (ignored -> true )
160+ .thenCompose (result -> {
161+ if (!Boolean .TRUE .equals (result )) {
162+ return CompletableFuture .completedFuture (false );
163+ }
164+ HttpRequest fallbackGetRequest =
165+ HttpRequest .newBuilder (URI .create (url )).GET ().build ();
166+ return HTTP_CLIENT
167+ .sendAsync (fallbackGetRequest , HttpResponse .BodyHandlers .discarding ())
168+ .thenApply (resp -> resp .statusCode () >= 400 )
169+ .exceptionally (ignored -> true );
170+ });
159171 }
160172
161173 /**
162174 * Replaces all broken HTTP(S) links in the given text.
163175 *
164- * <p>Each detected link is checked asynchronously using
165- * {@link #isLinkBroken(String)}. Only links confirmed as broken
166- * are replaced. Duplicate URLs are checked only once and all occurrences
176+ * <p>
177+ * Each detected link is checked asynchronously using {@link #isLinkBroken(String)}. Only links
178+ * confirmed as broken are replaced. Duplicate URLs are checked only once and all occurrences
167179 * are replaced if found to be broken.
168180 *
169- * <p>This method does not block - all link checks are performed
170- * asynchronously and combined into a single {@code CompletableFuture}.
181+ * <p>
182+ * This method does not block - all link checks are performed asynchronously and combined into a
183+ * single {@code CompletableFuture}.
171184 *
172- * <p>Example:
185+ * <p>
186+ * Example:
187+ *
173188 * <pre>{@code
174189 * replaceDeadLinks("""
175- * Test
176- * http://deadlink/1
177- * http://workinglink/1
178- * """, "(broken link)")
190+ * Test
191+ * http://deadlink/1
192+ * http://workinglink/1
193+ * """, "(broken link)")
179194 * }</pre>
180195 *
181- * <p>Results in:
196+ * <p>
197+ * Results in:
198+ *
182199 * <pre>{@code
183200 * Test
184201 * (broken link)
@@ -187,8 +204,8 @@ public static CompletableFuture<Boolean> isLinkBroken(String url) {
187204 *
188205 * @param text the input text containing URLs
189206 * @param replacement the string used to replace broken links
190- * @return a {@code CompletableFuture} that completes with the modified text,
191- * or the original text if no broken links were found
207+ * @return a {@code CompletableFuture} that completes with the modified text, or the original
208+ * text if no broken links were found
192209 */
193210
194211
@@ -200,42 +217,43 @@ public static CompletableFuture<String> replaceDeadLinks(String text, String rep
200217 }
201218
202219 List <CompletableFuture <String >> deadLinkFutures = links .stream ()
203- .distinct ()
204- .map (link -> isLinkBroken (link )
205- .thenApply (isBroken -> Boolean .TRUE .equals (isBroken ) ? link : null ))
220+ .distinct ()
221+ .map (link -> isLinkBroken (link )
222+ .thenApply (isBroken -> Boolean .TRUE .equals (isBroken ) ? link : null ))
206223
207- .toList ();
224+ .toList ();
208225
209226 return CompletableFuture .allOf (deadLinkFutures .toArray (new CompletableFuture [0 ]))
210- .thenApply (ignored -> deadLinkFutures .stream ()
211- .map (CompletableFuture ::join )
212- .filter (Objects ::nonNull )
213- .toList ())
214- .thenApply (deadLinks -> {
215- String result = text ;
216- for (String deadLink : deadLinks ) {
217- result = result .replace (deadLink , replacement );
218- }
219- return result ;
220- });
227+ .thenApply (ignored -> deadLinkFutures .stream ()
228+ .map (CompletableFuture ::join )
229+ .filter (Objects ::nonNull )
230+ .toList ())
231+ .thenApply (deadLinks -> {
232+ String result = text ;
233+ for (String deadLink : deadLinks ) {
234+ result = result .replace (deadLink , replacement );
235+ }
236+ return result ;
237+ });
221238 }
222239
223240 /**
224241 * Converts a detected {@link Url} into a normalized link string.
225242 *
226- * <p>Applies the provided {@link LinkFilter}s:
243+ * <p>
244+ * Applies the provided {@link LinkFilter}s:
227245 * <ul>
228- * <li>{@link LinkFilter#SUPPRESSED} - filters URLs wrapped in angle brackets</li>
229- * <li>{@link LinkFilter#NON_HTTP_SCHEME} - filters non-HTTP(S) schemes</li>
246+ * <li>{@link LinkFilter#SUPPRESSED} - filters URLs wrapped in angle brackets</li>
247+ * <li>{@link LinkFilter#NON_HTTP_SCHEME} - filters non-HTTP(S) schemes</li>
230248 * </ul>
231249 *
232- * <p>Additionally removes trailing punctuation such as commas or periods
233- * from the detected URL.
250+ * <p>
251+ * Additionally removes trailing punctuation such as commas or periods from the detected URL.
234252 *
235253 * @param url the detected URL
236254 * @param filter active link filters to apply
237- * @return an {@link Optional} containing the normalized link,
238- * or {@code Optional.empty()} if the link should be filtered out
255+ * @return an {@link Optional} containing the normalized link, or {@code Optional.empty()} if
256+ * the link should be filtered out
239257 */
240258
241259 private static Optional <String > toLink (Url url , Set <LinkFilter > filter ) {
@@ -258,4 +276,4 @@ private static Optional<String> toLink(Url url, Set<LinkFilter> filter) {
258276 }
259277 return Optional .of (link );
260278 }
261- }
279+ }
0 commit comments