99import java .net .http .HttpRequest ;
1010import java .net .http .HttpResponse ;
1111import java .util .List ;
12- import java .util .Objects ;
1312import java .util .Optional ;
1413import java .util .Set ;
1514import java .util .concurrent .CompletableFuture ;
@@ -37,14 +36,8 @@ public class LinkDetection {
3736 * Default filters applied when extracting links from text.
3837 *
3938 * <p>
40- * These filters intentionally ignore:
41- * <ul>
42- * <li>Suppressed links like {@code <https://example.com>}</li>
43- * <li>Non-HTTP(S) schemes such as {@code ftp://} or {@code file://}</li>
44- * </ul>
45- *
46- * <p>
47- * This reduces false positives when scanning chat messages or source-code snippets.
39+ * Links to intentionally ignore in order to reduce false positives when scanning chat messages
40+ * or source-code snippets.
4841 */
4942
5043 private static final Set <LinkFilter > DEFAULT_FILTERS =
@@ -76,15 +69,15 @@ private LinkDetection() {
7669 }
7770
7871 /**
79- * Extracts HTTP(S) links from the given text.
72+ * Extracts links from the given text.
8073 *
8174 * <p>
8275 * The text is scanned using a URL detector, then filtered and normalized according to the
8376 * provided {@link LinkFilter}s.
8477 *
8578 * <p>
8679 * Example:
87- *
80+ *
8881 * <pre>{@code
8982 * Set<LinkFilter> filters = Set.of(LinkFilter.SUPPRESSED, LinkFilter.NON_HTTP_SCHEME);
9083 * extractLinks("Visit https://example.com and <ftp://skip.me>", filters)
@@ -98,10 +91,24 @@ private LinkDetection() {
9891
9992 public static List <String > extractLinks (String content , Set <LinkFilter > filter ) {
10093 return new UrlDetector (content , UrlDetectorOptions .BRACKET_MATCH ).detect ()
101- .stream ()
102- .map (url -> toLink (url , filter ))
103- .flatMap (Optional ::stream )
104- .toList ();
94+ .stream ()
95+ .map (url -> toLink (url , filter ))
96+ .flatMap (Optional ::stream )
97+ .toList ();
98+ }
99+
100+ /**
101+ * Extracts links from the given text using default filters.
102+ *
103+ * <p>
104+ * This is a convenience method that uses {@link #DEFAULT_FILTERS}.
105+ *
106+ * @param content the text to scan for links
107+ * @return a list of extracted links in the order they appear in the text
108+ * @see #extractLinks(String, Set)
109+ */
110+ public static List <String > extractLinks (String content ) {
111+ return extractLinks (content , DEFAULT_FILTERS );
105112 }
106113
107114 /**
@@ -122,13 +129,6 @@ public static boolean containsLink(String content) {
122129 * Asynchronously checks whether a URL is considered broken.
123130 *
124131 * <p>
125- * The check is performed in two steps:
126- * <ol>
127- * <li>A {@code HEAD} request is sent first (cheap and fast)</li>
128- * <li>If that fails or returns an error, a {@code GET} request is used as a fallback</li>
129- * </ol>
130- *
131- * <p>
132132 * A link is considered broken if:
133133 * <ul>
134134 * <li>The URL is malformed or unreachable</li>
@@ -146,32 +146,34 @@ public static boolean containsLink(String content) {
146146 */
147147
148148 public static CompletableFuture <Boolean > isLinkBroken (String url ) {
149+ // Try HEAD request first (cheap and fast)
149150 HttpRequest headRequest = HttpRequest .newBuilder (URI .create (url ))
150- .method ("HEAD" , HttpRequest .BodyPublishers .noBody ())
151- .build ();
151+ .method ("HEAD" , HttpRequest .BodyPublishers .noBody ())
152+ .build ();
152153
153154 return HTTP_CLIENT .sendAsync (headRequest , HttpResponse .BodyHandlers .discarding ())
154- .thenApply (response -> {
155- int status = response .statusCode ();
156- // 2xx and 3xx are success, 4xx and 5xx are errors
157- return status >= 400 ;
158- })
159- .exceptionally (ignored -> true )
160- .thenCompose (result -> {
161- if (!Boolean .TRUE .equals (result )) {
162- return CompletableFuture .completedFuture (false );
163- }
164- HttpRequest fallbackGetRequest =
165- HttpRequest .newBuilder (URI .create (url )).GET ().build ();
166- return HTTP_CLIENT
167- .sendAsync (fallbackGetRequest , HttpResponse .BodyHandlers .discarding ())
168- .thenApply (resp -> resp .statusCode () >= 400 )
169- .exceptionally (ignored -> true );
170- });
155+ .thenApply (response -> {
156+ int status = response .statusCode ();
157+ // 2xx and 3xx are success, 4xx and 5xx are errors
158+ return status >= 400 ;
159+ })
160+ .exceptionally (_ -> true )
161+ .thenCompose (result -> {
162+ if (!Boolean .TRUE .equals (result )) {
163+ return CompletableFuture .completedFuture (false );
164+ }
165+ // If HEAD fails, fall back to GET request (some servers don't support HEAD)
166+ HttpRequest fallbackGetRequest =
167+ HttpRequest .newBuilder (URI .create (url )).GET ().build ();
168+ return HTTP_CLIENT
169+ .sendAsync (fallbackGetRequest , HttpResponse .BodyHandlers .discarding ())
170+ .thenApply (resp -> resp .statusCode () >= 400 )
171+ .exceptionally (_ -> true );
172+ });
171173 }
172174
173175 /**
174- * Replaces all broken HTTP(S) links in the given text.
176+ * Replaces all broken links in the given text.
175177 *
176178 * <p>
177179 * Each detected link is checked asynchronously using {@link #isLinkBroken(String)}. Only links
@@ -184,9 +186,9 @@ public static CompletableFuture<Boolean> isLinkBroken(String url) {
184186 *
185187 * <p>
186188 * Example:
187- *
189+ *
188190 * <pre>{@code
189- * replaceDeadLinks ("""
191+ * replaceBrokenLinks ("""
190192 * Test
191193 * http://deadlink/1
192194 * http://workinglink/1
@@ -195,7 +197,7 @@ public static CompletableFuture<Boolean> isLinkBroken(String url) {
195197 *
196198 * <p>
197199 * Results in:
198- *
200+ *
199201 * <pre>{@code
200202 * Test
201203 * (broken link)
@@ -208,47 +210,39 @@ public static CompletableFuture<Boolean> isLinkBroken(String url) {
208210 * text if no broken links were found
209211 */
210212
211-
212- public static CompletableFuture <String > replaceDeadLinks (String text , String replacement ) {
213+ public static CompletableFuture <String > replaceBrokenLinks (String text , String replacement ) {
213214 List <String > links = extractLinks (text , DEFAULT_FILTERS );
214215
215216 if (links .isEmpty ()) {
216217 return CompletableFuture .completedFuture (text );
217218 }
218219
219- List <CompletableFuture <String >> deadLinkFutures = links .stream ()
220- .distinct ()
221- .map (link -> isLinkBroken (link )
222- .thenApply (isBroken -> Boolean .TRUE .equals (isBroken ) ? link : null ))
223-
224- .toList ();
220+ List <CompletableFuture <Optional <String >>> brokenLinkFutures = links .stream ()
221+ .distinct ()
222+ .map (link -> isLinkBroken (link )
223+ .thenApply (isBroken -> Boolean .TRUE .equals (isBroken ) ? Optional .of (link ) : Optional .<String >empty ()))
224+ .toList ();
225225
226- return CompletableFuture .allOf (deadLinkFutures .toArray (new CompletableFuture [0 ]))
227- .thenApply (ignored -> deadLinkFutures .stream ()
228- .map (CompletableFuture ::join )
229- . filter ( Objects :: nonNull )
230- .toList ())
231- .thenApply (deadLinks -> {
232- String result = text ;
233- for (String deadLink : deadLinks ) {
234- result = result .replace (deadLink , replacement );
235- }
236- return result ;
237- });
226+ return CompletableFuture .allOf (brokenLinkFutures .toArray (new CompletableFuture [0 ]))
227+ .thenApply (_ -> brokenLinkFutures .stream ()
228+ .map (CompletableFuture ::join )
229+ . flatMap ( Optional :: stream )
230+ .toList ())
231+ .thenApply (brokenLinks -> {
232+ String result = text ;
233+ for (String brokenLink : brokenLinks ) {
234+ result = result .replace (brokenLink , replacement );
235+ }
236+ return result ;
237+ });
238238 }
239239
240240 /**
241241 * Converts a detected {@link Url} into a normalized link string.
242242 *
243243 * <p>
244- * Applies the provided {@link LinkFilter}s:
245- * <ul>
246- * <li>{@link LinkFilter#SUPPRESSED} - filters URLs wrapped in angle brackets</li>
247- * <li>{@link LinkFilter#NON_HTTP_SCHEME} - filters non-HTTP(S) schemes</li>
248- * </ul>
249- *
250- * <p>
251- * Additionally removes trailing punctuation such as commas or periods from the detected URL.
244+ * Applies the provided {@link LinkFilter}s. Additionally removes trailing punctuation such as
245+ * commas or periods from the detected URL.
252246 *
253247 * @param url the detected URL
254248 * @param filter active link filters to apply
@@ -276,4 +270,4 @@ private static Optional<String> toLink(Url url, Set<LinkFilter> filter) {
276270 }
277271 return Optional .of (link );
278272 }
279- }
273+ }
0 commit comments