Skip to content
Merged
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,24 @@
import com.linkedin.urls.detection.UrlDetector;
import com.linkedin.urls.detection.UrlDetectorOptions;

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;

/**
* Utility class to detect links.
*/
public class LinkDetection {
/**
 * Shared HTTP client, created once with the JDK default settings and reused for every
 * HEAD/GET probe issued by {@link #isLinkBroken(String)}.
 */
private static final HttpClient HTTP_CLIENT = HttpClient.newHttpClient();

/**
 * Filters applied when {@link #replaceDeadLinks(String, String)} extracts links from a text:
 * {@link LinkFilter#SUPPRESSED} and {@link LinkFilter#NON_HTTP_SCHEME}.
 */
private static final Set<LinkFilter> DEFAULT_FILTERS =
Set.of(LinkFilter.SUPPRESSED, LinkFilter.NON_HTTP_SCHEME);
Comment thread
barsh404error marked this conversation as resolved.
Comment thread
Zabuzard marked this conversation as resolved.

/**
* Possible ways to filter a link.
Expand Down Expand Up @@ -58,6 +68,56 @@ public static boolean containsLink(String content) {
return !(new UrlDetector(content, UrlDetectorOptions.BRACKET_MATCH).detect().isEmpty());
}

public static CompletableFuture<Boolean> isLinkBroken(String url) {
HttpRequest headRequest = HttpRequest.newBuilder(URI.create(url))
Comment thread
barsh404error marked this conversation as resolved.
Comment thread
Zabuzard marked this conversation as resolved.
.method("HEAD", HttpRequest.BodyPublishers.noBody())
.build();

return HTTP_CLIENT.sendAsync(headRequest, HttpResponse.BodyHandlers.discarding())
.thenApply(response -> {
int status = response.statusCode();
return status < 200 || status >= 400;
})
.exceptionally(ignored -> true)
Comment thread
Zabuzard marked this conversation as resolved.
Outdated
.thenCompose(result -> {
if (!Boolean.TRUE.equals(result)) {
return CompletableFuture.completedFuture(false);
}
HttpRequest getRequest = HttpRequest.newBuilder(URI.create(url)).GET().build();
Comment thread
Zabuzard marked this conversation as resolved.
Outdated
return HTTP_CLIENT.sendAsync(getRequest, HttpResponse.BodyHandlers.discarding())
.thenApply(resp -> resp.statusCode() >= 400)
.exceptionally(ignored -> true); // still never null
});
Comment thread
barsh404error marked this conversation as resolved.
}

public static CompletableFuture<String> replaceDeadLinks(String text, String replacement) {
Comment thread
tj-wazei marked this conversation as resolved.
Outdated
Comment thread
Zabuzard marked this conversation as resolved.
Outdated
List<String> links = extractLinks(text, DEFAULT_FILTERS);

if (links.isEmpty()) {
return CompletableFuture.completedFuture(text);
}

List<CompletableFuture<String>> deadLinkFutures = links.stream()
.distinct()
.map(link -> isLinkBroken(link)
.thenApply(isBroken -> Boolean.TRUE.equals(isBroken) ? link : null))
Comment thread
Zabuzard marked this conversation as resolved.
Comment thread
Zabuzard marked this conversation as resolved.
Outdated

Comment thread
barsh404error marked this conversation as resolved.
.toList();

return CompletableFuture.allOf(deadLinkFutures.toArray(new CompletableFuture[0]))
Comment thread
barsh404error marked this conversation as resolved.
Outdated
.thenApply(ignored -> deadLinkFutures.stream()
Comment thread
Zabuzard marked this conversation as resolved.
Outdated
.map(CompletableFuture::join)
.filter(Objects::nonNull)
Comment thread
Zabuzard marked this conversation as resolved.
.toList())
.thenApply(deadLinks -> {
String result = text;
for (String deadLink : deadLinks) {
result = result.replace(deadLink, replacement);
Comment thread
Zabuzard marked this conversation as resolved.
Outdated
}
return result;
});
}

private static Optional<String> toLink(Url url, Set<LinkFilter> filter) {
String raw = url.getOriginalUrl();
if (filter.contains(LinkFilter.SUPPRESSED) && raw.contains(">")) {
Expand All @@ -76,7 +136,6 @@ private static Optional<String> toLink(Url url, Set<LinkFilter> filter) {
// Remove trailing punctuation
link = link.substring(0, link.length() - 1);
}

return Optional.of(link);
}

Expand Down
Loading