Skip to content

Commit 0587d3e

Browse files
authored
Fixed bugs with link preview (#706)
* Ensure that link preview failure does not go silent and stuck * Fixed some issues with false positive URL matches * fixed more issues with link preview * removed trailing punctuation * some website use meta name instead of meta property * Code polish
1 parent b80feef commit 0587d3e

File tree

1 file changed

+49
-13
lines changed

1 file changed

+49
-13
lines changed

application/src/main/java/org/togetherjava/tjbot/commands/utils/LinkPreviews.java

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@
1717
import java.net.http.HttpClient;
1818
import java.net.http.HttpRequest;
1919
import java.net.http.HttpResponse;
20+
import java.time.Duration;
2021
import java.util.Collection;
2122
import java.util.List;
2223
import java.util.Optional;
2324
import java.util.concurrent.CompletableFuture;
25+
import java.util.concurrent.TimeUnit;
2426
import java.util.function.Predicate;
2527
import java.util.stream.IntStream;
2628

@@ -34,7 +36,8 @@ public final class LinkPreviews {
3436
private static final String IMAGE_CONTENT_TYPE_PREFIX = "image";
3537
private static final String IMAGE_META_NAME = "image";
3638

37-
private static final HttpClient CLIENT = HttpClient.newHttpClient();
39+
private static final HttpClient CLIENT =
40+
HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build();
3841

3942
private LinkPreviews() {
4043
throw new UnsupportedOperationException("Utility class");
@@ -49,10 +52,33 @@ private LinkPreviews() {
4952
public static List<String> extractLinks(String content) {
5053
return new UrlDetector(content, UrlDetectorOptions.BRACKET_MATCH).detect()
5154
.stream()
52-
.map(Url::getFullUrl)
55+
.map(LinkPreviews::toLink)
56+
.flatMap(Optional::stream)
5357
.toList();
5458
}
5559

60+
private static Optional<String> toLink(Url url) {
61+
String raw = url.getOriginalUrl();
62+
if (raw.contains(">")) {
63+
// URL escapes, such as "<http://example.com>" should be skipped
64+
return Optional.empty();
65+
}
66+
// Not interested in other schemes, also to filter out matches without scheme.
67+
// It detects a lot of such false-positives in Java snippets
68+
if (!raw.startsWith("http")) {
69+
return Optional.empty();
70+
}
71+
72+
String link = url.getFullUrl();
73+
74+
if (link.endsWith(",") || link.endsWith(".")) {
75+
// Remove trailing punctuation
76+
link = link.substring(0, link.length() - 1);
77+
}
78+
79+
return Optional.of(link);
80+
}
81+
5682
/**
5783
* Attempts to create previews of all given links.
5884
* <p>
@@ -75,7 +101,10 @@ public static CompletableFuture<List<LinkPreview>> createLinkPreviews(List<Strin
75101
.toList();
76102

77103
var allDoneTask = CompletableFuture.allOf(tasks.toArray(CompletableFuture[]::new));
78-
return allDoneTask.thenApply(any -> extractResults(tasks));
104+
return allDoneTask.thenApply(any -> extractResults(tasks)).exceptionally(e -> {
105+
logger.error("Unknown error during link preview creation", e);
106+
return List.of();
107+
});
79108
}
80109

81110
private static <T> List<T> extractResults(
@@ -103,6 +132,9 @@ private static CompletableFuture<Optional<LinkPreview>> createLinkPreview(String
103132
return parseWebsite(link, attachmentName, content.dataStream);
104133
}
105134
return noResult();
135+
}).orTimeout(10, TimeUnit.SECONDS).exceptionally(e -> {
136+
logger.warn("Failed to create link preview for {}", link, e);
137+
return Optional.empty();
106138
});
107139
}
108140

@@ -142,7 +174,8 @@ private static CompletableFuture<Optional<LinkPreview>> parseWebsite(String link
142174
try {
143175
doc = Jsoup.parse(websiteContent, null, link);
144176
} catch (IOException e) {
145-
logger.warn("Attempted to create a preview for {}, but the content invalid.", link, e);
177+
logger.warn("Attempted to create a preview for {}, but the content is invalid.", link,
178+
e);
146179
return noResult();
147180
}
148181

@@ -152,7 +185,7 @@ private static CompletableFuture<Optional<LinkPreview>> parseWebsite(String link
152185

153186
LinkPreview textPreview = LinkPreview.ofText(title, link, description);
154187

155-
String image = parseOpenGraphMeta(doc, IMAGE_META_NAME).orElse(null);
188+
String image = parseOpenGraphTwitterMeta(doc, IMAGE_META_NAME, null).orElse(null);
156189
if (image == null) {
157190
return result(textPreview);
158191
}
@@ -173,24 +206,27 @@ private static CompletableFuture<Optional<LinkPreview>> parseWebsite(String link
173206

174207
private static Optional<String> parseOpenGraphTwitterMeta(Document doc, String metaProperty,
175208
@Nullable String fallback) {
176-
String value = Optional
177-
.ofNullable(doc.selectFirst("meta[property=og:%s]".formatted(metaProperty)))
178-
.or(() -> Optional
179-
.ofNullable(doc.selectFirst("meta[property=twitter:%s]".formatted(metaProperty))))
180-
.map(element -> element.attr("content"))
209+
String value = parseMetaProperty(doc, "og:" + metaProperty)
210+
.or(() -> parseMetaProperty(doc, "twitter:" + metaProperty))
181211
.orElse(fallback);
212+
182213
if (value == null) {
183214
return Optional.empty();
184215
}
185216
return value.isBlank() ? Optional.empty() : Optional.of(value);
186217
}
187218

188-
private static Optional<String> parseOpenGraphMeta(Document doc, String metaProperty) {
189-
return Optional.ofNullable(doc.selectFirst("meta[property=og:%s]".formatted(metaProperty)))
190-
.map(element -> element.attr("content"))
219+
private static Optional<String> parseMetaProperty(Document doc, String metaProperty) {
220+
return selectFirstMetaTag(doc, "property", metaProperty)
221+
.or(() -> selectFirstMetaTag(doc, "name", metaProperty))
191222
.filter(Predicate.not(String::isBlank));
192223
}
193224

225+
private static Optional<String> selectFirstMetaTag(Document doc, String key, String value) {
226+
return Optional.ofNullable(doc.selectFirst("meta[%s=%s]".formatted(key, value)))
227+
.map(element -> element.attr("content"));
228+
}
229+
194230
private static <T> CompletableFuture<Optional<T>> noResult() {
195231
return CompletableFuture.completedFuture(Optional.empty());
196232
}

0 commit comments

Comments
 (0)