17
17
import java .net .http .HttpClient ;
18
18
import java .net .http .HttpRequest ;
19
19
import java .net .http .HttpResponse ;
20
+ import java .time .Duration ;
20
21
import java .util .Collection ;
21
22
import java .util .List ;
22
23
import java .util .Optional ;
23
24
import java .util .concurrent .CompletableFuture ;
25
+ import java .util .concurrent .TimeUnit ;
24
26
import java .util .function .Predicate ;
25
27
import java .util .stream .IntStream ;
26
28
@@ -34,7 +36,8 @@ public final class LinkPreviews {
34
36
private static final String IMAGE_CONTENT_TYPE_PREFIX = "image" ;
35
37
private static final String IMAGE_META_NAME = "image" ;
36
38
37
- private static final HttpClient CLIENT = HttpClient .newHttpClient ();
39
+ private static final HttpClient CLIENT =
40
+ HttpClient .newBuilder ().connectTimeout (Duration .ofSeconds (10 )).build ();
38
41
39
42
private LinkPreviews () {
40
43
throw new UnsupportedOperationException ("Utility class" );
@@ -49,10 +52,33 @@ private LinkPreviews() {
49
52
public static List <String > extractLinks (String content ) {
50
53
return new UrlDetector (content , UrlDetectorOptions .BRACKET_MATCH ).detect ()
51
54
.stream ()
52
- .map (Url ::getFullUrl )
55
+ .map (LinkPreviews ::toLink )
56
+ .flatMap (Optional ::stream )
53
57
.toList ();
54
58
}
55
59
60
+ private static Optional <String > toLink (Url url ) {
61
+ String raw = url .getOriginalUrl ();
62
+ if (raw .contains (">" )) {
63
+ // URL escapes, such as "<http://example.com>" should be skipped
64
+ return Optional .empty ();
65
+ }
66
+ // Not interested in other schemes, also to filter out matches without scheme.
67
+ // It detects a lot of such false-positives in Java snippets
68
+ if (!raw .startsWith ("http" )) {
69
+ return Optional .empty ();
70
+ }
71
+
72
+ String link = url .getFullUrl ();
73
+
74
+ if (link .endsWith ("," ) || link .endsWith ("." )) {
75
+ // Remove trailing punctuation
76
+ link = link .substring (0 , link .length () - 1 );
77
+ }
78
+
79
+ return Optional .of (link );
80
+ }
81
+
56
82
/**
57
83
* Attempts to create previews of all given links.
58
84
* <p>
@@ -75,7 +101,10 @@ public static CompletableFuture<List<LinkPreview>> createLinkPreviews(List<Strin
75
101
.toList ();
76
102
77
103
var allDoneTask = CompletableFuture .allOf (tasks .toArray (CompletableFuture []::new ));
78
- return allDoneTask .thenApply (any -> extractResults (tasks ));
104
+ return allDoneTask .thenApply (any -> extractResults (tasks )).exceptionally (e -> {
105
+ logger .error ("Unknown error during link preview creation" , e );
106
+ return List .of ();
107
+ });
79
108
}
80
109
81
110
private static <T > List <T > extractResults (
@@ -103,6 +132,9 @@ private static CompletableFuture<Optional<LinkPreview>> createLinkPreview(String
103
132
return parseWebsite (link , attachmentName , content .dataStream );
104
133
}
105
134
return noResult ();
135
+ }).orTimeout (10 , TimeUnit .SECONDS ).exceptionally (e -> {
136
+ logger .warn ("Failed to create link preview for {}" , link , e );
137
+ return Optional .empty ();
106
138
});
107
139
}
108
140
@@ -142,7 +174,8 @@ private static CompletableFuture<Optional<LinkPreview>> parseWebsite(String link
142
174
try {
143
175
doc = Jsoup .parse (websiteContent , null , link );
144
176
} catch (IOException e ) {
145
- logger .warn ("Attempted to create a preview for {}, but the content invalid." , link , e );
177
+ logger .warn ("Attempted to create a preview for {}, but the content is invalid." , link ,
178
+ e );
146
179
return noResult ();
147
180
}
148
181
@@ -152,7 +185,7 @@ private static CompletableFuture<Optional<LinkPreview>> parseWebsite(String link
152
185
153
186
LinkPreview textPreview = LinkPreview .ofText (title , link , description );
154
187
155
- String image = parseOpenGraphMeta (doc , IMAGE_META_NAME ).orElse (null );
188
+ String image = parseOpenGraphTwitterMeta (doc , IMAGE_META_NAME , null ).orElse (null );
156
189
if (image == null ) {
157
190
return result (textPreview );
158
191
}
@@ -173,24 +206,27 @@ private static CompletableFuture<Optional<LinkPreview>> parseWebsite(String link
173
206
174
207
private static Optional <String > parseOpenGraphTwitterMeta (Document doc , String metaProperty ,
175
208
@ Nullable String fallback ) {
176
- String value = Optional
177
- .ofNullable (doc .selectFirst ("meta[property=og:%s]" .formatted (metaProperty )))
178
- .or (() -> Optional
179
- .ofNullable (doc .selectFirst ("meta[property=twitter:%s]" .formatted (metaProperty ))))
180
- .map (element -> element .attr ("content" ))
209
+ String value = parseMetaProperty (doc , "og:" + metaProperty )
210
+ .or (() -> parseMetaProperty (doc , "twitter:" + metaProperty ))
181
211
.orElse (fallback );
212
+
182
213
if (value == null ) {
183
214
return Optional .empty ();
184
215
}
185
216
return value .isBlank () ? Optional .empty () : Optional .of (value );
186
217
}
187
218
188
- private static Optional <String > parseOpenGraphMeta (Document doc , String metaProperty ) {
189
- return Optional . ofNullable (doc . selectFirst ( "meta[ property=og:%s]" . formatted ( metaProperty )) )
190
- .map ( element -> element . attr ( "content" ))
219
+ private static Optional <String > parseMetaProperty (Document doc , String metaProperty ) {
220
+ return selectFirstMetaTag (doc , " property" , metaProperty )
221
+ .or (() -> selectFirstMetaTag ( doc , "name" , metaProperty ))
191
222
.filter (Predicate .not (String ::isBlank ));
192
223
}
193
224
225
+ private static Optional <String > selectFirstMetaTag (Document doc , String key , String value ) {
226
+ return Optional .ofNullable (doc .selectFirst ("meta[%s=%s]" .formatted (key , value )))
227
+ .map (element -> element .attr ("content" ));
228
+ }
229
+
194
230
private static <T > CompletableFuture <Optional <T >> noResult () {
195
231
return CompletableFuture .completedFuture (Optional .empty ());
196
232
}
0 commit comments