Skip to content

Commit 9b0a675

Browse files
committed
Finish next release.
1 parent c25cf08 commit 9b0a675

File tree

1 file changed

+1
-3
lines changed

1 file changed

+1
-3
lines changed

atra/src/extraction/html.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -234,20 +234,18 @@ mod selectors {
234234
Base can have anything except data: and javascript:
235235
*/
236236

237+
/// Matches single-quoted `location.href = '...'` assignments; capture group 1 holds the quoted target.
237238
pub static HREF_LOCATION_MATCHER: Lazy<Regex> =
238239
Lazy::new(|| Regex::new("location\\s*\\.\\s*href\\s*=\\s*'\\s*([^']*)\\s*'\\s*;?").unwrap());
239240

240-
// todo: use form action https://github.com/apache/nutch/blob/master/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java#L330
241241
// Ignore [ping] of area/a
242242
static_selectors! {
243243
pub [
244244
BASE = "base"
245245
HREF_HOLDER = "a,area,link"
246246
SRC_HOLDER = "audio,embed,iframe,img,input,source,track,video"
247247
SCRIPT_HOLDER = "script"
248-
// TARGET_ELEMENTS = "a,area,base,link,script,audio,embed,iframe,img,input,script,source,track,video"
249248
ON_CLICK = "[onclick]"
250-
// SCRIPT = "script"
251249
FORM_HOLDER = "form[action]"
252250
META_NO_FOLLOW = "meta[name=\"robots\"][content=\"nofollow\"]"
253251
]

0 commit comments

Comments
 (0)