Skip to content

Commit 2edd72e

Browse files
Mark-Simulacrum authored and Joshua Nelson committed
Migrate to kuchiki
1 parent 4e76142 commit 2edd72e

File tree

7 files changed

+108
-252
lines changed

7 files changed

+108
-252
lines changed

Cargo.lock

-100
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ badge = { path = "src/web/badge" }
2424
failure = "0.1.3"
2525
comrak = { version = "0.3", default-features = false }
2626
toml = "0.5"
27-
html5ever = "0.22"
27+
kuchiki = "0.8"
2828
schemamama = "0.3"
2929
schemamama_postgres = "0.2"
3030
rusoto_s3 = "0.40"
@@ -77,7 +77,6 @@ features = ["with-chrono", "with-serde_json"]
7777

7878
[dev-dependencies]
7979
once_cell = "1.2.0"
80-
kuchiki = "0.8"
8180
criterion = "0.3"
8281
rand = "0.7.3"
8382

src/utils/html.rs

+27-68
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,38 @@
11
use crate::error::Result;
22
use failure::err_msg;
3-
4-
use html5ever::driver::{parse_document, ParseOpts};
5-
use html5ever::rcdom::{Handle, NodeData, RcDom};
6-
use html5ever::serialize::{serialize, SerializeOpts};
7-
use html5ever::tendril::TendrilSink;
3+
use kuchiki::traits::TendrilSink;
4+
use kuchiki::NodeRef;
85

96
/// Extracts the contents of the `<head>` and `<body>` tags from an HTML document, as well as the
107
/// classes on the `<body>` tag, if any.
118
pub fn extract_head_and_body(html: &str) -> Result<(String, String, String)> {
12-
let parser = parse_document(RcDom::default(), ParseOpts::default());
13-
let dom = parser.one(html);
14-
15-
let (head, body) = extract_from_rcdom(&dom)?;
16-
let class = extract_class(&body);
17-
18-
Ok((stringify(head), stringify(body), class))
9+
let dom = kuchiki::parse_html().one(html);
10+
11+
let head = dom
12+
.select_first("head")
13+
.map_err(|_| err_msg("couldn't find <head> tag in rustdoc output"))?;
14+
let body = dom
15+
.select_first("body")
16+
.map_err(|_| err_msg("couldn't find <body> tag in rustdoc output"))?;
17+
18+
let class = body
19+
.attributes
20+
.borrow()
21+
.get("class")
22+
.map(|v| v.to_owned())
23+
.unwrap_or_default();
24+
25+
Ok((serialize(head.as_node()), serialize(body.as_node()), class))
1926
}
2027

21-
fn extract_from_rcdom(dom: &RcDom) -> Result<(Handle, Handle)> {
22-
let mut worklist = vec![dom.document.clone()];
23-
let (mut head, mut body) = (None, None);
24-
25-
while let Some(handle) = worklist.pop() {
26-
if let NodeData::Element { ref name, .. } = handle.data {
27-
match name.local.as_ref() {
28-
"head" => {
29-
if head.is_some() {
30-
return Err(err_msg("duplicate <head> tag"));
31-
} else {
32-
head = Some(handle.clone());
33-
}
34-
}
35-
36-
"body" => {
37-
if body.is_some() {
38-
return Err(err_msg("duplicate <body> tag"));
39-
} else {
40-
body = Some(handle.clone());
41-
}
42-
}
43-
44-
_ => {} // do nothing
45-
}
46-
}
47-
48-
worklist.extend(handle.children.borrow().iter().cloned());
49-
}
50-
51-
let head = head.ok_or_else(|| err_msg("couldn't find <head> tag in rustdoc output"))?;
52-
let body = body.ok_or_else(|| err_msg("couldn't find <body> tag in rustdoc output"))?;
53-
Ok((head, body))
54-
}
55-
56-
fn stringify(node: Handle) -> String {
57-
let mut vec = Vec::new();
58-
serialize(&mut vec, &node, SerializeOpts::default()).expect("serializing into buffer failed");
59-
60-
String::from_utf8(vec).expect("html5ever returned non-utf8 data")
61-
}
62-
63-
fn extract_class(node: &Handle) -> String {
64-
match node.data {
65-
NodeData::Element { ref attrs, .. } => {
66-
let attrs = attrs.borrow();
67-
68-
attrs
69-
.iter()
70-
.find(|a| &a.name.local == "class")
71-
.map_or(String::new(), |a| a.value.to_string())
72-
}
73-
74-
_ => String::new(),
28+
fn serialize(v: &NodeRef) -> String {
29+
let mut contents = Vec::new();
30+
for child in v.children() {
31+
child
32+
.serialize(&mut contents)
33+
.expect("serialization failed");
7534
}
35+
String::from_utf8(contents).expect("non utf-8 html")
7636
}
7737

7838
#[cfg(test)]
@@ -82,8 +42,7 @@ mod test {
8242
let (head, body, class) = super::extract_head_and_body(
8343
r#"<head><meta name="generator" content="rustdoc"></head><body class="rustdoc struct"><p>hello</p>"#
8444
).unwrap();
85-
86-
assert_eq!(head, r#"<meta name="generator" content="rustdoc">"#);
45+
assert_eq!(head, r#"<meta content="rustdoc" name="generator">"#);
8746
assert_eq!(body, "<p>hello</p>");
8847
assert_eq!(class, "rustdoc struct");
8948
}

src/web/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,7 @@ mod test {
581581
test::*,
582582
web::{handlebars_engine, match_version},
583583
};
584-
use html5ever::tendril::TendrilSink;
584+
use kuchiki::traits::TendrilSink;
585585
use serde_json::json;
586586

587587
fn release(version: &str, db: &TestDatabase) -> i32 {

src/web/rustdoc.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,7 @@ mod test {
567567
use super::*;
568568
use crate::test::*;
569569
use chrono::Utc;
570+
use kuchiki::traits::TendrilSink;
570571
use reqwest::StatusCode;
571572
use serde_json::json;
572573
use std::{collections::BTreeMap, iter::FromIterator};
@@ -575,7 +576,6 @@ mod test {
575576
path: &str,
576577
web: &TestFrontend,
577578
) -> Result<Option<String>, failure::Error> {
578-
use html5ever::tendril::TendrilSink;
579579
assert_success(path, web)?;
580580
let data = web.get(path).send()?.text()?;
581581
println!("{}", data);
@@ -872,7 +872,6 @@ mod test {
872872
#[test]
873873
fn yanked_release_shows_warning_in_nav() {
874874
fn has_yanked_warning(path: &str, web: &TestFrontend) -> Result<bool, failure::Error> {
875-
use html5ever::tendril::TendrilSink;
876875
assert_success(path, web)?;
877876
let data = web.get(path).send()?.text()?;
878877
Ok(kuchiki::parse_html()
@@ -1083,7 +1082,6 @@ mod test {
10831082
path: &str,
10841083
web: &TestFrontend,
10851084
) -> Result<Vec<(String, String)>, failure::Error> {
1086-
use html5ever::tendril::TendrilSink;
10871085
assert_success(path, web)?;
10881086
let data = web.get(path).send()?.text()?;
10891087
let dom = kuchiki::parse_html().one(data);

0 commit comments

Comments
 (0)