1
1
use crate :: error:: Result ;
2
2
use failure:: err_msg;
3
-
4
- use html5ever:: driver:: { parse_document, ParseOpts } ;
5
- use html5ever:: rcdom:: { Handle , NodeData , RcDom } ;
6
- use html5ever:: serialize:: { serialize, SerializeOpts } ;
7
- use html5ever:: tendril:: TendrilSink ;
3
+ use kuchiki:: traits:: TendrilSink ;
4
+ use kuchiki:: NodeRef ;
8
5
9
6
/// Extracts the contents of the `<head>` and `<body>` tags from an HTML document, as well as the
10
7
/// classes on the `<body>` tag, if any.
11
8
pub fn extract_head_and_body ( html : & str ) -> Result < ( String , String , String ) > {
12
- let parser = parse_document ( RcDom :: default ( ) , ParseOpts :: default ( ) ) ;
13
- let dom = parser. one ( html) ;
14
-
15
- let ( head, body) = extract_from_rcdom ( & dom) ?;
16
- let class = extract_class ( & body) ;
17
-
18
- Ok ( ( stringify ( head) , stringify ( body) , class) )
9
+ let dom = kuchiki:: parse_html ( ) . one ( html) ;
10
+
11
+ let head = dom
12
+ . select_first ( "head" )
13
+ . map_err ( |_| err_msg ( "couldn't find <head> tag in rustdoc output" ) ) ?;
14
+ let body = dom
15
+ . select_first ( "body" )
16
+ . map_err ( |_| err_msg ( "couldn't find <body> tag in rustdoc output" ) ) ?;
17
+
18
+ let class = body
19
+ . attributes
20
+ . borrow ( )
21
+ . get ( "class" )
22
+ . map ( |v| v. to_owned ( ) )
23
+ . unwrap_or_default ( ) ;
24
+
25
+ Ok ( ( serialize ( head. as_node ( ) ) , serialize ( body. as_node ( ) ) , class) )
19
26
}
20
27
21
- fn extract_from_rcdom ( dom : & RcDom ) -> Result < ( Handle , Handle ) > {
22
- let mut worklist = vec ! [ dom. document. clone( ) ] ;
23
- let ( mut head, mut body) = ( None , None ) ;
24
-
25
- while let Some ( handle) = worklist. pop ( ) {
26
- if let NodeData :: Element { ref name, .. } = handle. data {
27
- match name. local . as_ref ( ) {
28
- "head" => {
29
- if head. is_some ( ) {
30
- return Err ( err_msg ( "duplicate <head> tag" ) ) ;
31
- } else {
32
- head = Some ( handle. clone ( ) ) ;
33
- }
34
- }
35
-
36
- "body" => {
37
- if body. is_some ( ) {
38
- return Err ( err_msg ( "duplicate <body> tag" ) ) ;
39
- } else {
40
- body = Some ( handle. clone ( ) ) ;
41
- }
42
- }
43
-
44
- _ => { } // do nothing
45
- }
46
- }
47
-
48
- worklist. extend ( handle. children . borrow ( ) . iter ( ) . cloned ( ) ) ;
49
- }
50
-
51
- let head = head. ok_or_else ( || err_msg ( "couldn't find <head> tag in rustdoc output" ) ) ?;
52
- let body = body. ok_or_else ( || err_msg ( "couldn't find <body> tag in rustdoc output" ) ) ?;
53
- Ok ( ( head, body) )
54
- }
55
-
56
- fn stringify ( node : Handle ) -> String {
57
- let mut vec = Vec :: new ( ) ;
58
- serialize ( & mut vec, & node, SerializeOpts :: default ( ) ) . expect ( "serializing into buffer failed" ) ;
59
-
60
- String :: from_utf8 ( vec) . expect ( "html5ever returned non-utf8 data" )
61
- }
62
-
63
- fn extract_class ( node : & Handle ) -> String {
64
- match node. data {
65
- NodeData :: Element { ref attrs, .. } => {
66
- let attrs = attrs. borrow ( ) ;
67
-
68
- attrs
69
- . iter ( )
70
- . find ( |a| & a. name . local == "class" )
71
- . map_or ( String :: new ( ) , |a| a. value . to_string ( ) )
72
- }
73
-
74
- _ => String :: new ( ) ,
28
+ fn serialize ( v : & NodeRef ) -> String {
29
+ let mut contents = Vec :: new ( ) ;
30
+ for child in v. children ( ) {
31
+ child
32
+ . serialize ( & mut contents)
33
+ . expect ( "serialization failed" ) ;
75
34
}
35
+ String :: from_utf8 ( contents) . expect ( "non utf-8 html" )
76
36
}
77
37
78
38
#[ cfg( test) ]
@@ -82,8 +42,7 @@ mod test {
82
42
let ( head, body, class) = super :: extract_head_and_body (
83
43
r#"<head><meta name="generator" content="rustdoc"></head><body class="rustdoc struct"><p>hello</p>"#
84
44
) . unwrap ( ) ;
85
-
86
- assert_eq ! ( head, r#"<meta name="generator" content="rustdoc">"# ) ;
45
+ assert_eq ! ( head, r#"<meta content="rustdoc" name="generator">"# ) ;
87
46
assert_eq ! ( body, "<p>hello</p>" ) ;
88
47
assert_eq ! ( class, "rustdoc struct" ) ;
89
48
}
0 commit comments