|
| 1 | +import re |
| 2 | +import unittest |
| 3 | + |
| 4 | +from html_toc import HtmlTocParser |
| 5 | + |
| 6 | + |
def html_same(html1, html2):
    """Return True when two HTML strings match after removing all whitespace.

    Every whitespace character (anything matched by the regex ``\\s``) is
    dropped from both arguments before comparing, so indentation and line
    breaks in expected-output literals do not matter.  Note that whitespace
    inside text nodes and attribute values is removed as well, so the
    comparison is deliberately lenient.
    """
    stripped1 = re.sub(r"\s", "", html1)
    stripped2 = re.sub(r"\s", "", html2)
    return stripped1 == stripped2
| 9 | + |
| 10 | + |
# Sample document fed to the parser in ``Test.test_complex``.  It mixes
# heading levels out of order (h6 first, h4 before h3), repeats heading text
# ("Title" and "a very small title" each appear twice), and includes inline
# markup, non-ASCII text, a paragraph, stray characters and a comment.
# NOTE(review): the expected id for the <h3> below ends in "-amp" while the
# heading shows a bare "&"; the fixture may originally have used HTML
# entities here and on the "& <" line -- confirm against the upstream file.
html_example = """
<h6>a <code>very</code> small title</h6>
<h1>Title</h1>
 <h2>Title</h2>
 <h4>Another title, <strong>yes</strong>!</h4>
 <h3>中文,标题 Title&</h3>
 <p>a random paragraph...<br/></p>
 & <
 <!-- comment -->
<h1>Another-h1-1</h1>
 <h5>a very small title</h5>
"""
| 23 | + |
| 24 | + |
class Test(unittest.TestCase):
    """Tests for ``HtmlTocParser``: anchor injection and TOC generation.

    The checks use ``unittest`` assertion methods rather than bare ``assert``
    statements so that they still run under ``python -O`` and report useful
    diffs on failure.
    """

    # Show complete diffs when the large nested TOC comparison fails.
    maxDiff = None

    def test_empty(self):
        """Feeding an empty string yields an empty TOC list and empty HTML."""
        parser = HtmlTocParser()
        parser.feed("")
        self.assertEqual(parser.toc(), [])
        self.assertEqual(parser.toc_html(), "")

    def test_basic(self):
        """Non-heading markup passes through; a heading gains an anchor."""
        parser = HtmlTocParser()
        parser.feed('<a href="#">no-effect</a>')
        self.assertTrue(
            html_same(parser.html, '<a href="#">no-effect</a>'),
            parser.html,
        )

        # A second feed is expected to extend the accumulated ``parser.html``,
        # with an anchor element inserted at the start of the heading.
        parser.feed("<h1><strong>T</strong>itle</h1>")
        self.assertTrue(
            html_same(
                parser.html,
                '<a href="#">no-effect</a><h1><a id="Title" href="#Title" '
                'class="anchor"></a><strong>T</strong>itle</h1>',
            ),
            parser.html,
        )

    def test_complex(self):
        """Check the nested TOC and the rendered TOC HTML for ``html_example``."""
        parser = HtmlTocParser()
        parser.feed(html_example)
        # Expected ids: repeated heading text gets a "_1" suffix, and the
        # out-of-order h4/h3 pair both end up as children of the h2.
        # NOTE(review): the h3 id ends in "-amp" while its text/inner_html
        # show a bare "&"; the entity may have been decoded during
        # extraction -- confirm against the upstream file.
        expected_toc = [
            {
                "level": 6,
                "id": "a-very-small-title",
                "text": "a very small title",
                "inner_html": "a <code>very</code> small title",
                "children": [],
            },
            {
                "level": 1,
                "id": "Title",
                "text": "Title",
                "inner_html": "Title",
                "children": [
                    {
                        "level": 2,
                        "id": "Title_1",
                        "text": "Title",
                        "inner_html": "Title",
                        "children": [
                            {
                                "level": 4,
                                "id": "Another-title-yes",
                                "text": "Another title, yes!",
                                "inner_html": "Another title, <strong>yes</strong>!",
                                "children": [],
                            },
                            {
                                "level": 3,
                                "id": "中文-标题-Title-amp",
                                "text": "中文,标题 Title&",
                                "inner_html": "中文,标题 Title&",
                                "children": [],
                            },
                        ],
                    }
                ],
            },
            {
                "level": 1,
                "id": "Another-h1-1",
                "text": "Another-h1-1",
                "inner_html": "Another-h1-1",
                "children": [
                    {
                        "level": 5,
                        "id": "a-very-small-title_1",
                        "text": "a very small title",
                        "inner_html": "a very small title",
                        "children": [],
                    }
                ],
            },
        ]
        self.assertEqual(parser.toc(), expected_toc)

        # With depth=2 and lowest_level=5 the expected markup omits the
        # level-6 heading and the entries nested below the second level.
        expected_toc_html = """
 <ul>
 <li><a href="#Title">Title</a>
 <ul>
 <li><a href="#Title_1">Title</a></li>
 </ul>
 </li>
 <li><a href="#Another-h1-1">Another-h1-1</a>
 <ul>
 <li><a href="#a-very-small-title_1">a very small title</a></li>
 </ul>
 </li>
 </ul>
 """
        actual_toc_html = parser.toc_html(depth=2, lowest_level=5)
        self.assertTrue(
            html_same(actual_toc_html, expected_toc_html),
            actual_toc_html,
        )
| 118 | + |
| 119 | + |
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
0 commit comments