Skip to content

Commit 93c3555

Browse files
authored
Move to pytest 4/5
This largely involves moving away from using generators as tests (yield-based tests, which pytest 4.0 no longer supports) in favour of pytest.mark.parametrize
1 parent 5cd73ef commit 93c3555

7 files changed

+98
-119
lines changed

.pytest.expect

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
pytest-expect file v1
2-
(2, 7, 11, 'final', 0)
3-
b'html5lib/tests/test_encoding.py::test_encoding::[110]': FAIL
4-
b'html5lib/tests/test_encoding.py::test_encoding::[111]': FAIL
2+
(2, 7, 18, 'final', 0)
3+
b'html5lib/tests/test_encoding.py::test_parser_encoding[<!DOCTYPE HTML>\\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
4+
b'html5lib/tests/test_encoding.py::test_prescan_encoding[<!DOCTYPE HTML>\\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
55
u'html5lib/tests/testdata/tokenizer/test2.test::0::dataState': FAIL
66
u'html5lib/tests/testdata/tokenizer/test3.test::228::dataState': FAIL
77
u'html5lib/tests/testdata/tokenizer/test3.test::231::dataState': FAIL

html5lib/tests/test_encoding.py

+11-10
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,15 @@ def test_parser_args_raises(kwargs):
7575
assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")
7676

7777

78-
def runParserEncodingTest(data, encoding):
78+
def param_encoding():
79+
for filename in get_data_files("encoding"):
80+
tests = _TestData(filename, b"data", encoding=None)
81+
for test in tests:
82+
yield test[b'data'], test[b'encoding']
83+
84+
85+
@pytest.mark.parametrize("data, encoding", param_encoding())
86+
def test_parser_encoding(data, encoding):
7987
p = HTMLParser()
8088
assert p.documentEncoding is None
8189
p.parse(data, useChardet=False)
@@ -84,7 +92,8 @@ def runParserEncodingTest(data, encoding):
8492
assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)
8593

8694

87-
def runPreScanEncodingTest(data, encoding):
95+
@pytest.mark.parametrize("data, encoding", param_encoding())
96+
def test_prescan_encoding(data, encoding):
8897
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
8998
encoding = encoding.lower().decode("ascii")
9099

@@ -95,14 +104,6 @@ def runPreScanEncodingTest(data, encoding):
95104
assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)
96105

97106

98-
def test_encoding():
99-
for filename in get_data_files("encoding"):
100-
tests = _TestData(filename, b"data", encoding=None)
101-
for test in tests:
102-
yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
103-
yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])
104-
105-
106107
# pylint:disable=wrong-import-position
107108
try:
108109
import chardet # noqa

html5lib/tests/test_sanitizer.py

+25-20
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,11 @@
11
from __future__ import absolute_import, division, unicode_literals
22

3+
import pytest
4+
35
from html5lib import constants, parseFragment, serialize
46
from html5lib.filters import sanitizer
57

68

7-
def runSanitizerTest(_, expected, input):
8-
parsed = parseFragment(expected)
9-
expected = serialize(parsed,
10-
omit_optional_tags=False,
11-
use_trailing_solidus=True,
12-
space_before_trailing_solidus=False,
13-
quote_attr_values="always",
14-
quote_char='"',
15-
alphabetical_attributes=True)
16-
assert expected == sanitize_html(input)
17-
18-
199
def sanitize_html(stream):
2010
parsed = parseFragment(stream)
2111
serialized = serialize(parsed,
@@ -59,27 +49,27 @@ def test_data_uri_disallowed_type():
5949
assert expected == sanitized
6050

6151

62-
def test_sanitizer():
52+
def param_sanitizer():
6353
for ns, tag_name in sanitizer.allowed_elements:
6454
if ns != constants.namespaces["html"]:
6555
continue
6656
if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td',
6757
'tfoot', 'th', 'thead', 'tr', 'select']:
6858
continue # TODO
6959
if tag_name == 'image':
70-
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
60+
yield ("test_should_allow_%s_tag" % tag_name,
7161
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
7262
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
7363
elif tag_name == 'br':
74-
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
64+
yield ("test_should_allow_%s_tag" % tag_name,
7565
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
7666
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
7767
elif tag_name in constants.voidElements:
78-
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
68+
yield ("test_should_allow_%s_tag" % tag_name,
7969
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
8070
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
8171
else:
82-
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
72+
yield ("test_should_allow_%s_tag" % tag_name,
8373
"<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
8474
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
8575

@@ -93,15 +83,15 @@ def test_sanitizer():
9383
attribute_value = 'foo'
9484
if attribute_name in sanitizer.attr_val_is_uri:
9585
attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0]
96-
yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
86+
yield ("test_should_allow_%s_attribute" % attribute_name,
9787
"<p %s=\"%s\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % (attribute_name, attribute_value),
9888
"<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value))
9989

10090
for protocol in sanitizer.allowed_protocols:
10191
rest_of_uri = '//sub.domain.tld/path/object.ext'
10292
if protocol == 'data':
10393
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
104-
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
94+
yield ("test_should_allow_uppercase_%s_uris" % protocol,
10595
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
10696
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
10797

@@ -110,11 +100,26 @@ def test_sanitizer():
110100
if protocol == 'data':
111101
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
112102
protocol = protocol.upper()
113-
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
103+
yield ("test_should_allow_uppercase_%s_uris" % protocol,
114104
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
115105
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))
116106

117107

108+
@pytest.mark.parametrize("expected, input",
109+
(pytest.param(expected, input, id=id)
110+
for id, expected, input in param_sanitizer()))
111+
def test_sanitizer(expected, input):
112+
parsed = parseFragment(expected)
113+
expected = serialize(parsed,
114+
omit_optional_tags=False,
115+
use_trailing_solidus=True,
116+
space_before_trailing_solidus=False,
117+
quote_attr_values="always",
118+
quote_char='"',
119+
alphabetical_attributes=True)
120+
assert expected == sanitize_html(input)
121+
122+
118123
def test_lowercase_color_codes_in_style():
119124
sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>")
120125
expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>'

html5lib/tests/test_serializer.py

+25-24
Original file line numberDiff line numberDiff line change
@@ -89,19 +89,6 @@ def serialize_html(input, options):
8989
return serializer.render(stream, encoding)
9090

9191

92-
def runSerializerTest(input, expected, options):
93-
encoding = options.get("encoding", None)
94-
95-
if encoding:
96-
expected = list(map(lambda x: x.encode(encoding), expected))
97-
98-
result = serialize_html(input, options)
99-
if len(expected) == 1:
100-
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
101-
elif result not in expected:
102-
assert False, "Expected: %s, Received: %s" % (expected, result)
103-
104-
10592
def throwsWithLatin1(input):
10693
with pytest.raises(UnicodeEncodeError):
10794
serialize_html(input, {"encoding": "iso-8859-1"})
@@ -120,13 +107,13 @@ def testDoctypeSystemId():
120107

121108

122109
def testCdataCharacters():
123-
runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
124-
["<style>&amacr;"], {"encoding": "iso-8859-1"})
110+
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
111+
["<style>&amacr;"], {"encoding": "iso-8859-1"})
125112

126113

127114
def testCharacters():
128-
runSerializerTest([["Characters", "\u0101"]],
129-
["&amacr;"], {"encoding": "iso-8859-1"})
115+
test_serializer([["Characters", "\u0101"]],
116+
["&amacr;"], {"encoding": "iso-8859-1"})
130117

131118

132119
def testStartTagName():
@@ -138,9 +125,9 @@ def testAttributeName():
138125

139126

140127
def testAttributeValue():
141-
runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span",
142-
[{"namespace": None, "name": "potato", "value": "\u0101"}]]],
143-
["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
128+
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "span",
129+
[{"namespace": None, "name": "potato", "value": "\u0101"}]]],
130+
["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
144131

145132

146133
def testEndTagName():
@@ -165,7 +152,7 @@ def testSpecQuoteAttribute(c):
165152
else:
166153
output_ = ['<span foo="%s">' % c]
167154
options_ = {"quote_attr_values": "spec"}
168-
runSerializerTest(input_, output_, options_)
155+
test_serializer(input_, output_, options_)
169156

170157

171158
@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"
@@ -184,7 +171,7 @@ def testLegacyQuoteAttribute(c):
184171
else:
185172
output_ = ['<span foo="%s">' % c]
186173
options_ = {"quote_attr_values": "legacy"}
187-
runSerializerTest(input_, output_, options_)
174+
test_serializer(input_, output_, options_)
188175

189176

190177
@pytest.fixture
@@ -217,9 +204,23 @@ def testEntityNoResolve(lxml_parser):
217204
assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'
218205

219206

220-
def test_serializer():
207+
def param_serializer():
221208
for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)):
222209
with open(filename) as fp:
223210
tests = json.load(fp)
224211
for test in tests['tests']:
225-
yield runSerializerTest, test["input"], test["expected"], test.get("options", {})
212+
yield test["input"], test["expected"], test.get("options", {})
213+
214+
215+
@pytest.mark.parametrize("input, expected, options", param_serializer())
216+
def test_serializer(input, expected, options):
217+
encoding = options.get("encoding", None)
218+
219+
if encoding:
220+
expected = list(map(lambda x: x.encode(encoding), expected))
221+
222+
result = serialize_html(input, options)
223+
if len(expected) == 1:
224+
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
225+
elif result not in expected:
226+
assert False, "Expected: %s, Received: %s" % (expected, result)

html5lib/tests/test_treewalkers.py

+20-19
Original file line numberDiff line numberDiff line change
@@ -61,24 +61,7 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
6161
setter['ElementTree'](docfrag)(name, value)
6262

6363

64-
def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
65-
"""tests what happens when we add attributes to the intext"""
66-
treeName, treeClass = tree
67-
if treeClass is None:
68-
pytest.skip("Treebuilder not loaded")
69-
parser = html5parser.HTMLParser(tree=treeClass["builder"])
70-
document = parser.parseFragment(intext)
71-
for nom, val in attrs_to_add:
72-
set_attribute_on_first_child(document, nom, val, treeName)
73-
74-
document = treeClass.get("adapter", lambda x: x)(document)
75-
output = treewalkers.pprint(treeClass["walker"](document))
76-
output = attrlist.sub(sortattrs, output)
77-
if output not in expected:
78-
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
79-
80-
81-
def test_treewalker_six_mix():
64+
def param_treewalker_six_mix():
8265
"""Str/Unicode mix. If str attrs added to tree"""
8366

8467
# On Python 2.x string literals are of type str. Unless, like this
@@ -99,7 +82,25 @@ def test_treewalker_six_mix():
9982

10083
for tree in sorted(treeTypes.items()):
10184
for intext, attrs, expected in sm_tests:
102-
yield runTreewalkerEditTest, intext, expected, attrs, tree
85+
yield intext, expected, attrs, tree
86+
87+
88+
@pytest.mark.parametrize("intext, expected, attrs_to_add, tree", param_treewalker_six_mix())
89+
def test_treewalker_six_mix(intext, expected, attrs_to_add, tree):
90+
"""tests what happens when we add attributes to the intext"""
91+
treeName, treeClass = tree
92+
if treeClass is None:
93+
pytest.skip("Treebuilder not loaded")
94+
parser = html5parser.HTMLParser(tree=treeClass["builder"])
95+
document = parser.parseFragment(intext)
96+
for nom, val in attrs_to_add:
97+
set_attribute_on_first_child(document, nom, val, treeName)
98+
99+
document = treeClass.get("adapter", lambda x: x)(document)
100+
output = treewalkers.pprint(treeClass["walker"](document))
101+
output = attrlist.sub(sortattrs, output)
102+
if output not in expected:
103+
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
103104

104105

105106
@pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"]))

html5lib/tests/tree_construction.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,6 @@ def _getParserTests(self, treeName, treeAPIs):
5757
item.add_marker(pytest.mark.parser)
5858
if namespaceHTMLElements:
5959
item.add_marker(pytest.mark.namespaced)
60-
if treeAPIs is None:
61-
item.add_marker(pytest.mark.skipif(True, reason="Treebuilder not loaded"))
6260
yield item
6361

6462
def _getTreeWalkerTests(self, treeName, treeAPIs):
@@ -69,8 +67,6 @@ def _getTreeWalkerTests(self, treeName, treeAPIs):
6967
treeAPIs)
7068
item.add_marker(getattr(pytest.mark, treeName))
7169
item.add_marker(pytest.mark.treewalker)
72-
if treeAPIs is None:
73-
item.add_marker(pytest.mark.skipif(True, reason="Treebuilder not loaded"))
7470
yield item
7571

7672

@@ -84,12 +80,14 @@ def convertTreeDump(data):
8480
class ParserTest(pytest.Item):
8581
def __init__(self, name, parent, test, treeClass, namespaceHTMLElements):
8682
super(ParserTest, self).__init__(name, parent)
87-
self.obj = lambda: 1 # this is to hack around skipif needing a function!
8883
self.test = test
8984
self.treeClass = treeClass
9085
self.namespaceHTMLElements = namespaceHTMLElements
9186

9287
def runtest(self):
88+
if self.treeClass is None:
89+
pytest.skip("Treebuilder not loaded")
90+
9391
p = html5parser.HTMLParser(tree=self.treeClass,
9492
namespaceHTMLElements=self.namespaceHTMLElements)
9593

@@ -147,11 +145,13 @@ def repr_failure(self, excinfo):
147145
class TreeWalkerTest(pytest.Item):
148146
def __init__(self, name, parent, test, treeAPIs):
149147
super(TreeWalkerTest, self).__init__(name, parent)
150-
self.obj = lambda: 1 # this is to hack around skipif needing a function!
151148
self.test = test
152149
self.treeAPIs = treeAPIs
153150

154151
def runtest(self):
152+
if self.treeAPIs is None:
153+
pytest.skip("Treebuilder not loaded")
154+
155155
p = html5parser.HTMLParser(tree=self.treeAPIs["builder"])
156156

157157
input = self.test['data']

0 commit comments

Comments
 (0)