1
1
from __future__ import absolute_import , division , unicode_literals
2
2
3
+ import pytest
4
+
3
5
from html5lib import constants , parseFragment , serialize
4
6
from html5lib .filters import sanitizer
5
7
6
8
7
- def runSanitizerTest (_ , expected , input ):
8
- parsed = parseFragment (expected )
9
- expected = serialize (parsed ,
10
- omit_optional_tags = False ,
11
- use_trailing_solidus = True ,
12
- space_before_trailing_solidus = False ,
13
- quote_attr_values = "always" ,
14
- quote_char = '"' ,
15
- alphabetical_attributes = True )
16
- assert expected == sanitize_html (input )
17
-
18
-
19
9
def sanitize_html (stream ):
20
10
parsed = parseFragment (stream )
21
11
serialized = serialize (parsed ,
@@ -59,27 +49,27 @@ def test_data_uri_disallowed_type():
59
49
assert expected == sanitized
60
50
61
51
62
- def test_sanitizer ():
52
+ def param_sanitizer ():
63
53
for ns , tag_name in sanitizer .allowed_elements :
64
54
if ns != constants .namespaces ["html" ]:
65
55
continue
66
56
if tag_name in ['caption' , 'col' , 'colgroup' , 'optgroup' , 'option' , 'table' , 'tbody' , 'td' ,
67
57
'tfoot' , 'th' , 'thead' , 'tr' , 'select' ]:
68
58
continue # TODO
69
59
if tag_name == 'image' :
70
- yield (runSanitizerTest , "test_should_allow_%s_tag" % tag_name ,
60
+ yield ("test_should_allow_%s_tag" % tag_name ,
71
61
"<img title=\" 1\" />foo <bad>bar</bad> baz" ,
72
62
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name , tag_name ))
73
63
elif tag_name == 'br' :
74
- yield (runSanitizerTest , "test_should_allow_%s_tag" % tag_name ,
64
+ yield ("test_should_allow_%s_tag" % tag_name ,
75
65
"<br title=\" 1\" />foo <bad>bar</bad> baz<br/>" ,
76
66
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name , tag_name ))
77
67
elif tag_name in constants .voidElements :
78
- yield (runSanitizerTest , "test_should_allow_%s_tag" % tag_name ,
68
+ yield ("test_should_allow_%s_tag" % tag_name ,
79
69
"<%s title=\" 1\" />foo <bad>bar</bad> baz" % tag_name ,
80
70
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name , tag_name ))
81
71
else :
82
- yield (runSanitizerTest , "test_should_allow_%s_tag" % tag_name ,
72
+ yield ("test_should_allow_%s_tag" % tag_name ,
83
73
"<%s title=\" 1\" >foo <bad>bar</bad> baz</%s>" % (tag_name , tag_name ),
84
74
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name , tag_name ))
85
75
@@ -93,15 +83,15 @@ def test_sanitizer():
93
83
attribute_value = 'foo'
94
84
if attribute_name in sanitizer .attr_val_is_uri :
95
85
attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer .allowed_protocols [0 ]
96
- yield (runSanitizerTest , "test_should_allow_%s_attribute" % attribute_name ,
86
+ yield ("test_should_allow_%s_attribute" % attribute_name ,
97
87
"<p %s=\" %s\" >foo <bad>bar</bad> baz</p>" % (attribute_name , attribute_value ),
98
88
"<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name , attribute_value ))
99
89
100
90
for protocol in sanitizer .allowed_protocols :
101
91
rest_of_uri = '//sub.domain.tld/path/object.ext'
102
92
if protocol == 'data' :
103
93
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
104
- yield (runSanitizerTest , "test_should_allow_uppercase_%s_uris" % protocol ,
94
+ yield ("test_should_allow_uppercase_%s_uris" % protocol ,
105
95
"<img src=\" %s:%s\" >foo</a>" % (protocol , rest_of_uri ),
106
96
"""<img src="%s:%s">foo</a>""" % (protocol , rest_of_uri ))
107
97
@@ -110,11 +100,26 @@ def test_sanitizer():
110
100
if protocol == 'data' :
111
101
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
112
102
protocol = protocol .upper ()
113
- yield (runSanitizerTest , "test_should_allow_uppercase_%s_uris" % protocol ,
103
+ yield ("test_should_allow_uppercase_%s_uris" % protocol ,
114
104
"<img src=\" %s:%s\" >foo</a>" % (protocol , rest_of_uri ),
115
105
"""<img src="%s:%s">foo</a>""" % (protocol , rest_of_uri ))
116
106
117
107
108
@pytest.mark.parametrize(
    "expected, input",
    (pytest.param(wanted, markup, id=case_id)
     for case_id, wanted, markup in param_sanitizer()))
def test_sanitizer(expected, input):
    """Check one generated sanitizer case.

    ``expected`` is parsed as a fragment and re-serialized with a fixed set
    of serializer options; the result must equal ``sanitize_html(input)``.
    Each case is a ``(id, expected, input)`` triple yielded by
    ``param_sanitizer()``; the first element becomes the pytest case id.
    """
    # Serializer settings applied to the expected markup before comparing.
    serializer_options = dict(
        omit_optional_tags=False,
        use_trailing_solidus=True,
        space_before_trailing_solidus=False,
        quote_attr_values="always",
        quote_char='"',
        alphabetical_attributes=True,
    )
    normalized = serialize(parseFragment(expected), **serializer_options)
    assert normalized == sanitize_html(input)
121
+
122
+
118
123
def test_lowercase_color_codes_in_style ():
119
124
sanitized = sanitize_html ("<p style=\" border: 1px solid #a2a2a2;\" ></p>" )
120
125
expected = '<p style=\" border: 1px solid #a2a2a2;\" ></p>'
0 commit comments