Skip to content

Commit 05fcd62

Browse files
willkg authored and gsnedders committed
Fix alphabeticalattributes filter namespace problem (#324)
If a tag has an attribute with a None namespace and one with a str namespace, then this filter would fail with a TypeError in Python 3. This fixes that. Fixes #322
1 parent 984f934 commit 05fcd62

File tree

2 files changed

+93
-1
lines changed

2 files changed

+93
-1
lines changed

html5lib/filters/alphabeticalattributes.py

+12 -1
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,24 @@
88
from ordereddict import OrderedDict
99

1010

11+
def _attr_key(attr):
12+
"""Return an appropriate key for an attribute for sorting
13+
14+
Attributes have a namespace that can be either ``None`` or a string. We
15+
can't compare the two because they're different types, so we convert
16+
``None`` to an empty string first.
17+
18+
"""
19+
return (attr[0][0] or ''), attr[0][1]
20+
21+
1122
class Filter(base.Filter):
1223
def __iter__(self):
1324
for token in base.Filter.__iter__(self):
1425
if token["type"] in ("StartTag", "EmptyTag"):
1526
attrs = OrderedDict()
1627
for name, value in sorted(token["data"].items(),
17-
key=lambda x: x[0]):
28+
key=_attr_key):
1829
attrs[name] = value
1930
token["data"] = attrs
2031
yield token
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,81 @@
1+
from __future__ import absolute_import, division, unicode_literals
2+
3+
try:
4+
from collections import OrderedDict
5+
except ImportError:
6+
from ordereddict import OrderedDict
7+
8+
import pytest
9+
10+
import html5lib
11+
from html5lib.filters.alphabeticalattributes import Filter
12+
from html5lib.serializer import HTMLSerializer
13+
14+
15+
@pytest.mark.parametrize('msg, attrs, expected_attrs', [
16+
(
17+
'no attrs',
18+
{},
19+
{}
20+
),
21+
(
22+
'one attr',
23+
{(None, 'alt'): 'image'},
24+
OrderedDict([((None, 'alt'), 'image')])
25+
),
26+
(
27+
'multiple attrs',
28+
{
29+
(None, 'src'): 'foo',
30+
(None, 'alt'): 'image',
31+
(None, 'style'): 'border: 1px solid black;'
32+
},
33+
OrderedDict([
34+
((None, 'alt'), 'image'),
35+
((None, 'src'), 'foo'),
36+
((None, 'style'), 'border: 1px solid black;')
37+
])
38+
),
39+
])
40+
def test_alphabetizing(msg, attrs, expected_attrs):
41+
tokens = [{'type': 'StartTag', 'name': 'img', 'data': attrs}]
42+
output_tokens = list(Filter(tokens))
43+
44+
attrs = output_tokens[0]['data']
45+
assert attrs == expected_attrs
46+
47+
48+
def test_with_different_namespaces():
49+
tokens = [{
50+
'type': 'StartTag',
51+
'name': 'pattern',
52+
'data': {
53+
(None, 'id'): 'patt1',
54+
('http://www.w3.org/1999/xlink', 'href'): '#patt2'
55+
}
56+
}]
57+
output_tokens = list(Filter(tokens))
58+
59+
attrs = output_tokens[0]['data']
60+
assert attrs == OrderedDict([
61+
((None, 'id'), 'patt1'),
62+
(('http://www.w3.org/1999/xlink', 'href'), '#patt2')
63+
])
64+
65+
66+
def test_with_serializer():
67+
"""Verify filter works in the context of everything else"""
68+
parser = html5lib.HTMLParser()
69+
dom = parser.parseFragment('<svg><pattern xlink:href="#patt2" id="patt1"></svg>')
70+
walker = html5lib.getTreeWalker('etree')
71+
ser = HTMLSerializer(
72+
alphabetical_attributes=True,
73+
quote_attr_values='always'
74+
)
75+
76+
# FIXME(willkg): The "xlink" namespace gets dropped by the serializer. When
77+
# that gets fixed, we can fix this expected result.
78+
assert (
79+
ser.render(walker(dom)) ==
80+
'<svg><pattern id="patt1" href="#patt2"></pattern></svg>'
81+
)

0 commit comments

Comments
 (0)