Skip to content

Commit 4768c64

Browse files
committed
Update the characters that need to be quoted in attributes in the serializer, per spec
This also switches to using the re module, which seems far cleaner than the reduce-based search previously used.
1 parent 6bd01d0 commit 4768c64

File tree

2 files changed

+6
-8
lines changed

2 files changed

+6
-8
lines changed

html5lib/serializer/htmlserializer.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
from __future__ import absolute_import, division, unicode_literals
22
from six import text_type
33

4-
try:
5-
from functools import reduce
6-
except ImportError:
7-
pass
4+
import re
85

96
from ..constants import voidElements, booleanAttributes, spaceCharacters
107
from ..constants import rcdataElements, entities, xmlEntities
@@ -13,6 +10,8 @@
1310

1411
spaceCharacters = "".join(spaceCharacters)
1512

13+
quoteAttributeSpec = re.compile("[" + spaceCharacters + "\"'=<>`]")
14+
1615
try:
1716
from codecs import register_error, xmlcharrefreplace_errors
1817
except ImportError:
@@ -240,11 +239,10 @@ def serialize(self, treewalker, encoding=None):
240239
(k not in booleanAttributes.get(name, tuple()) and
241240
k not in booleanAttributes.get("", tuple())):
242241
yield self.encodeStrict("=")
243-
if self.quote_attr_values or not v:
242+
if self.quote_attr_values:
244243
quote_attr = True
245244
else:
246-
quote_attr = reduce(lambda x, y: x or (y in v),
247-
spaceCharacters + ">\"'=", False)
245+
quote_attr = len(v) == 0 or quoteAttributeSpec.search(v)
248246
v = v.replace("&", "&amp;")
249247
if self.escape_lt_in_attrs:
250248
v = v.replace("<", "&lt;")

html5lib/tests/serializer-testdata/core.test

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
},
4343
{
4444
"expected": [
45-
"<span title=foo<bar>"
45+
"<span title=\"foo<bar\">"
4646
],
4747
"input": [
4848
[

0 commit comments

Comments
 (0)