|  | 
| 10 | 10 | 
 | 
| 11 | 11 | spaceCharacters = "".join(spaceCharacters) | 
| 12 | 12 | 
 | 
| 13 |  | -quoteAttributeSpec = re.compile("[" + spaceCharacters + "\"'=<>`]") | 
|  | 13 | +quoteAttributeSpecChars = spaceCharacters + "\"'=<>`" | 
|  | 14 | +quoteAttributeSpec = re.compile("[" + quoteAttributeSpecChars + "]") | 
|  | 15 | +quoteAttributeLegacy = re.compile("[" + quoteAttributeSpecChars + | 
|  | 16 | +                                  "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n" | 
|  | 17 | +                                  "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15" | 
|  | 18 | +                                  "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" | 
|  | 19 | +                                  "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000" | 
|  | 20 | +                                  "\u2001\u2002\u2003\u2004\u2005\u2006\u2007" | 
|  | 21 | +                                  "\u2008\u2009\u200a\u2028\u2029\u202f\u205f" | 
|  | 22 | +                                  "\u3000]") | 
| 14 | 23 | 
 | 
| 15 | 24 | try: | 
| 16 | 25 |     from codecs import register_error, xmlcharrefreplace_errors | 
| @@ -72,7 +81,7 @@ def htmlentityreplace_errors(exc): | 
| 72 | 81 | class HTMLSerializer(object): | 
| 73 | 82 | 
 | 
| 74 | 83 |     # attribute quoting options | 
| 75 |  | -    quote_attr_values = False | 
|  | 84 | +    quote_attr_values = "legacy"  # be secure by default | 
| 76 | 85 |     quote_char = '"' | 
| 77 | 86 |     use_best_quote_char = True | 
| 78 | 87 | 
 | 
| @@ -108,9 +117,9 @@ def __init__(self, **kwargs): | 
| 108 | 117 |         inject_meta_charset=True|False | 
| 109 | 118 |           Whether to insert a meta element to define the character set of the | 
| 110 | 119 |           document. | 
| 111 |  | -        quote_attr_values=True|False | 
|  | 120 | +        quote_attr_values="legacy"|"spec"|"always" | 
| 112 | 121 |           Whether to quote attribute values that don't require quoting | 
| 113 |  | -          per HTML5 parsing rules. | 
|  | 122 | +          per legacy browser behaviour, when required by the standard, or always. | 
| 114 | 123 |         quote_char=u'"'|u"'" | 
| 115 | 124 |           Use given quote character for attribute quoting. Default is to | 
| 116 | 125 |           use double quote unless attribute value contains a double quote, | 
| @@ -239,10 +248,15 @@ def serialize(self, treewalker, encoding=None): | 
| 239 | 248 |                         (k not in booleanAttributes.get(name, tuple()) and | 
| 240 | 249 |                          k not in booleanAttributes.get("", tuple())): | 
| 241 | 250 |                         yield self.encodeStrict("=") | 
| 242 |  | -                        if self.quote_attr_values: | 
|  | 251 | +                        if self.quote_attr_values == "always" or len(v) == 0: | 
| 243 | 252 |                             quote_attr = True | 
|  | 253 | +                        elif self.quote_attr_values == "spec": | 
|  | 254 | +                            quote_attr = quoteAttributeSpec.search(v) is not None | 
|  | 255 | +                        elif self.quote_attr_values == "legacy": | 
|  | 256 | +                            quote_attr = quoteAttributeLegacy.search(v) is not None | 
| 244 | 257 |                         else: | 
| 245 |  | -                            quote_attr = len(v) == 0 or quoteAttributeSpec.search(v) | 
|  | 258 | +                            raise ValueError("quote_attr_values must be one of: " | 
|  | 259 | +                                             "'always', 'spec', or 'legacy'") | 
| 246 | 260 |                         v = v.replace("&", "&amp;") | 
| 247 | 261 |                         if self.escape_lt_in_attrs: | 
| 248 | 262 |                             v = v.replace("<", "&lt;") | 
|  | 
0 commit comments