Skip to content

Commit eb6ef02

Browse files
authored
Do not lcase element or attribute names that match SVG or MathML name… (#206)
* Do not lcase element or attribute names that match SVG or MathML names exactly > Currently all names are converted to lowercase which is ok when > you're using it for HTML only, but if there is an SVG image nested > inside the HTML it breaks. For example, when `viewBox` attribute is > converted to `viewbox` the image is not displayed correctly. This commit splits *HtmlLexer*.*canonicalName* into variants which preserve items on whitelists derived from the SVG and MathML specifications, and adjusts callers of *canonicalName* to use the appropriate variant. Fixes #182 * add unittests for mixed-case SVG names
1 parent fd6b2dd commit eb6ef02

8 files changed

+182
-30
lines changed

src/main/java/org/owasp/html/ElementAndAttributePolicies.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535

3636
/**
3737
* Encapsulates all the information needed by the
38-
* {@link ElementAndAttributePolicySanitizerPolicy} to sanitize one kind
38+
* {@link ElementAndAttributePolicyBasedSanitizerPolicy} to sanitize one kind
3939
* of element.
4040
*/
4141
@Immutable

src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ public void openTag(String elementName, List<String> attrs) {
144144

145145
adjustedElementName = policies.elPolicy.apply(elementName, attrs);
146146
if (adjustedElementName != null) {
147-
adjustedElementName = HtmlLexer.canonicalName(adjustedElementName);
147+
adjustedElementName = HtmlLexer.canonicalElementName(adjustedElementName);
148148
}
149149
} else {
150150
adjustedElementName = null;

src/main/java/org/owasp/html/HtmlLexer.java

+144-11
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,27 @@ public HtmlLexer(String input) {
5555

5656
/**
5757
* Normalize case of names that are not name-spaced. This lower-cases HTML
58-
* element and attribute names, but not ones for embedded SVG or MATHML.
58+
* element names, but not ones for embedded SVG or MathML.
5959
*/
60-
static String canonicalName(String elementOrAttribName) {
61-
return elementOrAttribName.indexOf(':') >= 0
62-
? elementOrAttribName : Strings.toLowerCase(elementOrAttribName);
60+
static String canonicalElementName(String elementName) {
61+
return elementName.indexOf(':') >= 0 || mixedCaseForeignElementNames.contains(elementName)
62+
? elementName : Strings.toLowerCase(elementName);
63+
}
64+
65+
/**
66+
* Normalize case of names that are not name-spaced. This lower-cases HTML
67+
* attribute names, but not ones for embedded SVG or MathML.
68+
*/
69+
static String canonicalAttributeName(String attribName) {
70+
return attribName.indexOf(':') >= 0 || mixedCaseForeignAttributeNames.contains(attribName)
71+
? attribName : Strings.toLowerCase(attribName);
72+
}
73+
74+
/**
75+
* Normalize case of keywords in attribute values.
76+
*/
77+
public static String canonicalKeywordAttributeValue(String keywordValue) {
78+
return Strings.toLowerCase(keywordValue);
6379
}
6480

6581
/**
@@ -243,16 +259,133 @@ private void pushbackToken(HtmlToken token) {
243259

244260
/** Can the attribute appear in HTML without a value. */
245261
private static boolean isValuelessAttribute(String attribName) {
246-
boolean valueless = VALUELESS_ATTRIB_NAMES.contains(
247-
Strings.toLowerCase(attribName));
248-
return valueless;
262+
return VALUELESS_ATTRIB_NAMES.contains(canonicalAttributeName(attribName));
249263
}
250264

251265
// From http://issues.apache.org/jira/browse/XALANC-519
252266
private static final Set<String> VALUELESS_ATTRIB_NAMES = ImmutableSet.of(
253267
"checked", "compact", "declare", "defer", "disabled",
254268
"ismap", "multiple", "nohref", "noresize", "noshade",
255269
"nowrap", "readonly", "selected");
270+
271+
private static final ImmutableSet<String> mixedCaseForeignAttributeNames = ImmutableSet.of(
272+
"attributeName",
273+
"attributeType",
274+
"baseFrequency",
275+
"baseProfile",
276+
"calcMode",
277+
"clipPathUnits",
278+
"contentScriptType",
279+
"defaultAction",
280+
"definitionURL",
281+
"diffuseConstant",
282+
"edgeMode",
283+
"externalResourcesRequired",
284+
"filterUnits",
285+
"focusHighlight",
286+
"gradientTransform",
287+
"gradientUnits",
288+
"initialVisibility",
289+
"kernelMatrix",
290+
"kernelUnitLength",
291+
"keyPoints",
292+
"keySplines",
293+
"keyTimes",
294+
"lengthAdjust",
295+
"limitingConeAngle",
296+
"markerHeight",
297+
"markerUnits",
298+
"markerWidth",
299+
"maskContentUnits",
300+
"maskUnits",
301+
"mediaCharacterEncoding",
302+
"mediaContentEncodings",
303+
"mediaSize",
304+
"mediaTime",
305+
"numOctaves",
306+
"pathLength",
307+
"patternContentUnits",
308+
"patternTransform",
309+
"patternUnits",
310+
"playbackOrder",
311+
"pointsAtX",
312+
"pointsAtY",
313+
"pointsAtZ",
314+
"preserveAlpha",
315+
"preserveAspectRatio",
316+
"primitiveUnits",
317+
"refX",
318+
"refY",
319+
"repeatCount",
320+
"repeatDur",
321+
"requiredExtensions",
322+
"requiredFeatures",
323+
"requiredFonts",
324+
"requiredFormats",
325+
"schemaLocation",
326+
"snapshotTime",
327+
"specularConstant",
328+
"specularExponent",
329+
"spreadMethod",
330+
"startOffset",
331+
"stdDeviation",
332+
"stitchTiles",
333+
"surfaceScale",
334+
"syncBehavior",
335+
"syncBehaviorDefault",
336+
"syncMaster",
337+
"syncTolerance",
338+
"syncToleranceDefault",
339+
"systemLanguage",
340+
"tableValues",
341+
"targetX",
342+
"targetY",
343+
"textLength",
344+
"timelineBegin",
345+
"transformBehavior",
346+
"viewBox",
347+
"xChannelSelector",
348+
"yChannelSelector",
349+
"zoomAndPan"
350+
);
351+
352+
private static final ImmutableSet<String> mixedCaseForeignElementNames = ImmutableSet.of(
353+
"animateColor",
354+
"animateMotion",
355+
"animateTransform",
356+
"clipPath",
357+
"feBlend",
358+
"feColorMatrix",
359+
"feComponentTransfer",
360+
"feComposite",
361+
"feConvolveMatrix",
362+
"feDiffuseLighting",
363+
"feDisplacementMap",
364+
"feDistantLight",
365+
"feDropShadow",
366+
"feFlood",
367+
"feFuncA",
368+
"feFuncB",
369+
"feFuncG",
370+
"feFuncR",
371+
"feGaussianBlur",
372+
"feImage",
373+
"feMerge",
374+
"feMergeNode",
375+
"feMorphology",
376+
"feOffset",
377+
"fePointLight",
378+
"feSpecularLighting",
379+
"feSpotLight",
380+
"feTile",
381+
"feTurbulence",
382+
"foreignObject",
383+
"linearGradient",
384+
"radialGradient",
385+
"solidColor",
386+
"textArea",
387+
"textPath"
388+
);
256389
}
257390

258391
/**
@@ -311,7 +444,7 @@ protected HtmlToken produce() {
311444
switch (token.type) {
312445
case TAGBEGIN:
313446
{
314-
String canonTagName = canonicalName(
447+
String canonTagName = canonicalElementName(
315448
token.start + 1, token.end);
316449
if (HtmlTextEscapingMode.isTagFollowedByLiteralContent(
317450
canonTagName)) {
@@ -478,7 +611,7 @@ private HtmlToken parseToken() {
478611
if (this.inEscapeExemptBlock
479612
&& '/' == input.charAt(start + 1)
480613
&& textEscapingMode != HtmlTextEscapingMode.PLAIN_TEXT
481-
&& canonicalName(start + 2, end)
614+
&& canonicalElementName(start + 2, end)
482615
.equals(escapeExemptTagName)) {
483616
this.inEscapeExemptBlock = false;
484617
this.escapeExemptTagName = null;
@@ -612,8 +745,8 @@ && canonicalName(start + 2, end)
612745
return result;
613746
}
614747

615-
private String canonicalName(int start, int end) {
616-
return HtmlLexer.canonicalName(input.substring(start, end));
748+
private String canonicalElementName(int start, int end) {
749+
return HtmlLexer.canonicalElementName(input.substring(start, end));
617750
}
618751

619752
private static boolean isIdentStart(char ch) {

src/main/java/org/owasp/html/HtmlPolicyBuilder.java

+9-9
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ public HtmlPolicyBuilder allowElements(
238238
ElementPolicy policy, String... elementNames) {
239239
invalidateCompiledState();
240240
for (String elementName : elementNames) {
241-
elementName = HtmlLexer.canonicalName(elementName);
241+
elementName = HtmlLexer.canonicalElementName(elementName);
242242
ElementPolicy newPolicy = ElementPolicy.Util.join(
243243
elPolicies.get(elementName), policy);
244244
// Don't remove if newPolicy is the always reject policy since we want
@@ -286,7 +286,7 @@ public HtmlPolicyBuilder allowCommonBlockElements() {
286286
public HtmlPolicyBuilder allowTextIn(String... elementNames) {
287287
invalidateCompiledState();
288288
for (String elementName : elementNames) {
289-
elementName = HtmlLexer.canonicalName(elementName);
289+
elementName = HtmlLexer.canonicalElementName(elementName);
290290
textContainers.put(elementName, true);
291291
}
292292
return this;
@@ -305,7 +305,7 @@ public HtmlPolicyBuilder allowTextIn(String... elementNames) {
305305
public HtmlPolicyBuilder disallowTextIn(String... elementNames) {
306306
invalidateCompiledState();
307307
for (String elementName : elementNames) {
308-
elementName = HtmlLexer.canonicalName(elementName);
308+
elementName = HtmlLexer.canonicalElementName(elementName);
309309
textContainers.put(elementName, false);
310310
}
311311
return this;
@@ -321,7 +321,7 @@ public HtmlPolicyBuilder disallowTextIn(String... elementNames) {
321321
public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) {
322322
invalidateCompiledState();
323323
for (String elementName : elementNames) {
324-
elementName = HtmlLexer.canonicalName(elementName);
324+
elementName = HtmlLexer.canonicalElementName(elementName);
325325
skipIssueTagMap.put(elementName, HtmlTagSkipType.DO_NOT_SKIP);
326326
}
327327
return this;
@@ -336,7 +336,7 @@ public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) {
336336
public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) {
337337
invalidateCompiledState();
338338
for (String elementName : elementNames) {
339-
elementName = HtmlLexer.canonicalName(elementName);
339+
elementName = HtmlLexer.canonicalElementName(elementName);
340340
skipIssueTagMap.put(elementName, HtmlTagSkipType.SKIP);
341341
}
342342
return this;
@@ -349,7 +349,7 @@ public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) {
349349
public AttributeBuilder allowAttributes(String... attributeNames) {
350350
ImmutableList.Builder<String> b = ImmutableList.builder();
351351
for (String attributeName : attributeNames) {
352-
b.add(HtmlLexer.canonicalName(attributeName));
352+
b.add(HtmlLexer.canonicalAttributeName(attributeName));
353353
}
354354
return new AttributeBuilder(b.build());
355355
}
@@ -432,7 +432,7 @@ public HtmlPolicyBuilder requireRelsOnLinks(String... linkValues) {
432432
this.extraRelsForLinks = Sets.newLinkedHashSet();
433433
}
434434
for (String linkValue : linkValues) {
435-
linkValue = HtmlLexer.canonicalName(linkValue);
435+
linkValue = HtmlLexer.canonicalKeywordAttributeValue(linkValue);
436436
Preconditions.checkArgument(
437437
!Strings.containsHtmlSpace(linkValue),
438438
"spaces in input. use f(\"foo\", \"bar\") not f(\"foo bar\")");
@@ -456,7 +456,7 @@ public HtmlPolicyBuilder skipRelsOnLinks(String... linkValues) {
456456
this.skipRelsForLinks = Sets.newLinkedHashSet();
457457
}
458458
for (String linkValue : linkValues) {
459-
linkValue = HtmlLexer.canonicalName(linkValue);
459+
linkValue = HtmlLexer.canonicalKeywordAttributeValue(linkValue);
460460
Preconditions.checkArgument(
461461
!Strings.containsHtmlSpace(linkValue),
462462
"spaces in input. use f(\"foo\", \"bar\") not f(\"foo bar\")");
@@ -980,7 +980,7 @@ public HtmlPolicyBuilder globally() {
980980
public HtmlPolicyBuilder onElements(String... elementNames) {
981981
ImmutableList.Builder<String> b = ImmutableList.builder();
982982
for (String elementName : elementNames) {
983-
b.add(HtmlLexer.canonicalName(elementName));
983+
b.add(HtmlLexer.canonicalElementName(elementName));
984984
}
985985
return HtmlPolicyBuilder.this.allowAttributesOnElements(
986986
policy, attributeNames, b.build());

src/main/java/org/owasp/html/HtmlSanitizer.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ public static void sanitize(
152152
break;
153153
case TAGBEGIN:
154154
if (htmlContent.charAt(token.start + 1) == '/') { // A close tag.
155-
receiver.closeTag(HtmlLexer.canonicalName(
155+
receiver.closeTag(HtmlLexer.canonicalElementName(
156156
htmlContent.substring(token.start + 2, token.end)));
157157
while (lexer.hasNext()
158158
&& lexer.next().type != HtmlTokenType.TAGEND) {
@@ -173,7 +173,7 @@ public static void sanitize(
173173
} else {
174174
attrsReadyForName = false;
175175
}
176-
attrs.add(HtmlLexer.canonicalName(
176+
attrs.add(HtmlLexer.canonicalAttributeName(
177177
htmlContent.substring(tagBodyToken.start, tagBodyToken.end)));
178178
break;
179179
case ATTRVALUE:
@@ -191,7 +191,7 @@ public static void sanitize(
191191
attrs.add(attrs.getLast());
192192
}
193193
receiver.openTag(
194-
HtmlLexer.canonicalName(
194+
HtmlLexer.canonicalElementName(
195195
htmlContent.substring(token.start + 1, token.end)),
196196
attrs);
197197
}

src/main/java/org/owasp/html/HtmlStreamRenderer.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ private void writeOpenTag(
187187
attrIt.hasNext();) {
188188
String name = attrIt.next();
189189
String value = attrIt.next();
190-
name = HtmlLexer.canonicalName(name);
190+
name = HtmlLexer.canonicalAttributeName(name);
191191
if (!isValidHtmlName(name)) {
192192
error("Invalid attr name", name);
193193
continue;
@@ -234,7 +234,7 @@ public final void closeTag(String elementName) {
234234
private final void writeCloseTag(String uncanonElementName)
235235
throws IOException {
236236
if (!open) { throw new IllegalStateException(); }
237-
String elementName = HtmlLexer.canonicalName(uncanonElementName);
237+
String elementName = HtmlLexer.canonicalElementName(uncanonElementName);
238238
if (!isValidHtmlName(elementName)) {
239239
error("Invalid element name", elementName);
240240
return;
@@ -386,7 +386,7 @@ static boolean isValidHtmlName(String name) {
386386
* that has more consistent semantics.
387387
*/
388388
static String safeName(String unsafeElementName) {
389-
String elementName = HtmlLexer.canonicalName(unsafeElementName);
389+
String elementName = HtmlLexer.canonicalElementName(unsafeElementName);
390390

391391
// Substitute a reliably non-raw-text element for raw-text and
392392
// plain-text elements.

src/main/java/org/owasp/html/TagBalancingHtmlStreamEventReceiver.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ public void openTag(String elementName, List<String> attrs) {
9797
if (DEBUG) {
9898
dumpState("open " + elementName);
9999
}
100-
String canonElementName = HtmlLexer.canonicalName(elementName);
100+
String canonElementName = HtmlLexer.canonicalElementName(elementName);
101101

102102
int elIndex = METADATA.indexForName(canonElementName);
103103
// Treat unrecognized tags as void, but emit closing tags in closeTag().
@@ -238,7 +238,7 @@ public void closeTag(String elementName) {
238238
if (DEBUG) {
239239
dumpState("close " + elementName);
240240
}
241-
String canonElementName = HtmlLexer.canonicalName(elementName);
241+
String canonElementName = HtmlLexer.canonicalElementName(elementName);
242242

243243
int elIndex = METADATA.indexForName(canonElementName);
244244
if (elIndex == UNRECOGNIZED_TAG) { // Allow unrecognized end tags through.

src/test/java/org/owasp/html/HtmlPolicyBuilderTest.java

+19
Original file line numberDiff line numberDiff line change
@@ -975,6 +975,25 @@ public static final void testTableStructure() {
975975
sanitized);
976976
}
977977

978+
@Test
979+
public static final void testSvgNames() {
980+
PolicyFactory policyFactory = new HtmlPolicyBuilder()
981+
.allowElements("svg", "animateColor")
982+
.allowAttributes("viewBox").onElements("svg")
983+
.toFactory();
984+
String svg = "<svg viewBox=\"0 0 0 0\"><animateColor></animateColor></svg>";
985+
assertEquals(svg, policyFactory.sanitize(svg));
986+
}
987+
988+
@Test
989+
public static final void testTextareaIsNotTextArea() {
990+
String input = "<textarea>x</textarea><textArea>y</textArea>";
991+
PolicyFactory textareaPolicy = new HtmlPolicyBuilder().allowElements("textarea").toFactory();
992+
PolicyFactory textAreaPolicy = new HtmlPolicyBuilder().allowElements("textArea").toFactory();
993+
assertEquals("<textarea>x</textarea>y", textareaPolicy.sanitize(input));
994+
assertEquals("x<textArea>y</textArea>", textAreaPolicy.sanitize(input));
995+
}
996+
978997
private static String apply(HtmlPolicyBuilder b) {
979998
return apply(b, EXAMPLE);
980999
}

0 commit comments

Comments
 (0)