Skip to content

Commit 0e63daf

Browse files
committed
1 parent f729a08 commit 0e63daf

File tree

4 files changed

+224
-10
lines changed

4 files changed

+224
-10
lines changed

owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java

Lines changed: 156 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,163 @@ public void closeDocument() {
9494

9595
public void text(String textChunk) {
9696
if (!skipText) {
97-
out.text(textChunk);
97+
// Check if we're inside a CDATA element (style/script) with allowTextIn
98+
// where tags are reclassified as UNESCAPED text and need to be validated
99+
// Note: Only style and script are CDATA elements; noscript/noembed/noframes are PCDATA
100+
boolean insideCdataElement = false;
101+
for (int i = openElementStack.size() - 1; i >= 0; i -= 2) {
102+
String adjustedName = openElementStack.get(i);
103+
if (adjustedName != null
104+
&& allowedTextContainers.contains(adjustedName)
105+
&& ("style".equals(adjustedName) || "script".equals(adjustedName))) {
106+
insideCdataElement = true;
107+
break;
108+
}
109+
}
110+
111+
// If inside a CDATA element (style/script) with allowTextIn, we need to filter out
112+
// HTML tags that aren't allowed because tags inside these blocks are reclassified
113+
// as UNESCAPED text by the lexer
114+
if (insideCdataElement && textChunk != null && textChunk.indexOf('<') >= 0) {
115+
// Strip out HTML tags that aren't in the allowed elements list
116+
String filtered = stripDisallowedTags(textChunk);
117+
out.text(filtered);
118+
} else {
119+
out.text(textChunk);
120+
}
121+
}
122+
}
123+
124+
/**
125+
* Strips out HTML tags that aren't in the allowed elements list from text content.
126+
* This is used when tags appear inside text containers (like style blocks) where
127+
* they're treated as text but should still be validated.
128+
*/
129+
private String stripDisallowedTags(String text) {
130+
if (text == null) {
131+
return text;
132+
}
133+
134+
StringBuilder result = new StringBuilder();
135+
int len = text.length();
136+
int i = 0;
137+
138+
while (i < len) {
139+
int tagStart = text.indexOf('<', i);
140+
if (tagStart < 0) {
141+
// No more tags, append the rest
142+
result.append(text.substring(i));
143+
break;
144+
}
145+
146+
// Append text before the tag
147+
if (tagStart > i) {
148+
result.append(text.substring(i, tagStart));
149+
}
150+
151+
// Find the end of the tag (either '>' or end of string)
152+
int tagEnd = text.indexOf('>', tagStart + 1);
153+
if (tagEnd < 0) {
154+
// Unclosed tag, skip it
155+
i = tagStart + 1;
156+
continue;
157+
}
158+
159+
// Extract the tag content (between < and >)
160+
String tagContent = text.substring(tagStart + 1, tagEnd);
161+
162+
// Only process if this looks like a valid HTML element tag
163+
// Valid tags start with a letter or / followed by a letter
164+
// Skip things like <, </>, <3, etc.
165+
boolean isValidTag = false;
166+
String tagName = null;
167+
168+
if (tagContent.startsWith("/")) {
169+
// Closing tag - must have / followed by a letter
170+
if (tagContent.length() > 1) {
171+
char firstChar = tagContent.charAt(1);
172+
if (Character.isLetter(firstChar)) {
173+
isValidTag = true;
174+
tagName = tagContent.substring(1).trim().split("\\s")[0];
175+
tagName = HtmlLexer.canonicalElementName(tagName);
176+
}
177+
}
178+
} else {
179+
// Opening tag - must start with a letter
180+
char firstChar = tagContent.charAt(0);
181+
if (Character.isLetter(firstChar)) {
182+
isValidTag = true;
183+
tagName = tagContent.trim().split("\\s")[0];
184+
tagName = HtmlLexer.canonicalElementName(tagName);
185+
}
186+
}
187+
188+
if (!isValidTag) {
189+
// Not a valid HTML tag, just append it as-is
190+
result.append('<').append(tagContent).append('>');
191+
i = tagEnd + 1;
192+
continue;
193+
}
194+
195+
// Check if it's a closing tag
196+
if (tagContent.startsWith("/")) {
197+
// Only allow closing tags if the element is allowed
198+
if (elAndAttrPolicies.containsKey(tagName)) {
199+
result.append('<').append(tagContent).append('>');
200+
}
201+
// Otherwise skip the closing tag
202+
i = tagEnd + 1;
203+
} else {
204+
// Opening tag - only allow tags if the element is in the allowed list
205+
if (elAndAttrPolicies.containsKey(tagName)) {
206+
result.append('<').append(tagContent).append('>');
207+
i = tagEnd + 1;
208+
} else {
209+
// Skip disallowed tag and its content until matching closing tag
210+
i = tagEnd + 1;
211+
// Track nesting level to find the matching closing tag
212+
int nestingLevel = 1;
213+
while (i < len && nestingLevel > 0) {
214+
int nextTagStart = text.indexOf('<', i);
215+
if (nextTagStart < 0) {
216+
// No more tags, skip to end
217+
i = len;
218+
break;
219+
}
220+
int nextTagEnd = text.indexOf('>', nextTagStart + 1);
221+
if (nextTagEnd < 0) {
222+
// Unclosed tag, skip to end
223+
i = len;
224+
break;
225+
}
226+
String nextTagContent = text.substring(nextTagStart + 1, nextTagEnd);
227+
String nextTagName = nextTagContent.trim().split("\\s")[0];
228+
if (nextTagContent.startsWith("/")) {
229+
// Closing tag
230+
nextTagName = nextTagName.substring(1);
231+
nextTagName = HtmlLexer.canonicalElementName(nextTagName);
232+
if (nextTagName.equals(tagName)) {
233+
nestingLevel--;
234+
if (nestingLevel == 0) {
235+
// Found matching closing tag, skip it and continue
236+
i = nextTagEnd + 1;
237+
break;
238+
}
239+
}
240+
} else {
241+
// Opening tag
242+
nextTagName = HtmlLexer.canonicalElementName(nextTagName);
243+
if (nextTagName.equals(tagName)) {
244+
nestingLevel++;
245+
}
246+
}
247+
i = nextTagEnd + 1;
248+
}
249+
}
250+
}
98251
}
252+
253+
return result.toString();
99254
}
100255

101256
public void openTag(String elementName, List<String> attrs) {

owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,19 @@ public class HtmlLexerTest extends TestCase {
4646
public final void testHtmlLexer() throws Exception {
4747
// Do the lexing.
4848
String input = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexerinput1.html").toURI())), StandardCharsets.UTF_8);
49+
// Normalize line endings in input to handle Windows/Unix differences
50+
input = input.replace("\r\n", "\n").replace("\r", "\n");
4951
StringBuilder actual = new StringBuilder();
5052
lex(input, actual);
5153

5254
// Get the golden.
5355
String golden = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexergolden1.txt").toURI())), StandardCharsets.UTF_8);
56+
// Normalize line endings to handle Windows/Unix differences
57+
golden = golden.replace("\r\n", "\n").replace("\r", "\n");
58+
String actualStr = actual.toString().replace("\r\n", "\n").replace("\r", "\n");
5459

5560
// Compare.
56-
assertEquals(golden, actual.toString());
61+
assertEquals(golden, actualStr);
5762
}
5863

5964
@Test

owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,13 @@
2828

2929
package org.owasp.html;
3030

31-
import java.io.BufferedReader;
32-
import java.io.InputStreamReader;
3331
import java.nio.charset.StandardCharsets;
32+
import java.nio.file.Files;
33+
import java.nio.file.Paths;
3434
import java.util.List;
3535
import java.util.concurrent.LinkedBlockingQueue;
3636
import java.util.concurrent.ThreadPoolExecutor;
3737
import java.util.concurrent.TimeUnit;
38-
import java.util.stream.Collectors;
39-
40-
import org.apache.commons.codec.Resources;
4138

4239
/**
4340
* Throws malformed inputs at the HTML sanitizer to try and crash it.
@@ -62,9 +59,9 @@ public void text(String textChunk) { /* do nothing */ }
6259
};
6360

6461
public final void testFuzzHtmlParser() throws Exception {
65-
String html = new BufferedReader(new InputStreamReader(
66-
Resources.getInputStream("benchmark-data/Yahoo!.html"),
67-
StandardCharsets.UTF_8)).lines().collect(Collectors.joining());
62+
String html = new String(Files.readAllBytes(
63+
Paths.get(getClass().getResource("/benchmark-data/Yahoo!.html").toURI())),
64+
StandardCharsets.UTF_8);
6865
int length = html.length();
6966

7067
char[] fuzzyHtml0 = new char[length];

owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,63 @@ public static final void testStylingCornerCase() {
454454
assertEquals(want, sanitize(input));
455455
}
456456

457+
@Test
458+
public static final void testCVE202566021_1() {
459+
// Arrange
460+
String actualPayload = "<noscript><style>/* user content */.x { font-size: 12px; }<div id=\"evil\">XSS?</div></style></noscript>";
461+
String expectedPayload = "<noscript><style>/* user content */.x { font-size: 12px; }</style></noscript>";
462+
463+
HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
464+
PolicyFactory policy = htmlPolicyBuilder
465+
.allowElements("style", "noscript")
466+
.allowTextIn("style")
467+
.toFactory();
468+
469+
// Act
470+
String sanitized = policy.sanitize(actualPayload);
471+
472+
// Assert
473+
assertEquals(expectedPayload, sanitized);
474+
}
475+
476+
@Test
477+
public static final void testCVE202566021_2() {
478+
// Arrange
479+
String actualPayload = "<noscript><style>/* user content */.x { font-size: 12px; }<script>alert('XSS Attack!')</script></style></noscript>";
480+
String expectedPayload = "<noscript><style>/* user content */.x { font-size: 12px; }</style></noscript>";
481+
482+
HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
483+
PolicyFactory policy = htmlPolicyBuilder
484+
.allowElements("style", "noscript")
485+
.allowTextIn("style")
486+
.toFactory();
487+
488+
// Act
489+
String sanitized = policy.sanitize(actualPayload);
490+
491+
// Assert
492+
assertEquals(expectedPayload, sanitized);
493+
}
494+
495+
@Test
496+
public static final void testCVE202566021_3() {
497+
// Arrange
498+
String actualPayload = "<noscript><style>/* user content */.x { font-size: 12px; }<div id=\"good\">ALLOWED?</div></style></noscript>";
499+
String expectedPayload = "<noscript><style>/* user content */.x { font-size: 12px; }<div id=\"good\">ALLOWED?</div></style></noscript>";
500+
501+
HtmlPolicyBuilder htmlPolicyBuilder = new HtmlPolicyBuilder();
502+
PolicyFactory policy = htmlPolicyBuilder
503+
.allowElements("style", "noscript", "div")
504+
.allowTextIn("style")
505+
.toFactory();
506+
507+
// Act
508+
String sanitized = policy.sanitize(actualPayload);
509+
510+
// Assert
511+
assertEquals(expectedPayload, sanitized);
512+
}
513+
457514
private static String sanitize(@Nullable String html) {
458515
StringBuilder sb = new StringBuilder();
459516
HtmlStreamRenderer renderer = HtmlStreamRenderer.create(

0 commit comments

Comments
 (0)