Fix #363: CVE-2025-66021

melloware · melloware · commit 0e63dafd49d4 · 2025-12-08T16:08:18.000-05:00
diff --git a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/ElementAndAttributePolicyBasedSanitizerPolicy.java
@@ -94,8 +94,163 @@ public void closeDocument() {
 
   public void text(String textChunk) {
     if (!skipText) {
-      out.text(textChunk);
+      // Look for an open style/script element that is an allowed text container.
+      // Only style and script are CDATA elements; noscript/noembed/noframes are
+      // PCDATA. Reads every 2nd stack slot from the top (presumably adjusted names).
+      boolean insideCdataElement = false;
+      for (int i = openElementStack.size() - 1; i >= 0; i -= 2) {
+        String adjustedName = openElementStack.get(i);
+        if (adjustedName != null 
+            && allowedTextContainers.contains(adjustedName)
+            && ("style".equals(adjustedName) || "script".equals(adjustedName))) {
+          insideCdataElement = true;
+          break;
+        }
+      }
+      
+      // Inside such an element the lexer reclassifies tags as UNESCAPED text, so
+      // a non-null chunk containing '<' is filtered to remove tags of elements
+      // that are not allowed; every other chunk goes to the output unchanged.
+      if (insideCdataElement && textChunk != null && textChunk.indexOf('<') >= 0) {
+        // Strip out HTML tags that aren't in the allowed elements list
+        String filtered = stripDisallowedTags(textChunk);
+        out.text(filtered);
+      } else {
+        out.text(textChunk);
+      }
+    }
+  }
+  
+  /**
+   * Removes tags of elements that have no entry in {@code elAndAttrPolicies}
+   * from raw text. This is used when tags appear inside text containers (like
+   * style blocks) where they're treated as text but should still be validated.
+   *
+   * <p>Allowed tags and closed spans that are not tags (e.g. {@code <3>}) are
+   * copied verbatim. A disallowed opening tag is dropped together with
+   * everything up to its matching closing tag, or up to the end of the text
+   * when there is none. A disallowed closing tag is dropped by itself. A
+   * {@code <} that is never closed by a {@code >}, or that opens a non-tag
+   * span containing another {@code <}, is dropped as a single character.
+   *
+   * <p>NOTE(review): an allowed tag is copied with whatever attributes it
+   * carries, since only the element name is looked up here; confirm that is
+   * acceptable for every policy.
+   *
+   * @param text the raw text to filter; may be {@code null}
+   * @return the filtered text, or {@code null} when {@code text} is null
+   */
+  private String stripDisallowedTags(String text) {
+    if (text == null) {
+      return text;
+    }
+
+    StringBuilder result = new StringBuilder();
+    int len = text.length();
+    // Index of the next character that has not been examined yet
+    int i = 0;
+
+    while (i < len) {
+      int tagStart = text.indexOf('<', i);
+      if (tagStart < 0) {
+        // No more tags, append the rest
+        result.append(text.substring(i));
+        break;
+      }
+
+      // Append text before the tag
+      if (tagStart > i) {
+        result.append(text.substring(i, tagStart));
+      }
+
+      // Find the end of the tag (the next '>')
+      int tagEnd = text.indexOf('>', tagStart + 1);
+      if (tagEnd < 0) {
+        // Unclosed tag: drop the '<' and keep scanning after it
+        i = tagStart + 1;
+        continue;
+      }
+
+      // Extract the tag content (between < and >)
+      String tagContent = text.substring(tagStart + 1, tagEnd);
+      boolean isClosingTag = tagContent.startsWith("/");
+
+      // A real tag name starts with a letter (after the '/' for closing tags).
+      // The length check rejects <> and </> without indexing past the end of
+      // an empty body; things like <3 fail the letter check.
+      int nameStart = isClosingTag ? 1 : 0;
+      boolean isValidTag = tagContent.length() > nameStart
+          && Character.isLetter(tagContent.charAt(nameStart));
+
+      if (!isValidTag) {
+        if (tagContent.indexOf('<') >= 0) {
+          // A real tag may start inside this span (e.g. "<<img ...>"), so it
+          // must not be copied blindly. Drop the stray '<' and rescan what
+          // follows it, so the inner tag is checked like any other.
+          i = tagStart + 1;
+        } else {
+          // Not a valid HTML tag, just append it as-is
+          result.append('<').append(tagContent).append('>');
+          i = tagEnd + 1;
+        }
+        continue;
+      }
+
+      // The element name is the first whitespace-delimited token of the body
+      String tagName = tagContent.substring(nameStart).trim().split("\\s")[0];
+      tagName = HtmlLexer.canonicalElementName(tagName);
+      i = tagEnd + 1;
+
+      if (elAndAttrPolicies.containsKey(tagName)) {
+        // Allowed element: copy the opening or closing tag through verbatim
+        result.append('<').append(tagContent).append('>');
+      } else if (!isClosingTag) {
+        // Disallowed opening tag: skip it and its content until the matching
+        // closing tag, tracking the nesting level of same-named tags
+        int nestingLevel = 1;
+        while (i < len && nestingLevel > 0) {
+          int nextTagStart = text.indexOf('<', i);
+          if (nextTagStart < 0) {
+            // No more tags, skip to end
+            i = len;
+            break;
+          }
+          int nextTagEnd = text.indexOf('>', nextTagStart + 1);
+          if (nextTagEnd < 0) {
+            // Unclosed tag, skip to end
+            i = len;
+            break;
+          }
+
+          // Only tags named like the skipped element change the nesting level
+          String nextTagContent = text.substring(nextTagStart + 1, nextTagEnd);
+          String nextTagName = nextTagContent.trim().split("\\s")[0];
+          if (nextTagContent.startsWith("/")) {
+            // Closing tag
+            nextTagName = nextTagName.substring(1);
+            nextTagName = HtmlLexer.canonicalElementName(nextTagName);
+            if (nextTagName.equals(tagName)) {
+              nestingLevel--;
+              if (nestingLevel == 0) {
+                // Found matching closing tag, skip it and continue
+                i = nextTagEnd + 1;
+                break;
+              }
+            }
+          } else {
+            // Opening tag
+            nextTagName = HtmlLexer.canonicalElementName(nextTagName);
+            if (nextTagName.equals(tagName)) {
+              nestingLevel++;
+            }
+          }
+          i = nextTagEnd + 1;
+        }
+      }
+      // A disallowed closing tag matches neither branch and is simply dropped
     }
+
+    return result.toString();
   }
 
   public void openTag(String elementName, List<String> attrs) {
diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java
@@ -46,14 +46,19 @@ public class HtmlLexerTest extends TestCase {
   public final void testHtmlLexer() throws Exception {
     // Do the lexing.
     String input = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexerinput1.html").toURI())), StandardCharsets.UTF_8);
+    // Convert CRLF and lone CR to LF so the resource's line endings don't matter
+    input = input.replace("\r\n", "\n").replace("\r", "\n");
     StringBuilder actual = new StringBuilder();
     lex(input, actual);
 
     // Get the golden.
     String golden = new String(Files.readAllBytes(Paths.get(getClass().getResource("htmllexergolden1.txt").toURI())), StandardCharsets.UTF_8);
+    // Apply the same CRLF/CR -> LF conversion to the golden text and lexer output
+    golden = golden.replace("\r\n", "\n").replace("\r", "\n");
+    String actualStr = actual.toString().replace("\r\n", "\n").replace("\r", "\n");
 
     // Compare.
-    assertEquals(golden, actual.toString());
+    assertEquals(golden, actualStr);
   }
 
   @Test
diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerFuzzerTest.java
@@ -28,16 +28,13 @@
 
 package org.owasp.html;
 
-import java.io.BufferedReader;
-import java.io.InputStreamReader;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
 import java.util.List;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
-import java.util.stream.Collectors;
-
-import org.apache.commons.codec.Resources;
 
 /**
  * Throws malformed inputs at the HTML sanitizer to try and crash it.
@@ -62,9 +59,9 @@ public void text(String textChunk) { /* do nothing */ }
       };
 
   public final void testFuzzHtmlParser() throws Exception {
-    String html = new BufferedReader(new InputStreamReader(
-        Resources.getInputStream("benchmark-data/Yahoo!.html"),
-        StandardCharsets.UTF_8)).lines().collect(Collectors.joining()); 
+    String html = new String(Files.readAllBytes(
+        Paths.get(getClass().getResource("/benchmark-data/Yahoo!.html").toURI())),
+        StandardCharsets.UTF_8); 
     int length = html.length();
 
     char[] fuzzyHtml0 = new char[length];
diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlSanitizerTest.java
@@ -454,6 +454,63 @@ public static final void testStylingCornerCase() {
     assertEquals(want, sanitize(input));
   }
 
+  @Test
+  public static final void testCVE202566021_1() {
+    // Policy: style and noscript are allowed elements, div is not.
+    PolicyFactory policy = new HtmlPolicyBuilder()
+        .allowElements("style", "noscript")
+        .allowTextIn("style")
+        .toFactory();
+
+    // Input: a div element embedded in the text of the style block.
+    String input = "<noscript><style>/* user content */.x { font-size: 12px; }<div id=\"evil\">XSS?</div></style></noscript>";
+
+    // Expectation: the div and its content are gone, the CSS is untouched.
+    String want = "<noscript><style>/* user content */.x { font-size: 12px; }</style></noscript>";
+
+    String got = policy.sanitize(input);
+
+    assertEquals(want, got);
+  }
+
+  @Test
+  public static final void testCVE202566021_2() {
+    // Policy: style and noscript are allowed elements, script is not.
+    PolicyFactory policy = new HtmlPolicyBuilder()
+        .allowElements("style", "noscript")
+        .allowTextIn("style")
+        .toFactory();
+
+    // Input: a script element embedded in the text of the style block.
+    String input = "<noscript><style>/* user content */.x { font-size: 12px; }<script>alert('XSS Attack!')</script></style></noscript>";
+
+    // Expectation: the script and its content are gone, the CSS is untouched.
+    String want = "<noscript><style>/* user content */.x { font-size: 12px; }</style></noscript>";
+
+    String got = policy.sanitize(input);
+
+    assertEquals(want, got);
+  }
+
+  @Test
+  public static final void testCVE202566021_3() {
+    // Policy: style, noscript and div are all allowed elements.
+    PolicyFactory policy = new HtmlPolicyBuilder()
+        .allowElements("style", "noscript", "div")
+        .allowTextIn("style")
+        .toFactory();
+
+    // Input: a div element embedded in the text of the style block.
+    String input = "<noscript><style>/* user content */.x { font-size: 12px; }<div id=\"good\">ALLOWED?</div></style></noscript>";
+
+    // Expectation: the output is identical to the input, div included.
+    String want = "<noscript><style>/* user content */.x { font-size: 12px; }<div id=\"good\">ALLOWED?</div></style></noscript>";
+
+    String got = policy.sanitize(input);
+
+    assertEquals(want, got);
+  }
+
   private static String sanitize(@Nullable String html) {
     StringBuilder sb = new StringBuilder();
     HtmlStreamRenderer renderer = HtmlStreamRenderer.create(