@@ -94,8 +94,163 @@ public void closeDocument() {
9494
9595 public void text (String textChunk ) {
9696 if (!skipText ) {
97- out .text (textChunk );
97+ // Check if we're inside a CDATA element (style/script) with allowTextIn
98+ // where tags are reclassified as UNESCAPED text and need to be validated
99+ // Note: Only style and script are CDATA elements; noscript/noembed/noframes are PCDATA
100+ boolean insideCdataElement = false ;
101+ for (int i = openElementStack .size () - 1 ; i >= 0 ; i -= 2 ) {
102+ String adjustedName = openElementStack .get (i );
103+ if (adjustedName != null
104+ && allowedTextContainers .contains (adjustedName )
105+ && ("style" .equals (adjustedName ) || "script" .equals (adjustedName ))) {
106+ insideCdataElement = true ;
107+ break ;
108+ }
109+ }
110+
111+ // If inside a CDATA element (style/script) with allowTextIn, we need to filter out
112+ // HTML tags that aren't allowed because tags inside these blocks are reclassified
113+ // as UNESCAPED text by the lexer
114+ if (insideCdataElement && textChunk != null && textChunk .indexOf ('<' ) >= 0 ) {
115+ // Strip out HTML tags that aren't in the allowed elements list
116+ String filtered = stripDisallowedTags (textChunk );
117+ out .text (filtered );
118+ } else {
119+ out .text (textChunk );
120+ }
121+ }
122+ }
123+
124+ /**
125+ * Strips out HTML tags that aren't in the allowed elements list from text content.
126+ * This is used when tags appear inside text containers (like style blocks) where
127+ * they're treated as text but should still be validated.
128+ */
129+ private String stripDisallowedTags (String text ) {
130+ if (text == null ) {
131+ return text ;
132+ }
133+
134+ StringBuilder result = new StringBuilder ();
135+ int len = text .length ();
136+ int i = 0 ;
137+
138+ while (i < len ) {
139+ int tagStart = text .indexOf ('<' , i );
140+ if (tagStart < 0 ) {
141+ // No more tags, append the rest
142+ result .append (text .substring (i ));
143+ break ;
144+ }
145+
146+ // Append text before the tag
147+ if (tagStart > i ) {
148+ result .append (text .substring (i , tagStart ));
149+ }
150+
151+ // Find the end of the tag (either '>' or end of string)
152+ int tagEnd = text .indexOf ('>' , tagStart + 1 );
153+ if (tagEnd < 0 ) {
154+ // Unclosed tag, skip it
155+ i = tagStart + 1 ;
156+ continue ;
157+ }
158+
159+ // Extract the tag content (between < and >)
160+ String tagContent = text .substring (tagStart + 1 , tagEnd );
161+
162+ // Only process if this looks like a valid HTML element tag
163+ // Valid tags start with a letter or / followed by a letter
164+ // Skip things like <, </>, <3, etc.
165+ boolean isValidTag = false ;
166+ String tagName = null ;
167+
168+ if (tagContent .startsWith ("/" )) {
169+ // Closing tag - must have / followed by a letter
170+ if (tagContent .length () > 1 ) {
171+ char firstChar = tagContent .charAt (1 );
172+ if (Character .isLetter (firstChar )) {
173+ isValidTag = true ;
174+ tagName = tagContent .substring (1 ).trim ().split ("\\ s" )[0 ];
175+ tagName = HtmlLexer .canonicalElementName (tagName );
176+ }
177+ }
178+ } else {
179+ // Opening tag - must start with a letter
180+ char firstChar = tagContent .charAt (0 );
181+ if (Character .isLetter (firstChar )) {
182+ isValidTag = true ;
183+ tagName = tagContent .trim ().split ("\\ s" )[0 ];
184+ tagName = HtmlLexer .canonicalElementName (tagName );
185+ }
186+ }
187+
188+ if (!isValidTag ) {
189+ // Not a valid HTML tag, just append it as-is
190+ result .append ('<' ).append (tagContent ).append ('>' );
191+ i = tagEnd + 1 ;
192+ continue ;
193+ }
194+
195+ // Check if it's a closing tag
196+ if (tagContent .startsWith ("/" )) {
197+ // Only allow closing tags if the element is allowed
198+ if (elAndAttrPolicies .containsKey (tagName )) {
199+ result .append ('<' ).append (tagContent ).append ('>' );
200+ }
201+ // Otherwise skip the closing tag
202+ i = tagEnd + 1 ;
203+ } else {
204+ // Opening tag - only allow tags if the element is in the allowed list
205+ if (elAndAttrPolicies .containsKey (tagName )) {
206+ result .append ('<' ).append (tagContent ).append ('>' );
207+ i = tagEnd + 1 ;
208+ } else {
209+ // Skip disallowed tag and its content until matching closing tag
210+ i = tagEnd + 1 ;
211+ // Track nesting level to find the matching closing tag
212+ int nestingLevel = 1 ;
213+ while (i < len && nestingLevel > 0 ) {
214+ int nextTagStart = text .indexOf ('<' , i );
215+ if (nextTagStart < 0 ) {
216+ // No more tags, skip to end
217+ i = len ;
218+ break ;
219+ }
220+ int nextTagEnd = text .indexOf ('>' , nextTagStart + 1 );
221+ if (nextTagEnd < 0 ) {
222+ // Unclosed tag, skip to end
223+ i = len ;
224+ break ;
225+ }
226+ String nextTagContent = text .substring (nextTagStart + 1 , nextTagEnd );
227+ String nextTagName = nextTagContent .trim ().split ("\\ s" )[0 ];
228+ if (nextTagContent .startsWith ("/" )) {
229+ // Closing tag
230+ nextTagName = nextTagName .substring (1 );
231+ nextTagName = HtmlLexer .canonicalElementName (nextTagName );
232+ if (nextTagName .equals (tagName )) {
233+ nestingLevel --;
234+ if (nestingLevel == 0 ) {
235+ // Found matching closing tag, skip it and continue
236+ i = nextTagEnd + 1 ;
237+ break ;
238+ }
239+ }
240+ } else {
241+ // Opening tag
242+ nextTagName = HtmlLexer .canonicalElementName (nextTagName );
243+ if (nextTagName .equals (tagName )) {
244+ nestingLevel ++;
245+ }
246+ }
247+ i = nextTagEnd + 1 ;
248+ }
249+ }
250+ }
98251 }
252+
253+ return result .toString ();
99254 }
100255
101256 public void openTag (String elementName , List <String > attrs ) {
0 commit comments