33
33
import javax .annotation .Nullable ;
34
34
35
35
/** Encoders and decoders for HTML. */
36
- final class Encoding {
36
+ public final class Encoding {
37
37
38
38
/**
39
39
* Decodes HTML entities to produce a string containing only valid
40
40
* Unicode scalar values.
41
+ *
42
+ * @param s text/html
43
+ * @return text/plain
41
44
*/
42
45
public static String decodeHtml (String s ) {
43
46
int firstAmp = s .indexOf ('&' );
@@ -151,11 +154,40 @@ private static int longestPrefixOfGoodCodeunits(String s) {
151
154
return -1 ;
152
155
}
153
156
157
+ /**
158
+ * Appends an encoded form of plainText to output where the encoding is
159
+ * sufficient to prevent an HTML parser from interpreting any characters in
160
+ * the appended chunk as part of an attribute or tag boundary.
161
+ *
162
+ * @param plainText text/plain
163
+ * @param output a buffer of text/html that has a well-formed HTML prefix that
164
+ * ends after the open-quote of an attribute value and does not yet contain
165
+ * a corresponding close quote.
166
+ * Modified in place.
167
+ */
154
168
static void encodeHtmlAttribOnto (String plainText , Appendable output )
155
169
throws IOException {
156
170
encodeHtmlOnto (plainText , output , "{\u200B " );
157
171
}
158
172
173
+ /**
174
+ * Appends an encoded form of plainText to output where the encoding is
175
+ * sufficient to prevent an HTML parser from transitioning out of the
176
+ * <a href="https://html.spec.whatwg.org/multipage/parsing.html#data-state">
177
+ * Data state</a>.
178
+ *
179
+ * This is suitable for encoding a text node inside any element that does not
180
+ * require special handling as a context element (see "context element" in
181
+ * <a href="https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments">
182
+ * step 4</a>.)
183
+ *
184
+ * @param plainText text/plain
185
+ * @param output a buffer of text/html that has a well-formed HTML prefix that
186
+ * would leave an HTML parser in the Data state if it were to encounter a space
187
+ * character as the next character. In practice this means that the buffer
188
+ * does not contain partial tags or comments, and does not have an unclosed
189
+ * element with a special content model.
190
+ */
159
191
static void encodePcdataOnto (String plainText , Appendable output )
160
192
throws IOException {
161
193
// Avoid problems with client-side template languages like
@@ -166,7 +198,23 @@ static void encodePcdataOnto(String plainText, Appendable output)
166
198
encodeHtmlOnto (plainText , output , "{<!-- -->" );
167
199
}
168
200
169
- static void encodeRcdataOnto (String plainText , Appendable output )
201
+ /**
202
+ * Appends an encoded form of plainText to output where the encoding is
203
+ * sufficient to prevent an HTML parser from transitioning out of the
204
+ * <a href="https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state">
205
+ * RCDATA state</a>.
206
+ *
207
+ * This is suitable for encoding a text node inside a {@code <textarea>} or
208
+ * {@code <title>} element outside foreign content.
209
+ *
210
+ * @param plainText text/plain
211
+ * @param output a buffer of text/html that has a well-formed HTML prefix that
212
+ * would leave an HTML parser in the RCDATA state if it were to encounter a space
213
+ * character as the next character. In practice this means that the buffer
214
+ * does not contain partial tags or comments, and the most recently opened
215
+ * element is {@code <textarea>} or {@code <title>} and that element is still open.
216
+ */
217
+ public static void encodeRcdataOnto (String plainText , Appendable output )
170
218
throws IOException {
171
219
// Avoid problems with client-side template languages like
172
220
// Angular & Polymer which attach special significance to text like
@@ -316,7 +364,7 @@ static void appendNumericEntity(int codepoint, Appendable output)
316
364
};
317
365
318
366
/** Maps ASCII chars that need to be encoded to an equivalent HTML entity. */
319
- static final String [] REPLACEMENTS = new String [0x80 ];
367
+ private static final String [] REPLACEMENTS = new String [0x80 ];
320
368
static {
321
369
for (int i = 0 ; i < ' ' ; ++i ) {
322
370
// We elide control characters so that we can ensure that our output is
@@ -342,8 +390,8 @@ static void appendNumericEntity(int codepoint, Appendable output)
342
390
}
343
391
344
392
/**
345
- * {@code DECODES_TO_SELF[c]} is true iff the codepoint c decodes to itself in
346
- * an HTML5 text node or properly quoted attribute value .
393
+ * {@code IS_BANNED_ASCII[i]}, where i is an ASCII control character codepoint (&lt; 0x20),
394
+ * is true for control characters that are not allowed in an XML source text .
347
395
*/
348
396
private static boolean [] IS_BANNED_ASCII = new boolean [0x20 ];
349
397
static {
0 commit comments