53
53
*
54
54
* <p>Please make sure to read and understand the context that the method encodes
55
55
* for. Encoding for the incorrect context will likely lead to exposing a
56
- * cross-site scripting vulnerability.</p>
56
+ * cross-site scripting vulnerability. Those new to XSS mitigation may find it
57
+ * useful to read the
58
+ * <a href="https://cheatsheetseries.owasp.org/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.html">
59
+ * Cross Site Scripting Prevention Cheat Sheet</a> that is part of the OWASP Cheat Sheet series for background
60
+ * material.
61
+ * </p>
57
62
*
58
63
* @author Jeff Ichnowski
59
64
*/
@@ -66,7 +71,7 @@ private Encode() {}
66
71
* this method encodes for both contexts, it may be slightly less
67
72
* efficient to use this method over the methods targeted towards
68
73
* the specific contexts ({@link #forHtmlAttribute(String)} and
69
- * {@link #forHtmlContent(String)}. In general this method should
74
+ * {@link #forHtmlContent(String)}). In general this method should
70
75
* be preferred unless you are really concerned with saving a few
71
76
* bytes or are writing a framework that utilizes this
72
77
* package.</p>
@@ -155,7 +160,7 @@ public static void forHtml(Writer out, String input) throws IOException {
155
160
/**
156
161
* <p>This method encodes for HTML text content. It does not escape
157
162
* quotation characters and is thus unsafe for use with
158
- * HTML attributes. Use either forHtml or forHtmlAttribute for those
163
+ * HTML attributes. Use either {@link #forHtml(String)} or {@link #forHtmlAttribute(String)} for those
159
164
* contexts.</p>
160
165
*
161
166
* <b>Example JSP Usage</b>
@@ -232,7 +237,9 @@ public static void forHtmlContent(Writer out, String input)
232
237
}
233
238
234
239
/**
235
- * <p>This method encodes for HTML text attributes.</p>
240
+ * <p>This method encodes for HTML text attributes. Do not use for JavaScript event attributes or for attributes
241
+ * that are interpreted as a URL. Instead use {@link #forJavaScript(String)} and {@link #forUriComponent(String)}
242
+ * respectively for those.</p>
236
243
*
237
244
* <b>Example JSP Usage</b>
238
245
* <pre>
@@ -472,31 +479,29 @@ public static void forHtmlUnquotedAttribute(Writer out, String input)
472
479
* <b>Encoding Notes</b>
473
480
* <ul>
474
481
*
475
- * <li>The following characters are encoded using hexidecimal
482
+ * <li>The following characters are encoded using hexadecimal
476
483
* encodings: {@code U+0000} - {@code U+001f},
477
484
* {@code "},
478
485
* {@code '},
479
486
* {@code \},
480
487
* {@code <},
481
488
* {@code &},
482
- * {@code (},
483
- * {@code )},
484
489
* {@code /},
485
490
* {@code >},
486
491
* {@code U+007f},
487
492
* line separator ({@code U+2028}),
488
493
* paragraph separator ({@code U+2029}).</li>
489
494
*
490
495
* <li>Any character requiring encoding is encoded as {@code \xxx}
491
- * where {@code xxx} is the shortest hexidecimal representation of
496
+ * where {@code xxx} is the shortest hexadecimal representation of
492
497
* its Unicode code point (after decoding surrogate pairs if
493
498
* necessary). This encoding is never zero padded. Thus, for
494
499
* example, the tab character is encoded as {@code \9}, not {@code
495
500
* \0009}.</li>
496
501
*
497
502
* <li>The encoder looks ahead 1 character in the input and
498
503
* appends a space to an encoding to avoid the next character
499
- * becoming part of the hexidecimal encoded sequence. Thus
504
+ * becoming part of the hexadecimal encoded sequence. Thus
500
505
* “{@code '1}” is encoded as “{@code \27
501
506
* 1}”, and not as “{@code \271}”. If a space
502
507
* is not necessary, it is not included, thus “{@code
@@ -544,13 +549,13 @@ public static void forCssString(Writer out, String input)
544
549
* <div style="background:url(<%=Encode.forCssUrl(...)%>);">
545
550
*
546
551
* <style type="text/css">
547
- * background: url(<%=Encode.forCssUrl(...)%>);
552
+ * background: url('<%=Encode.forCssUrl(...)%>');
548
553
* </style>
549
554
* </pre>
550
555
* <b>Encoding Notes</b>
551
556
* <ul>
552
557
*
553
- * <li>The following characters are encoded using hexidecimal
558
+ * <li>The following characters are encoded using hexadecimal
554
559
* encodings: {@code U+0000} - {@code U+001f},
555
560
* {@code "},
556
561
* {@code '},
@@ -564,15 +569,15 @@ public static void forCssString(Writer out, String input)
564
569
* paragraph separator ({@code U+2029}).</li>
565
570
*
566
571
* <li>Any character requiring encoding is encoded as {@code \xxx}
567
- * where {@code xxx} is the shortest hexidecimal representation of
572
+ * where {@code xxx} is the shortest hexadecimal representation of
568
573
* its Unicode code point (after decoding surrogate pairs if
569
574
* necessary). This encoding is never zero padded. Thus, for
570
575
* example, the tab character is encoded as {@code \9}, not {@code
571
576
* \0009}.</li>
572
577
*
573
578
* <li>The encoder looks ahead 1 character in the input and
574
579
* appends a space to an encoding to avoid the next character
575
- * becoming part of the hexidecimal encoded sequence. Thus
580
+ * becoming part of the hexadecimal encoded sequence. Thus
576
581
* “{@code '1}” is encoded as “{@code \27
577
582
* 1}”, and not as “{@code \271}”. If a space
578
583
* is not necessary, it is not included, thus “{@code
@@ -639,7 +644,7 @@ public static void forCssUrl(Writer out, String input)
639
644
* <li>URL encoding is an encoding for bytes, not Unicode. The
640
645
* input string is thus first encoded as a sequence of UTF-8
641
646
* bytes. The bytes are then encoded as {@code %xx} where {@code
642
- * xx} is the two-digit hexidecimal representation of the
647
+ * xx} is the two-digit hexadecimal representation of the
643
648
* byte. (The implementation does this as one step for
644
649
* performance.)</li>
645
650
*
@@ -690,7 +695,7 @@ public static void forCssUrl(Writer out, String input)
690
695
* <p>The following characters are <i>not</i> encoded:</p>
691
696
* <pre>
692
697
* U+20: - . 0 1 2 3 4 5 6 7 8 9
693
- * U+40: @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z _
698
+ * U+40: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z _
694
699
* U+60: a b c d e f g h i j k l m n o p q r s t u v w x y z ~
695
700
* </pre>
696
701
*
@@ -704,7 +709,7 @@ public static void forCssUrl(Writer out, String input)
704
709
* <li>URL encoding is an encoding for bytes, not Unicode. The
705
710
* input string is thus first encoded as a sequence of UTF-8
706
711
* bytes. The bytes are then encoded as {@code %xx} where {@code
707
- * xx} is the two-digit hexidecimal representation of the
712
+ * xx} is the two-digit hexadecimal representation of the
708
713
* byte. (The implementation does this as one step for
709
714
* performance.)</li>
710
715
*
@@ -937,7 +942,7 @@ public static void forJava(Writer out, String input)
937
942
* provide the surrounding quotation characters for the string.
938
943
* Since this performs additional encoding so it can work in all
939
944
* of the JavaScript contexts listed, it may be slightly less
940
- * efficient than using one of the methods targetted to a specific
945
+ * efficient than using one of the methods targeted to a specific
941
946
* JavaScript context ({@link #forJavaScriptAttribute(String)},
942
947
* {@link #forJavaScriptBlock}, {@link #forJavaScriptSource}).
943
948
* Unless you are interested in saving a few bytes of output or
0 commit comments