@@ -90,16 +90,19 @@ public final class EncodingSniffer {
90
90
/**
 * Byte values 0x09 (TAB), 0x0A (LF), 0x0C (FF), 0x0D (CR), 0x20 (SPACE) and 0x3E ('>').
 * NOTE(review): 0x3E is '>' rather than a whitespace character; presumably it is included
 * as an additional delimiter -- confirm against the usages of this constant.
 */
private static final byte [] WHITESPACE = {0x09 , 0x0A , 0x0C , 0x0D , 0x20 , 0x3E };
91
91
/**
 * The three-byte sequence {@code -->}.
 * NOTE(review): going by the name, presumably the terminator of a markup comment -- confirm
 * against the usages of this constant.
 */
private static final byte [] COMMENT_END = {'-' , '-' , '>' };
92
92
93
- /** <a href="http ://encoding.spec.whatwg.org/#encodings">Reference </a> */
93
+ /** <a href="https://encoding.spec.whatwg.org/#names-and-labels">Encoding names and labels</a> */
94
94
private static final Map <String , String > ENCODING_FROM_LABEL ;
95
95
static {
96
96
ENCODING_FROM_LABEL = new HashMap <>();
97
97
98
98
// The Encoding
99
99
// ------------
100
100
ENCODING_FROM_LABEL .put ("unicode-1-1-utf-8" , "utf-8" );
101
+ ENCODING_FROM_LABEL .put ("unicode11utf8" , "utf-8" );
102
+ ENCODING_FROM_LABEL .put ("unicode20utf8" , "utf-8" );
101
103
ENCODING_FROM_LABEL .put ("utf-8" , "utf-8" );
102
104
ENCODING_FROM_LABEL .put ("utf8" , "utf-8" );
105
+ ENCODING_FROM_LABEL .put ("x-unicode20utf8" , "utf-8" );
103
106
104
107
// Legacy single-byte encodings
105
108
// ----------------------------
@@ -367,8 +370,9 @@ public final class EncodingSniffer {
367
370
ENCODING_FROM_LABEL .put ("csiso2022jp" , "iso-2022-jp" );
368
371
ENCODING_FROM_LABEL .put ("iso-2022-jp" , "iso-2022-jp" );
369
372
370
- // iso-2022-jp
373
+ // shift_jis
371
374
ENCODING_FROM_LABEL .put ("csshiftjis" , "shift_jis" );
375
+ ENCODING_FROM_LABEL .put ("ms932" , "shift_jis" );
372
376
ENCODING_FROM_LABEL .put ("ms_kanji" , "shift_jis" );
373
377
ENCODING_FROM_LABEL .put ("shift-jis" , "shift_jis" );
374
378
ENCODING_FROM_LABEL .put ("shift_jis" , "shift_jis" );
@@ -396,14 +400,22 @@ public final class EncodingSniffer {
396
400
397
401
// replacement
398
402
ENCODING_FROM_LABEL .put ("csiso2022kr" , "replacement" );
403
+ ENCODING_FROM_LABEL .put ("hz-gb-2312" , "replacement" );
399
404
ENCODING_FROM_LABEL .put ("iso-2022-cn" , "replacement" );
400
405
ENCODING_FROM_LABEL .put ("iso-2022-cn-ext" , "replacement" );
401
406
ENCODING_FROM_LABEL .put ("iso-2022-kr" , "replacement" );
407
+ ENCODING_FROM_LABEL .put ("replacement" , "replacement" );
402
408
403
409
// utf-16be
410
+ ENCODING_FROM_LABEL .put ("unicodefffe" , "utf-16be" );
404
411
ENCODING_FROM_LABEL .put ("utf-16be" , "utf-16be" );
405
412
406
413
// utf-16le
414
+ ENCODING_FROM_LABEL .put ("csunicode" , "utf-16le" );
415
+ ENCODING_FROM_LABEL .put ("iso-10646-ucs-2" , "utf-16le" );
416
+ ENCODING_FROM_LABEL .put ("ucs-2" , "utf-16le" );
417
+ ENCODING_FROM_LABEL .put ("unicode" , "utf-16le" );
418
+ ENCODING_FROM_LABEL .put ("unicodefeff" , "utf-16le" );
407
419
ENCODING_FROM_LABEL .put ("utf-16" , "utf-16le" );
408
420
ENCODING_FROM_LABEL .put ("utf-16le" , "utf-16le" );
409
421
0 commit comments