@@ -12,18 +12,12 @@ import (
12
12
"strings"
13
13
14
14
szip "github.com/STARRY-S/zip"
15
+ "golang.org/x/text/encoding"
15
16
16
17
"github.com/dsnet/compress/bzip2"
17
18
"github.com/klauspost/compress/zip"
18
19
"github.com/klauspost/compress/zstd"
19
20
"github.com/ulikunitz/xz"
20
- "golang.org/x/text/encoding"
21
- "golang.org/x/text/encoding/charmap"
22
- "golang.org/x/text/encoding/japanese"
23
- "golang.org/x/text/encoding/korean"
24
- "golang.org/x/text/encoding/simplifiedchinese"
25
- "golang.org/x/text/encoding/traditionalchinese"
26
- "golang.org/x/text/encoding/unicode"
27
21
)
28
22
29
23
func init () {
@@ -80,7 +74,7 @@ type Zip struct {
80
74
// For files in zip archives that do not have UTF-8
81
75
// encoded filenames and comments, specify the character
82
76
// encoding here.
83
- TextEncoding string
77
+ TextEncoding encoding. Encoding
84
78
}
85
79
86
80
// Extension reports the file name extension associated with the
// zip format, including the leading dot.
func (z Zip) Extension() string {
	const ext = ".zip"
	return ext
}
@@ -94,11 +88,16 @@ func (z Zip) Match(_ context.Context, filename string, stream io.Reader) (MatchR
94
88
}
95
89
96
90
// match file header
97
- buf , err := readAtMost (stream , len (zipHeader ))
98
- if err != nil {
99
- return mr , err
91
+ for _ , hdr := range zipHeaders {
92
+ buf , err := readAtMost (stream , len (hdr ))
93
+ if err != nil {
94
+ return mr , err
95
+ }
96
+ if bytes .Equal (buf , hdr ) {
97
+ mr .ByStream = true
98
+ break
99
+ }
100
100
}
101
- mr .ByStream = bytes .Equal (buf , zipHeader )
102
101
103
102
return mr , nil
104
103
}
@@ -255,13 +254,14 @@ func (z Zip) Extract(ctx context.Context, sourceArchive io.Reader, handleFile Fi
255
254
// It is a no-op if the text is already UTF-8 encoded or if z.TextEncoding
256
255
// is not specified.
257
256
func (z Zip ) decodeText (hdr * zip.FileHeader ) {
258
- if hdr .NonUTF8 && z .TextEncoding != "" {
259
- filename , err := decodeText (hdr .Name , z .TextEncoding )
257
+ if hdr .NonUTF8 && z .TextEncoding != nil {
258
+ dec := z .TextEncoding .NewDecoder ()
259
+ filename , err := dec .String (hdr .Name )
260
260
if err == nil {
261
261
hdr .Name = filename
262
262
}
263
263
if hdr .Comment != "" {
264
- comment , err := decodeText (hdr .Comment , z . TextEncoding )
264
+ comment , err := dec . String (hdr .Comment )
265
265
if err == nil {
266
266
hdr .Comment = comment
267
267
}
@@ -384,58 +384,11 @@ var compressedFormats = map[string]struct{}{
384
384
".zipx" : {},
385
385
}
386
386
387
- var encodings = map [string ]encoding.Encoding {
388
- "ibm866" : charmap .CodePage866 ,
389
- "iso8859_2" : charmap .ISO8859_2 ,
390
- "iso8859_3" : charmap .ISO8859_3 ,
391
- "iso8859_4" : charmap .ISO8859_4 ,
392
- "iso8859_5" : charmap .ISO8859_5 ,
393
- "iso8859_6" : charmap .ISO8859_6 ,
394
- "iso8859_7" : charmap .ISO8859_7 ,
395
- "iso8859_8" : charmap .ISO8859_8 ,
396
- "iso8859_8I" : charmap .ISO8859_8I ,
397
- "iso8859_10" : charmap .ISO8859_10 ,
398
- "iso8859_13" : charmap .ISO8859_13 ,
399
- "iso8859_14" : charmap .ISO8859_14 ,
400
- "iso8859_15" : charmap .ISO8859_15 ,
401
- "iso8859_16" : charmap .ISO8859_16 ,
402
- "koi8r" : charmap .KOI8R ,
403
- "koi8u" : charmap .KOI8U ,
404
- "macintosh" : charmap .Macintosh ,
405
- "windows874" : charmap .Windows874 ,
406
- "windows1250" : charmap .Windows1250 ,
407
- "windows1251" : charmap .Windows1251 ,
408
- "windows1252" : charmap .Windows1252 ,
409
- "windows1253" : charmap .Windows1253 ,
410
- "windows1254" : charmap .Windows1254 ,
411
- "windows1255" : charmap .Windows1255 ,
412
- "windows1256" : charmap .Windows1256 ,
413
- "windows1257" : charmap .Windows1257 ,
414
- "windows1258" : charmap .Windows1258 ,
415
- "macintoshcyrillic" : charmap .MacintoshCyrillic ,
416
- "gbk" : simplifiedchinese .GBK ,
417
- "gb18030" : simplifiedchinese .GB18030 ,
418
- "big5" : traditionalchinese .Big5 ,
419
- "eucjp" : japanese .EUCJP ,
420
- "iso2022jp" : japanese .ISO2022JP ,
421
- "shiftjis" : japanese .ShiftJIS ,
422
- "euckr" : korean .EUCKR ,
423
- "utf16be" : unicode .UTF16 (unicode .BigEndian , unicode .IgnoreBOM ),
424
- "utf16le" : unicode .UTF16 (unicode .LittleEndian , unicode .IgnoreBOM ),
387
// zipHeaders lists the 4-byte signatures that are compared against the
// leading bytes of a stream to recognize a zip archive.
//
// Each entry must be exactly the raw signature bytes: any extra byte
// (such as stray whitespace inside the literal) changes both the number
// of bytes read for the comparison and the comparison result.
var zipHeaders = [][]byte{
	// Local file header signature; begins any archive that has at
	// least one entry.
	[]byte("PK\x03\x04"),
	// End-of-central-directory signature; it is the first record of
	// an archive that contains no entries.
	[]byte("PK\x05\x06"),
}
426
391
427
- // decodeText returns UTF-8 encoded text from the given charset.
428
- // Thanks to @zxdvd for contributing non-UTF-8 encoding logic in
429
- // #149, and to @pashifika for helping in #305.
430
- func decodeText (input , charset string ) (string , error ) {
431
- if enc , ok := encodings [charset ]; ok {
432
- return enc .NewDecoder ().String (input )
433
- }
434
- return "" , fmt .Errorf ("unrecognized charset %s" , charset )
435
- }
436
-
437
- var zipHeader = []byte ("PK\x03 \x04 " ) // NOTE: headers of empty zip files might end with 0x05,0x06 or 0x06,0x06 instead of 0x03,0x04
438
-
439
392
// Interface guards
440
393
var (
441
394
_ Archiver = Zip {}
0 commit comments