6
6
"errors"
7
7
"fmt"
8
8
"io"
9
+ "path"
10
+ "path/filepath"
9
11
"strings"
10
12
)
11
13
@@ -25,25 +27,23 @@ func RegisterFormat(format Format) {
25
27
// compressed archive files (tar.gz, tar.bz2...). The returned Format
26
28
// value can be type-asserted to ascertain its capabilities.
27
29
//
28
- // If no matching formats were found, special error ErrNoMatch is returned.
30
+ // If no matching formats were found, special error NoMatch is returned.
29
31
//
30
32
// If stream is nil then it will only match on file name and the
31
33
// returned io.Reader will be nil.
32
34
//
33
- // If stream is non-nil then the returned io.Reader will always be
34
- // non-nil and will read from the same point as the reader which was
35
- // passed in. If the input stream is not an io.Seeker, the returned
36
- // io.Reader value should be used in place of the input stream after
37
- // calling Identify() because it preserves and re-reads the bytes that
38
- // were already read during the identification process.
39
- //
40
- // If the input stream is an io.Seeker, Seek() must work, and the
41
- // original input value will be returned instead of a wrapper value.
35
+ // If stream is non-nil, it will be returned in the same read position
36
+ // as it was before Identify() was called, by virtue of buffering the
37
+ // peeked bytes. However, if the stream is an io.Seeker, Seek() must
38
+ // work, no extra buffering will be performed, and the original input
39
+ // value will be returned at the original position by seeking.
42
40
func Identify (ctx context.Context , filename string , stream io.Reader ) (Format , io.Reader , error ) {
43
41
var compression Compression
44
42
var archival Archival
45
43
var extraction Extraction
46
44
45
+ filename = path .Base (filepath .ToSlash (filename ))
46
+
47
47
rewindableStream , err := newRewindReader (stream )
48
48
if err != nil {
49
49
return nil , nil , err
@@ -69,7 +69,7 @@ func Identify(ctx context.Context, filename string, stream io.Reader) (Format, i
69
69
}
70
70
}
71
71
72
- // try archival and extraction format next
72
+ // try archival and extraction formats next
73
73
for name , format := range formats {
74
74
ar , isArchive := format .(Archival )
75
75
ex , isExtract := format .(Extraction )
@@ -98,8 +98,14 @@ func Identify(ctx context.Context, filename string, stream io.Reader) (Format, i
98
98
return archival , bufferedStream , nil
99
99
case compression == nil && archival == nil && extraction != nil :
100
100
return extraction , bufferedStream , nil
101
- case archival != nil || extraction != nil :
102
- return Archive {compression , archival , extraction }, bufferedStream , nil
101
+ case compression == nil && archival != nil && extraction != nil :
102
+ // archival and extraction are always set together, so they must be the same
103
+ return archival , bufferedStream , nil
104
+ case compression != nil && extraction != nil :
105
+ // in practice, this is only used for compressed tar files, and the tar format can
106
+ // both read and write, so the archival value should always work too; but keep in
107
+ // mind that Identify() is used on existing files to be read, not new files to write
108
+ return CompressedArchive {archival , extraction , compression }, bufferedStream , nil
103
109
default :
104
110
return nil , bufferedStream , NoMatch
105
111
}
@@ -166,44 +172,43 @@ func readAtMost(stream io.Reader, n int) ([]byte, error) {
166
172
return nil , err
167
173
}
168
174
169
- // Archive represents an archive which may be compressed at the outer layer.
170
- // It combines a compression format on top of an archive/extraction
171
- // format (e.g. ".tar.gz") and provides both functionalities in a single
172
- // type. It ensures that archival functions are wrapped by compressors and
173
- // decompressors. However, compressed archives have some limitations; for
174
- // example, files cannot be inserted/appended because of complexities with
175
- // modifying existing compression state (perhaps this could be overcome,
176
- // but I'm not about to try it).
177
- //
178
- // The embedded Archival and Extraction values are used for writing and
179
- // reading, respectively. Compression is optional and is only needed if the
180
- // format is compressed externally (for example, tar archives).
181
- type Archive struct {
182
- Compression
175
+ // CompressedArchive represents an archive which is compressed externally
176
+ // (for example, a gzipped tar file, .tar.gz). It combines a compression
177
+ // format on top of an archival/extraction format and provides both
178
+ // functionalities in a single type, allowing archival and extraction
179
+ // operations transparently through compression and decompression. However,
180
+ // compressed archives have some limitations; for example, files cannot be
181
+ // inserted/appended because of complexities with modifying existing
182
+ // compression state (perhaps this could be overcome, but I'm not about to
183
+ // try it).
184
+ type CompressedArchive struct {
183
185
Archival
184
186
Extraction
187
+ Compression
185
188
}
186
189
187
190
// Extension returns a concatenation of the archive and compression format extensions.
188
- func (ar Archive ) Extension () string {
191
+ func (ca CompressedArchive ) Extension () string {
189
192
var name string
190
- if ar .Archival != nil {
191
- name += ar .Archival .Extension ()
192
- } else if ar .Extraction != nil {
193
- name += ar .Extraction .Extension ()
194
- }
195
- if ar .Compression != nil {
196
- name += ar .Compression .Extension ()
193
+ if ca .Archival != nil {
194
+ name += ca .Archival .Extension ()
195
+ } else if ca .Extraction != nil {
196
+ name += ca .Extraction .Extension ()
197
197
}
198
+ name += ca .Compression .Extension ()
198
199
return name
199
200
}
200
201
202
+ // MediaType returns the compression format's MIME type, since
203
+ // a compressed archive is fundamentally a compressed file.
204
+ func (ca CompressedArchive ) MediaType () string { return ca .Compression .MediaType () }
205
+
201
206
// Match matches if the input matches both the compression and archival/extraction format.
202
- func (ar Archive ) Match (ctx context.Context , filename string , stream io.Reader ) (MatchResult , error ) {
207
+ func (ca CompressedArchive ) Match (ctx context.Context , filename string , stream io.Reader ) (MatchResult , error ) {
203
208
var conglomerate MatchResult
204
209
205
- if ar .Compression != nil {
206
- matchResult , err := ar .Compression .Match (ctx , filename , stream )
210
+ if ca .Compression != nil {
211
+ matchResult , err := ca .Compression .Match (ctx , filename , stream )
207
212
if err != nil {
208
213
return MatchResult {}, err
209
214
}
@@ -213,7 +218,7 @@ func (ar Archive) Match(ctx context.Context, filename string, stream io.Reader)
213
218
214
219
// wrap the reader with the decompressor so we can
215
220
// attempt to match the archive by reading the stream
216
- rc , err := ar .Compression .OpenReader (stream )
221
+ rc , err := ca .Compression .OpenReader (stream )
217
222
if err != nil {
218
223
return matchResult , err
219
224
}
@@ -223,8 +228,8 @@ func (ar Archive) Match(ctx context.Context, filename string, stream io.Reader)
223
228
conglomerate = matchResult
224
229
}
225
230
226
- if ar .Archival != nil {
227
- matchResult , err := ar .Archival .Match (ctx , filename , stream )
231
+ if ca .Archival != nil {
232
+ matchResult , err := ca .Archival .Match (ctx , filename , stream )
228
233
if err != nil {
229
234
return MatchResult {}, err
230
235
}
@@ -238,33 +243,33 @@ func (ar Archive) Match(ctx context.Context, filename string, stream io.Reader)
238
243
return conglomerate , nil
239
244
}
240
245
241
- // Archive adds files to the output archive while compressing the result.
242
- func (ar Archive ) Archive (ctx context.Context , output io.Writer , files []FileInfo ) error {
243
- if ar .Archival == nil {
246
+ // Archive writes an archive to the output stream while compressing the result.
247
+ func (ca CompressedArchive ) Archive (ctx context.Context , output io.Writer , files []FileInfo ) error {
248
+ if ca .Archival == nil {
244
249
return fmt .Errorf ("no archival format" )
245
250
}
246
- if ar .Compression != nil {
247
- wc , err := ar .Compression .OpenWriter (output )
251
+ if ca .Compression != nil {
252
+ wc , err := ca .Compression .OpenWriter (output )
248
253
if err != nil {
249
254
return err
250
255
}
251
256
defer wc .Close ()
252
257
output = wc
253
258
}
254
- return ar .Archival .Archive (ctx , output , files )
259
+ return ca .Archival .Archive (ctx , output , files )
255
260
}
256
261
257
262
// ArchiveAsync adds files to the output archive while compressing the result asynchronously.
258
- func (ar Archive ) ArchiveAsync (ctx context.Context , output io.Writer , jobs <- chan ArchiveAsyncJob ) error {
259
- if ar .Archival == nil {
263
+ func (ca CompressedArchive ) ArchiveAsync (ctx context.Context , output io.Writer , jobs <- chan ArchiveAsyncJob ) error {
264
+ if ca .Archival == nil {
260
265
return fmt .Errorf ("no archival format" )
261
266
}
262
- do , ok := ar .Archival .(ArchiverAsync )
267
+ do , ok := ca .Archival .(ArchiverAsync )
263
268
if ! ok {
264
- return fmt .Errorf ("%T archive does not support async writing" , ar .Archival )
269
+ return fmt .Errorf ("%T archive does not support async writing" , ca .Archival )
265
270
}
266
- if ar .Compression != nil {
267
- wc , err := ar .Compression .OpenWriter (output )
271
+ if ca .Compression != nil {
272
+ wc , err := ca .Compression .OpenWriter (output )
268
273
if err != nil {
269
274
return err
270
275
}
@@ -274,20 +279,20 @@ func (ar Archive) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-cha
274
279
return do .ArchiveAsync (ctx , output , jobs )
275
280
}
276
281
277
- // Extract reads files out of an archive while decompressing the results.
278
- func (ar Archive ) Extract (ctx context.Context , sourceArchive io.Reader , handleFile FileHandler ) error {
279
- if ar .Extraction == nil {
282
+ // Extract reads files out of a compressed archive while decompressing the results.
283
+ func (ca CompressedArchive ) Extract (ctx context.Context , sourceArchive io.Reader , handleFile FileHandler ) error {
284
+ if ca .Extraction == nil {
280
285
return fmt .Errorf ("no extraction format" )
281
286
}
282
- if ar .Compression != nil {
283
- rc , err := ar .Compression .OpenReader (sourceArchive )
287
+ if ca .Compression != nil {
288
+ rc , err := ca .Compression .OpenReader (sourceArchive )
284
289
if err != nil {
285
290
return err
286
291
}
287
292
defer rc .Close ()
288
293
sourceArchive = rc
289
294
}
290
- return ar .Extraction .Extract (ctx , sourceArchive , handleFile )
295
+ return ca .Extraction .Extract (ctx , sourceArchive , handleFile )
291
296
}
292
297
293
298
// MatchResult returns true if the format was matched either
@@ -303,6 +308,10 @@ type MatchResult struct {
303
308
// Matched returns true if a match was made by either name or stream.
304
309
func (mr MatchResult ) Matched () bool { return mr .ByName || mr .ByStream }
305
310
311
+ func (mr MatchResult ) String () string {
312
+ return fmt .Sprintf ("{ByName=%v ByStream=%v}" , mr .ByName , mr .ByStream )
313
+ }
314
+
306
315
// rewindReader is a Reader that can be rewound (reset) to re-read what
307
316
// was already read and then continue to read more from the underlying
308
317
// stream. When no more rewinding is necessary, call reader() to get a
@@ -422,8 +431,10 @@ var formats = make(map[string]Format)
422
431
423
432
// Interface guards
424
433
var (
425
- _ Format = (* Archive )(nil )
426
- _ Archiver = (* Archive )(nil )
427
- _ ArchiverAsync = (* Archive )(nil )
428
- _ Extractor = (* Archive )(nil )
434
+ _ Format = (* CompressedArchive )(nil )
435
+ _ Archiver = (* CompressedArchive )(nil )
436
+ _ ArchiverAsync = (* CompressedArchive )(nil )
437
+ _ Extractor = (* CompressedArchive )(nil )
438
+ _ Compressor = (* CompressedArchive )(nil )
439
+ _ Decompressor = (* CompressedArchive )(nil )
429
440
)
0 commit comments