Skip to content

Commit ab120f0

Browse files
committed
Refactor Archive=>CompressedArchive; implement DeepFS
I don't think an Archive type is necessary, and to my surprise, an optional embedded interface field (Compression) is always non-nil, but will panic if trying to use it unless it is set. Hence we now have CompressedArchive again, where Compression is required. DeepFS is a uniquely useful type as well, allowing one to traverse the file system including archive files (and compressed archive files!) as if they were part of the file system. But it is probably a terrible thing to do. :)
1 parent c1ebc84 commit ab120f0

File tree

16 files changed

+306
-79
lines changed

16 files changed

+306
-79
lines changed

7z.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ type SevenZip struct {
3131
Password string
3232
}
3333

34-
func (z SevenZip) Extension() string { return ".7z" }
34+
func (SevenZip) Extension() string { return ".7z" }
35+
func (SevenZip) MediaType() string { return "application/x-7z-compressed" }
3536

3637
func (z SevenZip) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) {
3738
var mr MatchResult

brotli.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ type Brotli struct {
1818
}
1919

2020
func (Brotli) Extension() string { return ".br" }
21+
func (Brotli) MediaType() string { return "application/x-br" }
2122

2223
func (br Brotli) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) {
2324
var mr MatchResult

bz2.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ type Bz2 struct {
1919
}
2020

2121
func (Bz2) Extension() string { return ".bz2" }
22+
func (Bz2) MediaType() string { return "application/x-bzip2" }
2223

2324
func (bz Bz2) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) {
2425
var mr MatchResult

formats.go

Lines changed: 74 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ import (
66
"errors"
77
"fmt"
88
"io"
9+
"path"
10+
"path/filepath"
911
"strings"
1012
)
1113

@@ -25,25 +27,23 @@ func RegisterFormat(format Format) {
2527
// compressed archive files (tar.gz, tar.bz2...). The returned Format
2628
// value can be type-asserted to ascertain its capabilities.
2729
//
28-
// If no matching formats were found, special error ErrNoMatch is returned.
30+
// If no matching formats were found, special error NoMatch is returned.
2931
//
3032
// If stream is nil then it will only match on file name and the
3133
// returned io.Reader will be nil.
3234
//
33-
// If stream is non-nil then the returned io.Reader will always be
34-
// non-nil and will read from the same point as the reader which was
35-
// passed in. If the input stream is not an io.Seeker, the returned
36-
// io.Reader value should be used in place of the input stream after
37-
// calling Identify() because it preserves and re-reads the bytes that
38-
// were already read during the identification process.
39-
//
40-
// If the input stream is an io.Seeker, Seek() must work, and the
41-
// original input value will be returned instead of a wrapper value.
35+
// If stream is non-nil, it will be returned in the same read position
36+
// as it was before Identify() was called, by virtue of buffering the
37+
// peeked bytes. However, if the stream is an io.Seeker, Seek() must
38+
// work, no extra buffering will be performed, and the original input
39+
// value will be returned at the original position by seeking.
4240
func Identify(ctx context.Context, filename string, stream io.Reader) (Format, io.Reader, error) {
4341
var compression Compression
4442
var archival Archival
4543
var extraction Extraction
4644

45+
filename = path.Base(filepath.ToSlash(filename))
46+
4747
rewindableStream, err := newRewindReader(stream)
4848
if err != nil {
4949
return nil, nil, err
@@ -69,7 +69,7 @@ func Identify(ctx context.Context, filename string, stream io.Reader) (Format, i
6969
}
7070
}
7171

72-
// try archival and extraction format next
72+
// try archival and extraction formats next
7373
for name, format := range formats {
7474
ar, isArchive := format.(Archival)
7575
ex, isExtract := format.(Extraction)
@@ -98,8 +98,14 @@ func Identify(ctx context.Context, filename string, stream io.Reader) (Format, i
9898
return archival, bufferedStream, nil
9999
case compression == nil && archival == nil && extraction != nil:
100100
return extraction, bufferedStream, nil
101-
case archival != nil || extraction != nil:
102-
return Archive{compression, archival, extraction}, bufferedStream, nil
101+
case compression == nil && archival != nil && extraction != nil:
102+
// archival and extraction are always set together, so they must be the same
103+
return archival, bufferedStream, nil
104+
case compression != nil && extraction != nil:
105+
// in practice, this is only used for compressed tar files, and the tar format can
106+
// both read and write, so the archival value should always work too; but keep in
107+
// mind that Identify() is used on existing files to be read, not new files to write
108+
return CompressedArchive{archival, extraction, compression}, bufferedStream, nil
103109
default:
104110
return nil, bufferedStream, NoMatch
105111
}
@@ -166,44 +172,43 @@ func readAtMost(stream io.Reader, n int) ([]byte, error) {
166172
return nil, err
167173
}
168174

169-
// Archive represents an archive which may be compressed at the outer layer.
170-
// It combines a compression format on top of an archive/extraction
171-
// format (e.g. ".tar.gz") and provides both functionalities in a single
172-
// type. It ensures that archival functions are wrapped by compressors and
173-
// decompressors. However, compressed archives have some limitations; for
174-
// example, files cannot be inserted/appended because of complexities with
175-
// modifying existing compression state (perhaps this could be overcome,
176-
// but I'm not about to try it).
177-
//
178-
// The embedded Archival and Extraction values are used for writing and
179-
// reading, respectively. Compression is optional and is only needed if the
180-
// format is compressed externally (for example, tar archives).
181-
type Archive struct {
182-
Compression
175+
// CompressedArchive represents an archive which is compressed externally
176+
// (for example, a gzipped tar file, .tar.gz.) It combines a compression
177+
// format on top of an archival/extraction format and provides both
178+
// functionalities in a single type, allowing archival and extraction
179+
// operations transparently through compression and decompression. However,
180+
// compressed archives have some limitations; for example, files cannot be
181+
// inserted/appended because of complexities with modifying existing
182+
// compression state (perhaps this could be overcome, but I'm not about to
183+
// try it).
184+
type CompressedArchive struct {
183185
Archival
184186
Extraction
187+
Compression
185188
}
186189

187190
// Name returns a concatenation of the archive and compression format extensions.
188-
func (ar Archive) Extension() string {
191+
func (ca CompressedArchive) Extension() string {
189192
var name string
190-
if ar.Archival != nil {
191-
name += ar.Archival.Extension()
192-
} else if ar.Extraction != nil {
193-
name += ar.Extraction.Extension()
194-
}
195-
if ar.Compression != nil {
196-
name += ar.Compression.Extension()
193+
if ca.Archival != nil {
194+
name += ca.Archival.Extension()
195+
} else if ca.Extraction != nil {
196+
name += ca.Extraction.Extension()
197197
}
198+
name += ca.Compression.Extension()
198199
return name
199200
}
200201

202+
// MediaType returns the compression format's MIME type, since
203+
// a compressed archive is fundamentally a compressed file.
204+
func (ca CompressedArchive) MediaType() string { return ca.Compression.MediaType() }
205+
201206
// Match matches if the input matches both the compression and archival/extraction format.
202-
func (ar Archive) Match(ctx context.Context, filename string, stream io.Reader) (MatchResult, error) {
207+
func (ca CompressedArchive) Match(ctx context.Context, filename string, stream io.Reader) (MatchResult, error) {
203208
var conglomerate MatchResult
204209

205-
if ar.Compression != nil {
206-
matchResult, err := ar.Compression.Match(ctx, filename, stream)
210+
if ca.Compression != nil {
211+
matchResult, err := ca.Compression.Match(ctx, filename, stream)
207212
if err != nil {
208213
return MatchResult{}, err
209214
}
@@ -213,7 +218,7 @@ func (ar Archive) Match(ctx context.Context, filename string, stream io.Reader)
213218

214219
// wrap the reader with the decompressor so we can
215220
// attempt to match the archive by reading the stream
216-
rc, err := ar.Compression.OpenReader(stream)
221+
rc, err := ca.Compression.OpenReader(stream)
217222
if err != nil {
218223
return matchResult, err
219224
}
@@ -223,8 +228,8 @@ func (ar Archive) Match(ctx context.Context, filename string, stream io.Reader)
223228
conglomerate = matchResult
224229
}
225230

226-
if ar.Archival != nil {
227-
matchResult, err := ar.Archival.Match(ctx, filename, stream)
231+
if ca.Archival != nil {
232+
matchResult, err := ca.Archival.Match(ctx, filename, stream)
228233
if err != nil {
229234
return MatchResult{}, err
230235
}
@@ -238,33 +243,33 @@ func (ar Archive) Match(ctx context.Context, filename string, stream io.Reader)
238243
return conglomerate, nil
239244
}
240245

241-
// Archive adds files to the output archive while compressing the result.
242-
func (ar Archive) Archive(ctx context.Context, output io.Writer, files []FileInfo) error {
243-
if ar.Archival == nil {
246+
// Archive writes an archive to the output stream while compressing the result.
247+
func (ca CompressedArchive) Archive(ctx context.Context, output io.Writer, files []FileInfo) error {
248+
if ca.Archival == nil {
244249
return fmt.Errorf("no archival format")
245250
}
246-
if ar.Compression != nil {
247-
wc, err := ar.Compression.OpenWriter(output)
251+
if ca.Compression != nil {
252+
wc, err := ca.Compression.OpenWriter(output)
248253
if err != nil {
249254
return err
250255
}
251256
defer wc.Close()
252257
output = wc
253258
}
254-
return ar.Archival.Archive(ctx, output, files)
259+
return ca.Archival.Archive(ctx, output, files)
255260
}
256261

257262
// ArchiveAsync adds files to the output archive while compressing the result asynchronously.
258-
func (ar Archive) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) error {
259-
if ar.Archival == nil {
263+
func (ca CompressedArchive) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-chan ArchiveAsyncJob) error {
264+
if ca.Archival == nil {
260265
return fmt.Errorf("no archival format")
261266
}
262-
do, ok := ar.Archival.(ArchiverAsync)
267+
do, ok := ca.Archival.(ArchiverAsync)
263268
if !ok {
264-
return fmt.Errorf("%T archive does not support async writing", ar.Archival)
269+
return fmt.Errorf("%T archive does not support async writing", ca.Archival)
265270
}
266-
if ar.Compression != nil {
267-
wc, err := ar.Compression.OpenWriter(output)
271+
if ca.Compression != nil {
272+
wc, err := ca.Compression.OpenWriter(output)
268273
if err != nil {
269274
return err
270275
}
@@ -274,20 +279,20 @@ func (ar Archive) ArchiveAsync(ctx context.Context, output io.Writer, jobs <-cha
274279
return do.ArchiveAsync(ctx, output, jobs)
275280
}
276281

277-
// Extract reads files out of an archive while decompressing the results.
278-
func (ar Archive) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error {
279-
if ar.Extraction == nil {
282+
// Extract reads files out of a compressed archive while decompressing the results.
283+
func (ca CompressedArchive) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error {
284+
if ca.Extraction == nil {
280285
return fmt.Errorf("no extraction format")
281286
}
282-
if ar.Compression != nil {
283-
rc, err := ar.Compression.OpenReader(sourceArchive)
287+
if ca.Compression != nil {
288+
rc, err := ca.Compression.OpenReader(sourceArchive)
284289
if err != nil {
285290
return err
286291
}
287292
defer rc.Close()
288293
sourceArchive = rc
289294
}
290-
return ar.Extraction.Extract(ctx, sourceArchive, handleFile)
295+
return ca.Extraction.Extract(ctx, sourceArchive, handleFile)
291296
}
292297

293298
// MatchResult returns true if the format was matched either
@@ -303,6 +308,10 @@ type MatchResult struct {
303308
// Matched returns true if a match was made by either name or stream.
304309
func (mr MatchResult) Matched() bool { return mr.ByName || mr.ByStream }
305310

311+
func (mr MatchResult) String() string {
312+
return fmt.Sprintf("{ByName=%v ByStream=%v}", mr.ByName, mr.ByStream)
313+
}
314+
306315
// rewindReader is a Reader that can be rewound (reset) to re-read what
307316
// was already read and then continue to read more from the underlying
308317
// stream. When no more rewinding is necessary, call reader() to get a
@@ -422,8 +431,10 @@ var formats = make(map[string]Format)
422431

423432
// Interface guards
424433
var (
425-
_ Format = (*Archive)(nil)
426-
_ Archiver = (*Archive)(nil)
427-
_ ArchiverAsync = (*Archive)(nil)
428-
_ Extractor = (*Archive)(nil)
434+
_ Format = (*CompressedArchive)(nil)
435+
_ Archiver = (*CompressedArchive)(nil)
436+
_ ArchiverAsync = (*CompressedArchive)(nil)
437+
_ Extractor = (*CompressedArchive)(nil)
438+
_ Compressor = (*CompressedArchive)(nil)
439+
_ Decompressor = (*CompressedArchive)(nil)
429440
)

0 commit comments

Comments
 (0)