detect bzip2/xz; add test data

Signed-off-by: liangchenye <liangchenye@huawei.com>
This commit is contained in:
liangchenye 2015-12-15 16:08:27 +08:00
parent 354c4b3672
commit 8b649af666
4 changed files with 89 additions and 18 deletions

View File

@ -18,10 +18,12 @@ import (
"archive/tar" "archive/tar"
"bufio" "bufio"
"bytes" "bytes"
"compress/bzip2"
"compress/gzip" "compress/gzip"
"errors" "errors"
"io" "io"
"io/ioutil" "io/ioutil"
"os/exec"
"strings" "strings"
) )
@ -32,19 +34,75 @@ var (
// ErrExtractedFileTooBig occurs when a file to extract is too big. // ErrExtractedFileTooBig occurs when a file to extract is too big.
ErrExtractedFileTooBig = errors.New("utils: could not extract one or more files from the archive: file too big") ErrExtractedFileTooBig = errors.New("utils: could not extract one or more files from the archive: file too big")
readLen = 6 // max bytes to sniff
gzipHeader = []byte{0x1f, 0x8b} gzipHeader = []byte{0x1f, 0x8b}
bzip2Header = []byte{0x42, 0x5a, 0x68}
xzHeader = []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00}
) )
// XzReader is an io.ReadCloser which decompresses xz compressed data.
//
// The decompression is performed by an external xz process; the embedded
// ReadCloser yields that process's standard output.
type XzReader struct {
	// ReadCloser is the read end of the pipe fed by the xz process's stdout.
	io.ReadCloser
	// cmd is the xz command whose output is being read.
	cmd *exec.Cmd
	// closech delivers the exit result of the xz process to Close.
	closech chan error
}
// NewXzReader shells out to a command line xz executable (if
// available) to decompress the given io.Reader using the xz
// compression format and returns an *XzReader.
//
// An error is returned when no xz executable can be found in PATH or when
// the xz process cannot be started.
//
// It is the caller's responsibility to call Close on the XzReader when done.
func NewXzReader(r io.Reader) (*XzReader, error) {
	// Resolve the executable first so nothing is allocated when xz is missing.
	ex, err := exec.LookPath("xz")
	if err != nil {
		return nil, err
	}

	rpipe, wpipe := io.Pipe()

	cmd := exec.Command(ex, "--decompress", "--stdout")
	cmd.Stdin = r
	cmd.Stdout = wpipe

	// Start the process synchronously: cmd.Process is only populated once the
	// command has started, and Close dereferences it. Starting here also
	// avoids a data race between the goroutine below and Close.
	if err := cmd.Start(); err != nil {
		return nil, err
	}

	// Buffered so the goroutine can always deliver the exit result and
	// terminate, even if the caller never calls Close.
	closech := make(chan error, 1)

	go func() {
		err := cmd.Wait()
		// Propagate the exit status to readers: a nil error surfaces as io.EOF.
		wpipe.CloseWithError(err)
		closech <- err
	}()

	return &XzReader{rpipe, cmd, closech}, nil
}
// Close closes the read side of the decompressed stream, kills the xz
// process if it has been started, and returns the exit result of that
// process.
//
// Close blocks until the xz process result is available. It must be called
// at most once: a second call would wait forever for a result that has
// already been consumed.
func (r *XzReader) Close() error {
	// Closing the read end makes any further write by xz fail instead of
	// blocking; the returned error is deliberately ignored, as before.
	r.ReadCloser.Close()

	// cmd.Process is nil until the command has been started (and stays nil if
	// starting failed), so guard against a nil pointer dereference.
	if r.cmd.Process != nil {
		// Best effort: the error is ignored because the process may simply
		// have exited already.
		r.cmd.Process.Kill()
	}

	return <-r.closech
}
// TarReadCloser embeds a *tar.Reader and the related io.Closer.
// It is the caller's responsibility to call Close on TarReadCloser when
// done.
type TarReadCloser struct {
	// Reader reads the tar archive entries.
	*tar.Reader
	// Closer is closed by Close.
	io.Closer
}
// Close closes the embedded io.Closer and returns the error it reports.
func (t *TarReadCloser) Close() error {
	underlying := t.Closer
	return underlying.Close()
}
// SelectivelyExtractArchive extracts the specified files and folders // SelectivelyExtractArchive extracts the specified files and folders
// from targz data read from the given reader and store them in a map indexed by file paths // from targz data read from the given reader and store them in a map indexed by file paths
func SelectivelyExtractArchive(r io.Reader, prefix string, toExtract []string, maxFileSize int64) (map[string][]byte, error) { func SelectivelyExtractArchive(r io.Reader, prefix string, toExtract []string, maxFileSize int64) (map[string][]byte, error) {
data := make(map[string][]byte) data := make(map[string][]byte)
// Create a tar or tar/tar-gzip reader // Create a tar or tar/tar-gzip/tar-bzip2/tar-xz reader
tr, err := getTarReader(r) tr, err := getTarReader(r)
if err != nil { if err != nil {
return data, ErrCouldNotExtract return data, ErrCouldNotExtract
} }
defer tr.Close()
// For each element in the archive // For each element in the archive
for { for {
@ -89,22 +147,35 @@ func SelectivelyExtractArchive(r io.Reader, prefix string, toExtract []string, m
return data, nil return data, nil
} }
// getTarReader returns a tar.Reader associated with the specified io.Reader, // getTarReader returns a TarReadCloser associated with the specified io.Reader.
// optionally backed by a gzip.Reader if gzip compression is detected.
// //
// Gzip detection is done by using the magic numbers defined in the RFC1952 : // Gzip/Bzip2/XZ detection is done by using the magic numbers:
// the first two bytes should be 0x1f and 0x8b.. // Gzip: the first two bytes should be 0x1f and 0x8b. Defined in the RFC1952.
func getTarReader(r io.Reader) (*tar.Reader, error) { // Bzip2: the first three bytes should be 0x42, 0x5a and 0x68. No RFC.
// XZ: the first six bytes should be 0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00. No RFC.
func getTarReader(r io.Reader) (*TarReadCloser, error) {
br := bufio.NewReader(r) br := bufio.NewReader(r)
header, err := br.Peek(2) header, err := br.Peek(readLen)
if err == nil {
if err == nil && bytes.Equal(header, gzipHeader) { switch {
case bytes.HasPrefix(header, gzipHeader):
gr, err := gzip.NewReader(br) gr, err := gzip.NewReader(br)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return tar.NewReader(gr), nil return &TarReadCloser{tar.NewReader(gr), gr}, nil
case bytes.HasPrefix(header, bzip2Header):
bzip2r := ioutil.NopCloser(bzip2.NewReader(br))
return &TarReadCloser{tar.NewReader(bzip2r), bzip2r}, nil
case bytes.HasPrefix(header, xzHeader):
xzr, err := NewXzReader(br)
if err != nil {
return nil, err
}
return &TarReadCloser{tar.NewReader(xzr), xzr}, nil
}
} }
return tar.NewReader(br), nil dr := ioutil.NopCloser(br)
return &TarReadCloser{tar.NewReader(dr), dr}, nil
} }

BIN
utils/testdata/utils_test.tar.bz2 vendored Normal file

Binary file not shown.

BIN
utils/testdata/utils_test.tar.xz vendored Normal file

Binary file not shown.

View File

@ -60,9 +60,9 @@ func TestTar(t *testing.T) {
var err error var err error
var data map[string][]byte var data map[string][]byte
_, filepath, _, _ := runtime.Caller(0) _, filepath, _, _ := runtime.Caller(0)
testDataDir := "/testdata"
for _, filename := range []string{"/testdata/utils_test.tar.gz", "/testdata/utils_test.tar"} { for _, filename := range []string{"utils_test.tar.gz", "utils_test.tar.bz2", "utils_test.tar.xz", "utils_test.tar"} {
testArchivePath := path.Join(path.Dir(filepath)) + filename testArchivePath := path.Join(path.Dir(filepath), testDataDir, filename)
// Extract non compressed data // Extract non compressed data
data, err = SelectivelyExtractArchive(bytes.NewReader([]byte("that string does not represent a tar or tar-gzip file")), "", []string{}, 0) data, err = SelectivelyExtractArchive(bytes.NewReader([]byte("that string does not represent a tar or tar-gzip file")), "", []string{}, 0)