detect bzip2/xz; add test data
Signed-off-by: liangchenye <liangchenye@huawei.com>
This commit is contained in:
parent
354c4b3672
commit
8b649af666
93
utils/tar.go
93
utils/tar.go
@ -18,10 +18,12 @@ import (
|
|||||||
"archive/tar"
|
"archive/tar"
|
||||||
"bufio"
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"compress/bzip2"
|
||||||
"compress/gzip"
|
"compress/gzip"
|
||||||
"errors"
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"os/exec"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -32,19 +34,75 @@ var (
|
|||||||
// ErrExtractedFileTooBig occurs when a file to extract is too big.
|
// ErrExtractedFileTooBig occurs when a file to extract is too big.
|
||||||
ErrExtractedFileTooBig = errors.New("utils: could not extract one or more files from the archive: file too big")
|
ErrExtractedFileTooBig = errors.New("utils: could not extract one or more files from the archive: file too big")
|
||||||
|
|
||||||
|
readLen = 6 // max bytes to sniff
|
||||||
|
|
||||||
gzipHeader = []byte{0x1f, 0x8b}
|
gzipHeader = []byte{0x1f, 0x8b}
|
||||||
|
bzip2Header = []byte{0x42, 0x5a, 0x68}
|
||||||
|
xzHeader = []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// XzReader is an io.ReadCloser which decompresses xz compressed data by
// streaming it through an external xz process.
type XzReader struct {
	// ReadCloser is the read side of the pipe that receives xz's stdout.
	io.ReadCloser
	// cmd is the xz process started by NewXzReader.
	cmd *exec.Cmd
	// closech delivers the result of waiting for cmd to Close.
	closech chan error
}

// NewXzReader shells out to a command line xz executable (if
// available) to decompress the given io.Reader using the xz
// compression format and returns an *XzReader.
//
// An error is returned when the xz executable cannot be found or cannot be
// started. It is the caller's responsibility to call Close on the XzReader
// when done.
func NewXzReader(r io.Reader) (*XzReader, error) {
	ex, err := exec.LookPath("xz")
	if err != nil {
		return nil, err
	}

	rpipe, wpipe := io.Pipe()
	cmd := exec.Command(ex, "--decompress", "--stdout")
	cmd.Stdin = r
	cmd.Stdout = wpipe

	// Start synchronously so that a launch failure is reported to the caller
	// and cmd.Process is guaranteed to be set before Close can be called.
	if err := cmd.Start(); err != nil {
		return nil, err
	}

	// Buffered so the goroutine can terminate even if Close is never called.
	closech := make(chan error, 1)
	go func() {
		err := cmd.Wait()
		// Hand xz's exit status to the reader; a nil error reads as io.EOF.
		wpipe.CloseWithError(err)
		closech <- err
		// Closing the channel lets repeated Close calls return immediately.
		close(closech)
	}()

	return &XzReader{rpipe, cmd, closech}, nil
}

// Close closes the read side of the pipe, kills the xz process and waits for
// it to terminate. The first call returns the result of waiting for xz, which
// is nil when xz had already exited successfully; later calls return nil.
func (r *XzReader) Close() error {
	// Closing the read side makes any pending write from xz fail instead of
	// blocking forever. The wait result below is what callers care about.
	_ = r.ReadCloser.Close()
	// Kill reports an error when the process has already exited, which is the
	// normal case after the stream has been read to the end.
	_ = r.cmd.Process.Kill()
	return <-r.closech
}
|
||||||
|
|
||||||
|
// TarReadCloser bundles a *tar.Reader with the io.Closer related to it, so
// that both can be passed around as a single value.
// Callers must invoke Close on the TarReadCloser once they are done with it.
type TarReadCloser struct {
	*tar.Reader
	io.Closer
}

// Close delegates to the embedded io.Closer and returns its result.
func (r *TarReadCloser) Close() error {
	underlying := r.Closer
	return underlying.Close()
}
|
||||||
|
|
||||||
// SelectivelyExtractArchive extracts the specified files and folders
|
// SelectivelyExtractArchive extracts the specified files and folders
|
||||||
// from targz data read from the given reader and store them in a map indexed by file paths
|
// from tar data (optionally gzip, bzip2 or xz compressed) read from the given reader and stores them in a map indexed by file paths
|
||||||
func SelectivelyExtractArchive(r io.Reader, prefix string, toExtract []string, maxFileSize int64) (map[string][]byte, error) {
|
func SelectivelyExtractArchive(r io.Reader, prefix string, toExtract []string, maxFileSize int64) (map[string][]byte, error) {
|
||||||
data := make(map[string][]byte)
|
data := make(map[string][]byte)
|
||||||
|
|
||||||
// Create a tar or tar/tar-gzip reader
|
// Create a tar or tar/tar-gzip/tar-bzip2/tar-xz reader
|
||||||
tr, err := getTarReader(r)
|
tr, err := getTarReader(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return data, ErrCouldNotExtract
|
return data, ErrCouldNotExtract
|
||||||
}
|
}
|
||||||
|
defer tr.Close()
|
||||||
|
|
||||||
// For each element in the archive
|
// For each element in the archive
|
||||||
for {
|
for {
|
||||||
@ -89,22 +147,35 @@ func SelectivelyExtractArchive(r io.Reader, prefix string, toExtract []string, m
|
|||||||
return data, nil
|
return data, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// getTarReader returns a tar.Reader associated with the specified io.Reader,
|
// getTarReader returns a TarReadCloser associated with the specified io.Reader.
|
||||||
// optionally backed by a gzip.Reader if gzip compression is detected.
|
|
||||||
//
|
//
|
||||||
// Gzip detection is done by using the magic numbers defined in the RFC1952 :
|
// Gzip/Bzip2/XZ detection is done by using the magic numbers:
|
||||||
// the first two bytes should be 0x1f and 0x8b..
|
// Gzip: the first two bytes should be 0x1f and 0x8b. Defined in the RFC1952.
|
||||||
func getTarReader(r io.Reader) (*tar.Reader, error) {
|
// Bzip2: the first three bytes should be 0x42, 0x5a and 0x68. No RFC.
|
||||||
|
// XZ: the first six bytes should be 0xfd, 0x37, 0x7a, 0x58, 0x5a and 0x00. No RFC.
|
||||||
|
func getTarReader(r io.Reader) (*TarReadCloser, error) {
|
||||||
br := bufio.NewReader(r)
|
br := bufio.NewReader(r)
|
||||||
header, err := br.Peek(2)
|
header, err := br.Peek(readLen)
|
||||||
|
if err == nil {
|
||||||
if err == nil && bytes.Equal(header, gzipHeader) {
|
switch {
|
||||||
|
case bytes.HasPrefix(header, gzipHeader):
|
||||||
gr, err := gzip.NewReader(br)
|
gr, err := gzip.NewReader(br)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return tar.NewReader(gr), nil
|
return &TarReadCloser{tar.NewReader(gr), gr}, nil
|
||||||
|
case bytes.HasPrefix(header, bzip2Header):
|
||||||
|
bzip2r := ioutil.NopCloser(bzip2.NewReader(br))
|
||||||
|
return &TarReadCloser{tar.NewReader(bzip2r), bzip2r}, nil
|
||||||
|
case bytes.HasPrefix(header, xzHeader):
|
||||||
|
xzr, err := NewXzReader(br)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &TarReadCloser{tar.NewReader(xzr), xzr}, nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return tar.NewReader(br), nil
|
dr := ioutil.NopCloser(br)
|
||||||
|
return &TarReadCloser{tar.NewReader(dr), dr}, nil
|
||||||
}
|
}
|
||||||
|
BIN
utils/testdata/utils_test.tar.bz2
vendored
Normal file
BIN
utils/testdata/utils_test.tar.bz2
vendored
Normal file
Binary file not shown.
BIN
utils/testdata/utils_test.tar.xz
vendored
Normal file
BIN
utils/testdata/utils_test.tar.xz
vendored
Normal file
Binary file not shown.
@ -60,9 +60,9 @@ func TestTar(t *testing.T) {
|
|||||||
var err error
|
var err error
|
||||||
var data map[string][]byte
|
var data map[string][]byte
|
||||||
_, filepath, _, _ := runtime.Caller(0)
|
_, filepath, _, _ := runtime.Caller(0)
|
||||||
|
testDataDir := "/testdata"
|
||||||
for _, filename := range []string{"/testdata/utils_test.tar.gz", "/testdata/utils_test.tar"} {
|
for _, filename := range []string{"utils_test.tar.gz", "utils_test.tar.bz2", "utils_test.tar.xz", "utils_test.tar"} {
|
||||||
testArchivePath := path.Join(path.Dir(filepath)) + filename
|
testArchivePath := path.Join(path.Dir(filepath), testDataDir, filename)
|
||||||
|
|
||||||
// Extract non compressed data
|
// Extract non compressed data
|
||||||
data, err = SelectivelyExtractArchive(bytes.NewReader([]byte("that string does not represent a tar or tar-gzip file")), "", []string{}, 0)
|
data, err = SelectivelyExtractArchive(bytes.NewReader([]byte("that string does not represent a tar or tar-gzip file")), "", []string{}, 0)
|
||||||
|
Loading…
Reference in New Issue
Block a user