From 8b649af666f77d40512bfaae1ac37bd0e39ea60b Mon Sep 17 00:00:00 2001 From: liangchenye Date: Tue, 15 Dec 2015 16:08:27 +0800 Subject: [PATCH] detect bzip2/xz; add test data Signed-off-by: liangchenye --- utils/tar.go | 101 +++++++++++++++++++++++++----- utils/testdata/utils_test.tar.bz2 | Bin 0 -> 644 bytes utils/testdata/utils_test.tar.xz | Bin 0 -> 608 bytes utils/utils_test.go | 6 +- 4 files changed, 89 insertions(+), 18 deletions(-) create mode 100644 utils/testdata/utils_test.tar.bz2 create mode 100644 utils/testdata/utils_test.tar.xz diff --git a/utils/tar.go b/utils/tar.go index 26a47a50..f2cff669 100644 --- a/utils/tar.go +++ b/utils/tar.go @@ -18,10 +18,12 @@ import ( "archive/tar" "bufio" "bytes" + "compress/bzip2" "compress/gzip" "errors" "io" "io/ioutil" + "os/exec" "strings" ) @@ -32,19 +34,75 @@ var ( // ErrExtractedFileTooBig occurs when a file to extract is too big. ErrExtractedFileTooBig = errors.New("utils: could not extract one or more files from the archive: file too big") - gzipHeader = []byte{0x1f, 0x8b} + readLen = 6 // max bytes to sniff + + gzipHeader = []byte{0x1f, 0x8b} + bzip2Header = []byte{0x42, 0x5a, 0x68} + xzHeader = []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00} ) +// XzReader is an io.ReadCloser which decompresses xz compressed data. +type XzReader struct { + io.ReadCloser + cmd *exec.Cmd + closech chan error +} + +// NewXzReader shells out to a command line xz executable (if +// available) to decompress the given io.Reader using the xz +// compression format and returns an *XzReader. +// It is the caller's responsibility to call Close on the XzReader when done. 
+func NewXzReader(r io.Reader) (*XzReader, error) { + rpipe, wpipe := io.Pipe() + ex, err := exec.LookPath("xz") + if err != nil { + return nil, err + } + cmd := exec.Command(ex, "--decompress", "--stdout") + + closech := make(chan error) + + cmd.Stdin = r + cmd.Stdout = wpipe + + go func() { + err := cmd.Run() + wpipe.CloseWithError(err) + closech <- err + }() + + return &XzReader{rpipe, cmd, closech}, nil +} + +func (r *XzReader) Close() error { + r.ReadCloser.Close() + r.cmd.Process.Kill() + return <-r.closech +} + +// TarReadCloser embeds a *tar.Reader and the related io.Closer. +// It is the caller's responsibility to call Close on TarReadCloser when +// done. +type TarReadCloser struct { + *tar.Reader + io.Closer +} + +func (r *TarReadCloser) Close() error { + return r.Closer.Close() +} + // SelectivelyExtractArchive extracts the specified files and folders // from targz data read from the given reader and store them in a map indexed by file paths func SelectivelyExtractArchive(r io.Reader, prefix string, toExtract []string, maxFileSize int64) (map[string][]byte, error) { data := make(map[string][]byte) - // Create a tar or tar/tar-gzip reader + // Create a tar or tar/tar-gzip/tar-bzip2/tar-xz reader tr, err := getTarReader(r) if err != nil { return data, ErrCouldNotExtract } + defer tr.Close() // For each element in the archive for { @@ -89,22 +147,35 @@ func SelectivelyExtractArchive(r io.Reader, prefix string, toExtract []string, m return data, nil } -// getTarReader returns a tar.Reader associated with the specified io.Reader, -// optionally backed by a gzip.Reader if gzip compression is detected. +// getTarReader returns a TarReadCloser associated with the specified io.Reader. // -// Gzip detection is done by using the magic numbers defined in the RFC1952 : -// the first two bytes should be 0x1f and 0x8b.. 
-func getTarReader(r io.Reader) (*tar.Reader, error) { +// Gzip/Bzip2/XZ detection is done by using the magic numbers: +// Gzip: the first two bytes should be 0x1f and 0x8b. Defined in the RFC1952. +// Bzip2: the first three bytes should be 0x42, 0x5a and 0x68. No RFC. +// XZ: the first three bytes should be 0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00. No RFC. +func getTarReader(r io.Reader) (*TarReadCloser, error) { br := bufio.NewReader(r) - header, err := br.Peek(2) - - if err == nil && bytes.Equal(header, gzipHeader) { - gr, err := gzip.NewReader(br) - if err != nil { - return nil, err + header, err := br.Peek(readLen) + if err == nil { + switch { + case bytes.HasPrefix(header, gzipHeader): + gr, err := gzip.NewReader(br) + if err != nil { + return nil, err + } + return &TarReadCloser{tar.NewReader(gr), gr}, nil + case bytes.HasPrefix(header, bzip2Header): + bzip2r := ioutil.NopCloser(bzip2.NewReader(br)) + return &TarReadCloser{tar.NewReader(bzip2r), bzip2r}, nil + case bytes.HasPrefix(header, xzHeader): + xzr, err := NewXzReader(br) + if err != nil { + return nil, err + } + return &TarReadCloser{tar.NewReader(xzr), xzr}, nil } - return tar.NewReader(gr), nil } - return tar.NewReader(br), nil + dr := ioutil.NopCloser(br) + return &TarReadCloser{tar.NewReader(dr), dr}, nil } diff --git a/utils/testdata/utils_test.tar.bz2 b/utils/testdata/utils_test.tar.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..a359247228a1dbff32f83675fcf96336d686458d GIT binary patch literal 644 zcmV-~0(<>JT4*^jL0KkKStl@5v;YDZfAs(O&EP@>|A2P}|6%{9-oQWr03ZgS1ONd@ zKmvu;N=7s!l{QDHiK7q=0MG!?0009fLuv^U1cfxn$+a?iM$%$!CN!8sO*CROJs{Dc zz%Yy^n1%rW$N_|DlO_>>hJjKEnwP{)?*!6{HFL{f{=hms*AF`o5~eDVy+L*1Zk#O)KoNg9T{Q!g zokt#rWsU4sv$6L1WAXdDb2-?c`@g%j$H!0CrQyl3#b%PIs_03YNN97Vh*c2UL%9lu ztp5G~%U1^mv^49XXl%pLcxI^w^Fq>@R1rB4wj*jq$&|~rg4l2(D}IO?S6+af(1NHe zg0eKxWTsGT6v9cTx-`&}i3Cy^ub`O*ELgfWXuDe? 
zl2XKEs)EEd5J_4vK*d+AP-B$TK`ZjL2y9?x!(Epy%#Nfrp_hI#ZJeBSV95%G8bx%i eXOC?dUn-OqkX1e4iishAi@744C`c2SDp~;aHzgbZ literal 0 HcmV?d00001 diff --git a/utils/testdata/utils_test.tar.xz b/utils/testdata/utils_test.tar.xz new file mode 100644 index 0000000000000000000000000000000000000000..a530986b9f3ab77cac0e2bb84434fee2235d543b GIT binary patch literal 608 zcmV-m0-yc;H+ooF000E$*0e?f03iVu0001VFXf})JO2V6T>uvgyc~T2m4X!T9%rf# z@pd6uzQ%FkEmUW8SYFUdkMvASY#Q*>+E^1VvHqc9}-KHl*>DfEYThGbS-?#+6N!yly(PAfbnW`P?nUyaFYk3uu4 zh+nvx!vUK7KTs)9CvnWtNI8PfE2rhaD;z*oRq%ubIl0VHV*LcPMc_1K0P;$JF_Lp7xa@g(> zTz;16;`_Aem4*3tXui0K*bQ&^(FAa{z@w%#D%}%vBfbv%Wi+0BeK2YJi;!qlJXo(>lUhbBO*Qd z7tTo}0QzLW>q%yq->CR1tc7(b_Ztt~Ir?5=v%X19EKsB4PW-0lEZ$cmM$1%|AM^#Ao{g000001X)@Wv?2=t literal 0 HcmV?d00001 diff --git a/utils/utils_test.go b/utils/utils_test.go index 6b46b055..aafcc2dc 100644 --- a/utils/utils_test.go +++ b/utils/utils_test.go @@ -60,9 +60,9 @@ func TestTar(t *testing.T) { var err error var data map[string][]byte _, filepath, _, _ := runtime.Caller(0) - - for _, filename := range []string{"/testdata/utils_test.tar.gz", "/testdata/utils_test.tar"} { - testArchivePath := path.Join(path.Dir(filepath)) + filename + testDataDir := "/testdata" + for _, filename := range []string{"utils_test.tar.gz", "utils_test.tar.bz2", "utils_test.tar.xz", "utils_test.tar"} { + testArchivePath := path.Join(path.Dir(filepath), testDataDir, filename) // Extract non compressed data data, err = SelectivelyExtractArchive(bytes.NewReader([]byte("that string does not represent a tar or tar-gzip file")), "", []string{}, 0)