clair/utils/tar.go

182 lines
4.8 KiB
Go
Raw Normal View History

2015-11-13 19:11:28 +00:00
// Copyright 2015 clair authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package utils
import (
"archive/tar"
"bufio"
"bytes"
"compress/bzip2"
2015-11-13 19:11:28 +00:00
"compress/gzip"
"errors"
"io"
"io/ioutil"
"os/exec"
2015-11-13 19:11:28 +00:00
"strings"
)
var (
// ErrCouldNotExtract occurs when an extraction fails.
ErrCouldNotExtract = errors.New("utils: could not extract the archive")
// ErrExtractedFileTooBig occurs when a file to extract is too big.
ErrExtractedFileTooBig = errors.New("utils: could not extract one or more files from the archive: file too big")
readLen = 6 // max bytes to sniff
gzipHeader = []byte{0x1f, 0x8b}
bzip2Header = []byte{0x42, 0x5a, 0x68}
xzHeader = []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00}
2015-11-13 19:11:28 +00:00
)
// XzReader is an io.ReadCloser which decompresses xz compressed data.
type XzReader struct {
io.ReadCloser
cmd *exec.Cmd
closech chan error
}
// NewXzReader shells out to a command line xz executable (if
// available) to decompress the given io.Reader using the xz
// compression format and returns an *XzReader.
// It is the caller's responsibility to call Close on the XzReader when done.
func NewXzReader(r io.Reader) (*XzReader, error) {
rpipe, wpipe := io.Pipe()
ex, err := exec.LookPath("xz")
if err != nil {
return nil, err
}
cmd := exec.Command(ex, "--decompress", "--stdout")
closech := make(chan error)
cmd.Stdin = r
cmd.Stdout = wpipe
go func() {
err := cmd.Run()
wpipe.CloseWithError(err)
closech <- err
}()
return &XzReader{rpipe, cmd, closech}, nil
}
func (r *XzReader) Close() error {
r.ReadCloser.Close()
r.cmd.Process.Kill()
return <-r.closech
}
// TarReadCloser embeds a *tar.Reader and the related io.Closer
// It is the caller's responsibility to call Close on TarReadCloser when
// done.
type TarReadCloser struct {
*tar.Reader
io.Closer
}
func (r *TarReadCloser) Close() error {
return r.Closer.Close()
}
2015-11-13 19:11:28 +00:00
// SelectivelyExtractArchive extracts the specified files and folders
// from targz data read from the given reader and store them in a map indexed by file paths
func SelectivelyExtractArchive(r io.Reader, prefix string, toExtract []string, maxFileSize int64) (map[string][]byte, error) {
2015-11-13 19:11:28 +00:00
data := make(map[string][]byte)
// Create a tar or tar/tar-gzip/tar-bzip2/tar-xz reader
2015-11-13 19:11:28 +00:00
tr, err := getTarReader(r)
if err != nil {
return data, ErrCouldNotExtract
}
defer tr.Close()
2015-11-13 19:11:28 +00:00
// For each element in the archive
for {
hdr, err := tr.Next()
if err == io.EOF {
break
}
if err != nil {
return data, ErrCouldNotExtract
}
// Get element filename
filename := hdr.Name
filename = strings.TrimPrefix(filename, "./")
if prefix != "" {
filename = strings.TrimPrefix(filename, prefix)
}
2015-11-13 19:11:28 +00:00
// Determine if we should extract the element
toBeExtracted := false
for _, s := range toExtract {
if strings.HasPrefix(filename, s) {
toBeExtracted = true
break
}
}
if toBeExtracted {
// File size limit
if maxFileSize > 0 && hdr.Size > maxFileSize {
return data, ErrExtractedFileTooBig
}
// Extract the element
if hdr.Typeflag == tar.TypeSymlink || hdr.Typeflag == tar.TypeLink || hdr.Typeflag == tar.TypeReg {
d, _ := ioutil.ReadAll(tr)
data[filename] = d
}
}
}
return data, nil
}
// getTarReader returns a TarReaderCloser associated with the specified io.Reader.
2015-11-13 19:11:28 +00:00
//
// Gzip/Bzip2/XZ detection is done by using the magic numbers:
// Gzip: the first two bytes should be 0x1f and 0x8b. Defined in the RFC1952.
// Bzip2: the first three bytes should be 0x42, 0x5a and 0x68. No RFC.
// XZ: the first three bytes should be 0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00. No RFC.
func getTarReader(r io.Reader) (*TarReadCloser, error) {
2015-11-13 19:11:28 +00:00
br := bufio.NewReader(r)
header, err := br.Peek(readLen)
if err == nil {
switch {
case bytes.HasPrefix(header, gzipHeader):
gr, err := gzip.NewReader(br)
if err != nil {
return nil, err
}
return &TarReadCloser{tar.NewReader(gr), gr}, nil
case bytes.HasPrefix(header, bzip2Header):
bzip2r := ioutil.NopCloser(bzip2.NewReader(br))
return &TarReadCloser{tar.NewReader(bzip2r), bzip2r}, nil
case bytes.HasPrefix(header, xzHeader):
xzr, err := NewXzReader(br)
if err != nil {
return nil, err
}
return &TarReadCloser{tar.NewReader(xzr), xzr}, nil
2015-11-13 19:11:28 +00:00
}
}
dr := ioutil.NopCloser(br)
return &TarReadCloser{tar.NewReader(dr), dr}, nil
2015-11-13 19:11:28 +00:00
}