pkg: utils/tar.go -> pkg/tarutil

pull/305/head
Jimmy Zelinskie 7 years ago
parent 6c90635848
commit 03bac0f1b6

@ -27,7 +27,7 @@ import (
"github.com/coreos/clair/api/context"
"github.com/coreos/clair/database"
"github.com/coreos/clair/utils"
"github.com/coreos/clair/pkg/tarutil"
cerrors "github.com/coreos/clair/utils/errors"
"github.com/coreos/clair/worker"
)
@ -111,8 +111,8 @@ func postLayer(w http.ResponseWriter, r *http.Request, p httprouter.Params, ctx
err = worker.Process(ctx.Store, request.Layer.Format, request.Layer.Name, request.Layer.ParentName, request.Layer.Path, request.Layer.Headers)
if err != nil {
if err == utils.ErrCouldNotExtract ||
err == utils.ErrExtractedFileTooBig ||
if err == tarutil.ErrCouldNotExtract ||
err == tarutil.ErrExtractedFileTooBig ||
err == worker.ErrUnsupported {
writeResponse(w, r, statusUnprocessableEntity, LayerEnvelope{Error: &Error{err.Error()}})
return postLayerRoute, statusUnprocessableEntity

@ -1,4 +1,4 @@
// Copyright 2015 clair authors
// Copyright 2017 clair authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
package utils
// Package tarutil implements some tar utility functions.
package tarutil
import (
"archive/tar"
@ -29,28 +30,87 @@ import (
var (
// ErrCouldNotExtract occurs when an extraction fails.
ErrCouldNotExtract = errors.New("utils: could not extract the archive")
ErrCouldNotExtract = errors.New("tarutil: could not extract the archive")
// ErrExtractedFileTooBig occurs when a file to extract is too big.
ErrExtractedFileTooBig = errors.New("utils: could not extract one or more files from the archive: file too big")
ErrExtractedFileTooBig = errors.New("tarutil: could not extract one or more files from the archive: file too big")
readLen = 6 // max bytes to sniff
// MaxExtractableFileSize enforces the maximum size of a single file within a
// tarball that will be extracted. This protects against malicious files that
// may used in an attempt to perform a Denial of Service attack.
MaxExtractableFileSize int64 = 200 * 1024 * 1024 // 200 MiB
readLen = 6 // max bytes to sniff
gzipHeader = []byte{0x1f, 0x8b}
bzip2Header = []byte{0x42, 0x5a, 0x68}
xzHeader = []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00}
)
// XzReader is an io.ReadCloser which decompresses xz compressed data.
// FilesMap is a map of files' paths to their contents.
// Values hold the raw bytes read out of the archive entry.
type FilesMap map[string][]byte
// ExtractFiles decompresses and extracts only the specified files from an
// io.Reader representing an archive.
//
// An entry is extracted when its name (with any leading "./" stripped)
// begins with one of the given prefixes. Any matching file larger than
// MaxExtractableFileSize aborts extraction with ErrExtractedFileTooBig.
func ExtractFiles(r io.Reader, filenames []string) (FilesMap, error) {
	data := make(map[string][]byte)

	// Decompress the archive.
	tr, err := NewTarReadCloser(r)
	if err != nil {
		return data, ErrCouldNotExtract
	}
	defer tr.Close()

	// For each element in the archive.
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return data, ErrCouldNotExtract
		}

		// Get the element's filename.
		filename := strings.TrimPrefix(hdr.Name, "./")

		// Determine if we should extract the element.
		toBeExtracted := false
		for _, s := range filenames {
			if strings.HasPrefix(filename, s) {
				toBeExtracted = true
				break
			}
		}

		if toBeExtracted {
			// Enforce the per-file size limit before reading the contents.
			if hdr.Size > MaxExtractableFileSize {
				return data, ErrExtractedFileTooBig
			}

			// Extract regular files and (hard/sym)link entries.
			if hdr.Typeflag == tar.TypeSymlink || hdr.Typeflag == tar.TypeLink || hdr.Typeflag == tar.TypeReg {
				d, err := ioutil.ReadAll(tr)
				if err != nil {
					// Previously this error was discarded, which could
					// silently store truncated file contents.
					return data, ErrCouldNotExtract
				}
				data[filename] = d
			}
		}
	}

	return data, nil
}
// XzReader implements io.ReadCloser for data compressed via `xz`.
//
// Reads are served through the embedded io.ReadCloser; see NewXzReader for
// how the decompression pipeline is set up.
type XzReader struct {
io.ReadCloser
cmd *exec.Cmd // the external xz subprocess
closech chan error // NOTE(review): presumably carries the subprocess's exit result — confirm in NewXzReader
}
// NewXzReader shells out to a command line xz executable (if
// available) to decompress the given io.Reader using the xz
// compression format and returns an *XzReader.
// NewXzReader returns an io.ReadCloser by executing a command line `xz`
// executable to decompress the provided io.Reader.
//
// It is the caller's responsibility to call Close on the XzReader when done.
func NewXzReader(r io.Reader) (*XzReader, error) {
rpipe, wpipe := io.Pipe()
@ -74,6 +134,7 @@ func NewXzReader(r io.Reader) (*XzReader, error) {
return &XzReader{rpipe, cmd, closech}, nil
}
// Close cleans up the resources used by an XzReader.
func (r *XzReader) Close() error {
r.ReadCloser.Close()
r.cmd.Process.Kill()
@ -88,72 +149,20 @@ type TarReadCloser struct {
io.Closer
}
// Close cleans up the resources used by a TarReadCloser.
// It delegates directly to the wrapped io.Closer.
func (r *TarReadCloser) Close() error {
return r.Closer.Close()
}
// SelectivelyExtractArchive extracts the specified files and folders
// from targz data read from the given reader and stores them in a map
// indexed by file paths.
//
// Entry names are normalized by stripping a leading "./" and, when prefix is
// non-empty, the given prefix. When maxFileSize is positive, any matching
// file larger than that limit aborts extraction with ErrExtractedFileTooBig.
func SelectivelyExtractArchive(r io.Reader, prefix string, toExtract []string, maxFileSize int64) (map[string][]byte, error) {
	data := make(map[string][]byte)

	// Create a tar or tar-gzip/tar-bzip2/tar-xz reader.
	tr, err := getTarReader(r)
	if err != nil {
		return data, ErrCouldNotExtract
	}
	defer tr.Close()

	// For each element in the archive.
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return data, ErrCouldNotExtract
		}

		// Normalize the element's filename.
		filename := strings.TrimPrefix(hdr.Name, "./")
		if prefix != "" {
			filename = strings.TrimPrefix(filename, prefix)
		}

		// Determine if we should extract the element.
		toBeExtracted := false
		for _, s := range toExtract {
			if strings.HasPrefix(filename, s) {
				toBeExtracted = true
				break
			}
		}

		if toBeExtracted {
			// Enforce the per-file size limit (zero or negative disables it).
			if maxFileSize > 0 && hdr.Size > maxFileSize {
				return data, ErrExtractedFileTooBig
			}

			// Extract regular files and (hard/sym)link entries.
			if hdr.Typeflag == tar.TypeSymlink || hdr.Typeflag == tar.TypeLink || hdr.Typeflag == tar.TypeReg {
				d, err := ioutil.ReadAll(tr)
				if err != nil {
					// Previously this error was discarded, which could
					// silently store truncated file contents.
					return data, ErrCouldNotExtract
				}
				data[filename] = d
			}
		}
	}

	return data, nil
}
// getTarReader returns a TarReaderCloser associated with the specified io.Reader.
// NewTarReadCloser attempts to detect the compression algorithm for an
// io.Reader and returns a TarReadCloser wrapping the Reader to transparently
// decompress the contents.
//
// Gzip/Bzip2/XZ detection is done by using the magic numbers:
// Gzip: the first two bytes should be 0x1f and 0x8b. Defined in the RFC1952.
// Bzip2: the first three bytes should be 0x42, 0x5a and 0x68. No RFC.
// XZ: the first three bytes should be 0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00. No RFC.
func getTarReader(r io.Reader) (*TarReadCloser, error) {
func NewTarReadCloser(r io.Reader) (*TarReadCloser, error) {
br := bufio.NewReader(r)
header, err := br.Peek(readLen)
if err == nil {

@ -0,0 +1,80 @@
// Copyright 2017 clair authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tarutil
import (
"bytes"
"os"
"path/filepath"
"runtime"
"testing"
"github.com/stretchr/testify/assert"
)
// testTarballs lists the same fixture archive in each compression format
// exercised by the extraction tests.
var testTarballs = []string{
"utils_test.tar",
"utils_test.tar.gz",
"utils_test.tar.bz2",
"utils_test.tar.xz",
}
// testfilepath resolves filename against the testdata directory that sits
// next to this source file.
func testfilepath(filename string) string {
	_, sourcePath, _, _ := runtime.Caller(0)
	return filepath.Join(filepath.Dir(sourcePath), "testdata", filename)
}
// TestExtract ensures that only files under the requested prefixes are
// extracted, with non-empty contents, for every supported archive format.
func TestExtract(t *testing.T) {
	for _, filename := range testTarballs {
		f, err := os.Open(testfilepath(filename))
		assert.Nil(t, err)

		data, err := ExtractFiles(f, []string{"test/"})
		f.Close() // close eagerly; a defer inside the loop would only run at function exit
		assert.Nil(t, err)

		if c, ok := data["test/test.txt"]; !ok {
			assert.Fail(t, "test/test.txt should have been extracted")
		} else {
			// The original asserted NotEqual(0, len(c) > 0), comparing an int
			// to a bool, which can never fail; assert on the length itself.
			assert.NotEqual(t, 0, len(c), "test/test.txt file is empty")
		}
		if _, ok := data["test.txt"]; ok {
			assert.Fail(t, "test.txt should not be extracted")
		}
	}
}
func TestExtractUncompressedData(t *testing.T) {
for _, filename := range testTarballs {
f, err := os.Open(testfilepath(filename))
assert.Nil(t, err)
defer f.Close()
_, err = ExtractFiles(bytes.NewReader([]byte("that string does not represent a tar or tar-gzip file")), []string{})
assert.Error(t, err, "Extracting uncompressed data should return an error")
}
}
func TestMaxExtractableFileSize(t *testing.T) {
for _, filename := range testTarballs {
f, err := os.Open(testfilepath(filename))
assert.Nil(t, err)
defer f.Close()
MaxExtractableFileSize = 50
_, err = ExtractFiles(f, []string{"test"})
assert.Equal(t, ErrExtractedFileTooBig, err)
}
}

@ -21,7 +21,7 @@ import (
"net/http"
"github.com/coreos/clair/database"
"github.com/coreos/clair/utils"
"github.com/coreos/clair/pkg/tarutil"
cerrors "github.com/coreos/clair/utils/errors"
"github.com/coreos/clair/worker"
)
@ -56,7 +56,7 @@ func WriteHTTPError(w http.ResponseWriter, httpStatus int, err error) {
httpStatus = http.StatusNotFound
case database.ErrBackendException:
httpStatus = http.StatusServiceUnavailable
case worker.ErrParentUnknown, worker.ErrUnsupported, utils.ErrCouldNotExtract, utils.ErrExtractedFileTooBig:
case worker.ErrParentUnknown, worker.ErrUnsupported, tarutil.ErrCouldNotExtract, tarutil.ErrExtractedFileTooBig:
httpStatus = http.StatusBadRequest
}
}

@ -15,10 +15,6 @@
package utils
import (
"bytes"
"os"
"path/filepath"
"runtime"
"testing"
"github.com/pborman/uuid"
@ -61,42 +57,6 @@ func TestString(t *testing.T) {
assert.False(t, Contains("c", []string{"a", "b"}))
}
// TestTar tests the tar.go file: selective extraction, rejection of
// non-archive input, and the per-file size limit, for each archive format.
func TestTar(t *testing.T) {
	_, path, _, _ := runtime.Caller(0)
	testDataDir := "/testdata"
	for _, filename := range []string{"utils_test.tar.gz", "utils_test.tar.bz2", "utils_test.tar.xz", "utils_test.tar"} {
		testArchivePath := filepath.Join(filepath.Dir(path), testDataDir, filename)

		// Extract non compressed data.
		_, err := SelectivelyExtractArchive(bytes.NewReader([]byte("that string does not represent a tar or tar-gzip file")), "", []string{}, 0)
		assert.Error(t, err, "Extracting non compressed data should return an error")

		// Extract an archive.
		f, _ := os.Open(testArchivePath)
		data, err := SelectivelyExtractArchive(f, "", []string{"test/"}, 0)
		f.Close() // close eagerly; a defer inside the loop would only run at function exit
		assert.Nil(t, err)

		if c, n := data["test/test.txt"]; !n {
			assert.Fail(t, "test/test.txt should have been extracted")
		} else {
			// The original asserted NotEqual(0, len(c) > 0), comparing an int
			// to a bool, which can never fail; assert on the length itself.
			assert.NotEqual(t, 0, len(c), "test/test.txt file is empty")
		}
		if _, n := data["test.txt"]; n {
			assert.Fail(t, "test.txt should not be extracted")
		}

		// File size limit.
		f, _ = os.Open(testArchivePath)
		_, err = SelectivelyExtractArchive(f, "", []string{"test"}, 50)
		f.Close()
		assert.Equal(t, ErrExtractedFileTooBig, err)
	}
}
func TestCleanURL(t *testing.T) {
assert.Equal(t, "Test http://test.cn/test Test", CleanURL("Test http://test.cn/test?foo=bar&bar=foo Test"))
}

Loading…
Cancel
Save