From 03bac0f1b6f504a416937e309a62fdfc308dc397 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Fri, 13 Jan 2017 00:56:08 -0500 Subject: [PATCH] pkg: utils/tar.go -> pkg/tarutil --- api/v1/routes.go | 6 +- utils/tar.go => pkg/tarutil/tarutil.go | 141 ++++++++++-------- pkg/tarutil/tarutil_test.go | 80 ++++++++++ .../tarutil}/testdata/utils_test.tar | Bin .../tarutil}/testdata/utils_test.tar.bz2 | Bin .../tarutil}/testdata/utils_test.tar.gz | Bin .../tarutil}/testdata/utils_test.tar.xz | Bin utils/http/http.go | 4 +- utils/utils_test.go | 40 ----- 9 files changed, 160 insertions(+), 111 deletions(-) rename utils/tar.go => pkg/tarutil/tarutil.go (70%) create mode 100644 pkg/tarutil/tarutil_test.go rename {utils => pkg/tarutil}/testdata/utils_test.tar (100%) rename {utils => pkg/tarutil}/testdata/utils_test.tar.bz2 (100%) rename {utils => pkg/tarutil}/testdata/utils_test.tar.gz (100%) rename {utils => pkg/tarutil}/testdata/utils_test.tar.xz (100%) diff --git a/api/v1/routes.go b/api/v1/routes.go index ed82ebe2..344e2e28 100644 --- a/api/v1/routes.go +++ b/api/v1/routes.go @@ -27,7 +27,7 @@ import ( "github.com/coreos/clair/api/context" "github.com/coreos/clair/database" - "github.com/coreos/clair/utils" + "github.com/coreos/clair/pkg/tarutil" cerrors "github.com/coreos/clair/utils/errors" "github.com/coreos/clair/worker" ) @@ -111,8 +111,8 @@ func postLayer(w http.ResponseWriter, r *http.Request, p httprouter.Params, ctx err = worker.Process(ctx.Store, request.Layer.Format, request.Layer.Name, request.Layer.ParentName, request.Layer.Path, request.Layer.Headers) if err != nil { - if err == utils.ErrCouldNotExtract || - err == utils.ErrExtractedFileTooBig || + if err == tarutil.ErrCouldNotExtract || + err == tarutil.ErrExtractedFileTooBig || err == worker.ErrUnsupported { writeResponse(w, r, statusUnprocessableEntity, LayerEnvelope{Error: &Error{err.Error()}}) return postLayerRoute, statusUnprocessableEntity diff --git a/utils/tar.go b/pkg/tarutil/tarutil.go similarity index 70% rename from utils/tar.go rename to pkg/tarutil/tarutil.go index f2cff669..357f96da 100644 --- a/utils/tar.go +++ b/pkg/tarutil/tarutil.go @@ -1,4 +1,4 @@ -// Copyright 2015 clair authors +// Copyright 2017 clair authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -package utils +// Package tarutil implements some tar utility functions. +package tarutil import ( "archive/tar" @@ -29,28 +30,87 @@ import ( var ( // ErrCouldNotExtract occurs when an extraction fails. - ErrCouldNotExtract = errors.New("utils: could not extract the archive") + ErrCouldNotExtract = errors.New("tarutil: could not extract the archive") // ErrExtractedFileTooBig occurs when a file to extract is too big. - ErrExtractedFileTooBig = errors.New("utils: could not extract one or more files from the archive: file too big") + ErrExtractedFileTooBig = errors.New("tarutil: could not extract one or more files from the archive: file too big") - readLen = 6 // max bytes to sniff + // MaxExtractableFileSize enforces the maximum size of a single file within a + // tarball that will be extracted. This protects against malicious files that + // may used in an attempt to perform a Denial of Service attack. + MaxExtractableFileSize int64 = 200 * 1024 * 1024 // 200 MiB + readLen = 6 // max bytes to sniff gzipHeader = []byte{0x1f, 0x8b} bzip2Header = []byte{0x42, 0x5a, 0x68} xzHeader = []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00} ) -// XzReader is an io.ReadCloser which decompresses xz compressed data. +// FilesMap is a map of files' paths to their contents. +type FilesMap map[string][]byte + +// ExtractFiles decompresses and extracts only the specified files from an +// io.Reader representing an archive. +func ExtractFiles(r io.Reader, filenames []string) (FilesMap, error) { + data := make(map[string][]byte) + + // Decompress the archive. + tr, err := NewTarReadCloser(r) + if err != nil { + return data, ErrCouldNotExtract + } + defer tr.Close() + + // For each element in the archive + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return data, ErrCouldNotExtract + } + + // Get element filename + filename := hdr.Name + filename = strings.TrimPrefix(filename, "./") + + // Determine if we should extract the element + toBeExtracted := false + for _, s := range filenames { + if strings.HasPrefix(filename, s) { + toBeExtracted = true + break + } + } + + if toBeExtracted { + // File size limit + if hdr.Size > MaxExtractableFileSize { + return data, ErrExtractedFileTooBig + } + + // Extract the element + if hdr.Typeflag == tar.TypeSymlink || hdr.Typeflag == tar.TypeLink || hdr.Typeflag == tar.TypeReg { + d, _ := ioutil.ReadAll(tr) + data[filename] = d + } + } + } + + return data, nil +} + +// XzReader implements io.ReadCloser for data compressed via `xz`. type XzReader struct { io.ReadCloser cmd *exec.Cmd closech chan error } -// NewXzReader shells out to a command line xz executable (if -// available) to decompress the given io.Reader using the xz -// compression format and returns an *XzReader. +// NewXzReader returns an io.ReadCloser by executing a command line `xz` +// executable to decompress the provided io.Reader. +// // It is the caller's responsibility to call Close on the XzReader when done. func NewXzReader(r io.Reader) (*XzReader, error) { rpipe, wpipe := io.Pipe() @@ -74,6 +134,7 @@ func NewXzReader(r io.Reader) (*XzReader, error) { return &XzReader{rpipe, cmd, closech}, nil } +// Close cleans up the resources used by an XzReader. func (r *XzReader) Close() error { r.ReadCloser.Close() r.cmd.Process.Kill() @@ -88,72 +149,20 @@ type TarReadCloser struct { io.Closer } +// Close cleans up the resources used by a TarReadCloser. func (r *TarReadCloser) Close() error { return r.Closer.Close() } -// SelectivelyExtractArchive extracts the specified files and folders -// from targz data read from the given reader and store them in a map indexed by file paths -func SelectivelyExtractArchive(r io.Reader, prefix string, toExtract []string, maxFileSize int64) (map[string][]byte, error) { - data := make(map[string][]byte) - - // Create a tar or tar/tar-gzip/tar-bzip2/tar-xz reader - tr, err := getTarReader(r) - if err != nil { - return data, ErrCouldNotExtract - } - defer tr.Close() - - // For each element in the archive - for { - hdr, err := tr.Next() - if err == io.EOF { - break - } - if err != nil { - return data, ErrCouldNotExtract - } - - // Get element filename - filename := hdr.Name - filename = strings.TrimPrefix(filename, "./") - if prefix != "" { - filename = strings.TrimPrefix(filename, prefix) - } - - // Determine if we should extract the element - toBeExtracted := false - for _, s := range toExtract { - if strings.HasPrefix(filename, s) { - toBeExtracted = true - break - } - } - - if toBeExtracted { - // File size limit - if maxFileSize > 0 && hdr.Size > maxFileSize { - return data, ErrExtractedFileTooBig - } - - // Extract the element - if hdr.Typeflag == tar.TypeSymlink || hdr.Typeflag == tar.TypeLink || hdr.Typeflag == tar.TypeReg { - d, _ := ioutil.ReadAll(tr) - data[filename] = d - } - } - } - - return data, nil -} - -// getTarReader returns a TarReaderCloser associated with the specified io.Reader. +// NewTarReadCloser attempts to detect the compression algorithm for an +// io.Reader and returns a TarReadCloser wrapping the Reader to transparently +// decompress the contents. // // Gzip/Bzip2/XZ detection is done by using the magic numbers: // Gzip: the first two bytes should be 0x1f and 0x8b. Defined in the RFC1952. // Bzip2: the first three bytes should be 0x42, 0x5a and 0x68. No RFC. // XZ: the first three bytes should be 0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00. No RFC. -func getTarReader(r io.Reader) (*TarReadCloser, error) { +func NewTarReadCloser(r io.Reader) (*TarReadCloser, error) { br := bufio.NewReader(r) header, err := br.Peek(readLen) if err == nil { diff --git a/pkg/tarutil/tarutil_test.go b/pkg/tarutil/tarutil_test.go new file mode 100644 index 00000000..9db377ab --- /dev/null +++ b/pkg/tarutil/tarutil_test.go @@ -0,0 +1,80 @@ +// Copyright 2017 clair authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package tarutil + +import ( + "bytes" + "os" + "path/filepath" + "runtime" + "testing" + + "github.com/stretchr/testify/assert" +) + +var testTarballs = []string{ + "utils_test.tar", + "utils_test.tar.gz", + "utils_test.tar.bz2", + "utils_test.tar.xz", +} + +func testfilepath(filename string) string { + _, path, _, _ := runtime.Caller(0) + testDataDir := "/testdata" + return filepath.Join(filepath.Dir(path), testDataDir, filename) +} + +func TestExtract(t *testing.T) { + for _, filename := range testTarballs { + f, err := os.Open(testfilepath(filename)) + assert.Nil(t, err) + defer f.Close() + + data, err := ExtractFiles(f, []string{"test/"}) + assert.Nil(t, err) + + if c, n := data["test/test.txt"]; !n { + assert.Fail(t, "test/test.txt should have been extracted") + } else { + assert.NotEqual(t, 0, len(c) > 0, "test/test.txt file is empty") + } + if _, n := data["test.txt"]; n { + assert.Fail(t, "test.txt should not be extracted") + } + } +} + +func TestExtractUncompressedData(t *testing.T) { + for _, filename := range testTarballs { + f, err := os.Open(testfilepath(filename)) + assert.Nil(t, err) + defer f.Close() + + _, err = ExtractFiles(bytes.NewReader([]byte("that string does not represent a tar or tar-gzip file")), []string{}) + assert.Error(t, err, "Extracting uncompressed data should return an error") + } +} + +func TestMaxExtractableFileSize(t *testing.T) { + for _, filename := range testTarballs { + f, err := os.Open(testfilepath(filename)) + assert.Nil(t, err) + defer f.Close() + MaxExtractableFileSize = 50 + _, err = ExtractFiles(f, []string{"test"}) + assert.Equal(t, ErrExtractedFileTooBig, err) + } +} diff --git a/utils/testdata/utils_test.tar b/pkg/tarutil/testdata/utils_test.tar similarity index 100% rename from utils/testdata/utils_test.tar rename to pkg/tarutil/testdata/utils_test.tar diff --git a/utils/testdata/utils_test.tar.bz2 b/pkg/tarutil/testdata/utils_test.tar.bz2 similarity index 100% rename from utils/testdata/utils_test.tar.bz2 rename to pkg/tarutil/testdata/utils_test.tar.bz2 diff --git a/utils/testdata/utils_test.tar.gz b/pkg/tarutil/testdata/utils_test.tar.gz similarity index 100% rename from utils/testdata/utils_test.tar.gz rename to pkg/tarutil/testdata/utils_test.tar.gz diff --git a/utils/testdata/utils_test.tar.xz b/pkg/tarutil/testdata/utils_test.tar.xz similarity index 100% rename from utils/testdata/utils_test.tar.xz rename to pkg/tarutil/testdata/utils_test.tar.xz diff --git a/utils/http/http.go b/utils/http/http.go index 1aac81f3..b02430a4 100644 --- a/utils/http/http.go +++ b/utils/http/http.go @@ -21,7 +21,7 @@ import ( "net/http" "github.com/coreos/clair/database" - "github.com/coreos/clair/utils" + "github.com/coreos/clair/pkg/tarutil" cerrors "github.com/coreos/clair/utils/errors" "github.com/coreos/clair/worker" ) @@ -56,7 +56,7 @@ func WriteHTTPError(w http.ResponseWriter, httpStatus int, err error) { httpStatus = http.StatusNotFound case database.ErrBackendException: httpStatus = http.StatusServiceUnavailable - case worker.ErrParentUnknown, worker.ErrUnsupported, utils.ErrCouldNotExtract, utils.ErrExtractedFileTooBig: + case worker.ErrParentUnknown, worker.ErrUnsupported, tarutil.ErrCouldNotExtract, tarutil.ErrExtractedFileTooBig: httpStatus = http.StatusBadRequest } } diff --git a/utils/utils_test.go b/utils/utils_test.go index 57bfc9e0..83cc6cce 100644 --- a/utils/utils_test.go +++ b/utils/utils_test.go @@ -15,10 +15,6 @@ package utils import ( - "bytes" - "os" - "path/filepath" - "runtime" "testing" "github.com/pborman/uuid" @@ -61,42 +57,6 @@ func TestString(t *testing.T) { assert.False(t, Contains("c", []string{"a", "b"})) } -// TestTar tests the tar.go file -func TestTar(t *testing.T) { - var err error - var data map[string][]byte - _, path, _, _ := runtime.Caller(0) - testDataDir := "/testdata" - for _, filename := range []string{"utils_test.tar.gz", "utils_test.tar.bz2", "utils_test.tar.xz", "utils_test.tar"} { - testArchivePath := filepath.Join(filepath.Dir(path), testDataDir, filename) - - // Extract non compressed data - data, err = SelectivelyExtractArchive(bytes.NewReader([]byte("that string does not represent a tar or tar-gzip file")), "", []string{}, 0) - assert.Error(t, err, "Extracting non compressed data should return an error") - - // Extract an archive - f, _ := os.Open(testArchivePath) - defer f.Close() - data, err = SelectivelyExtractArchive(f, "", []string{"test/"}, 0) - assert.Nil(t, err) - - if c, n := data["test/test.txt"]; !n { - assert.Fail(t, "test/test.txt should have been extracted") - } else { - assert.NotEqual(t, 0, len(c) > 0, "test/test.txt file is empty") - } - if _, n := data["test.txt"]; n { - assert.Fail(t, "test.txt should not be extracted") - } - - // File size limit - f, _ = os.Open(testArchivePath) - defer f.Close() - data, err = SelectivelyExtractArchive(f, "", []string{"test"}, 50) - assert.Equal(t, ErrExtractedFileTooBig, err) - } -} - func TestCleanURL(t *testing.T) { assert.Equal(t, "Test http://test.cn/test Test", CleanURL("Test http://test.cn/test?foo=bar&bar=foo Test")) }