From afd7fe2554d65040b27291d658af21af8f8ae521 Mon Sep 17 00:00:00 2001 From: Chris Northwood Date: Thu, 21 Mar 2019 17:01:07 +0000 Subject: [PATCH 1/2] tarutil: allow file names to be specified by regexp Some features do not exist in set locations, but can be anywhere in the layer. This allows those featurefmt to specify the filenames to be scanned by regexp, as opposed to purely by path prefix. If any current users of this express paths which use regexp special characters this could be a breaking change for them (with the exception of . which will continue to work as it matches against the literal character .), however none of the code in this repo does that. fixes #456 --- pkg/tarutil/tarutil.go | 6 ++++-- pkg/tarutil/tarutil_test.go | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/pkg/tarutil/tarutil.go b/pkg/tarutil/tarutil.go index 357f96da..bf47bcb6 100644 --- a/pkg/tarutil/tarutil.go +++ b/pkg/tarutil/tarutil.go @@ -25,6 +25,7 @@ import ( "io" "io/ioutil" "os/exec" + "regexp" "strings" ) @@ -50,7 +51,8 @@ var ( type FilesMap map[string][]byte // ExtractFiles decompresses and extracts only the specified files from an -// io.Reader representing an archive. +// io.Reader representing an archive. The files to be extracted are specified +// by regexp func ExtractFiles(r io.Reader, filenames []string) (FilesMap, error) { data := make(map[string][]byte) @@ -78,7 +80,7 @@ func ExtractFiles(r io.Reader, filenames []string) (FilesMap, error) { // Determine if we should extract the element toBeExtracted := false for _, s := range filenames { - if strings.HasPrefix(filename, s) { + if match, err := regexp.MatchString("^"+s, filename); err == nil && match { toBeExtracted = true break } diff --git a/pkg/tarutil/tarutil_test.go b/pkg/tarutil/tarutil_test.go index 9db377ab..fd41cdc2 100644 --- a/pkg/tarutil/tarutil_test.go +++ b/pkg/tarutil/tarutil_test.go @@ -57,6 +57,26 @@ func TestExtract(t *testing.T) { } } +func TestExtractGlob(t *testing.T) { + for _, filename := range testTarballs { + f, err := os.Open(testfilepath(filename)) + assert.Nil(t, err) + defer f.Close() + + data, err := ExtractFiles(f, []string{`.+\.txt`}) + assert.Nil(t, err) + + if c, n := data["test/test.txt"]; !n { + assert.Fail(t, "test/test.txt should have been extracted") + } else { + assert.NotEqual(t, 0, len(c) > 0, "test/test.txt file is empty") + } + if _, n := data["test.txt"]; !n { + assert.Fail(t, "test.txt should also have been extracted") + } + } +} + func TestExtractUncompressedData(t *testing.T) { for _, filename := range testTarballs { f, err := os.Open(testfilepath(filename)) From a3a37072b54840aaebde1cd0bba62b8939dafbdc Mon Sep 17 00:00:00 2001 From: Chris Northwood Date: Fri, 22 Mar 2019 10:13:24 +0000 Subject: [PATCH 2/2] tarutil: convert all filename specs to regexps This removes the previous behaviour from tarutil to do simple prefix matching. All places where the previous prefix-based matches were specified have been updated to use a regexp instead, maintaining previous behaviour. --- ext/featurefmt/apk/apk.go | 2 +- ext/featurefmt/dpkg/dpkg.go | 2 +- ext/featurefmt/driver.go | 12 +++++++----- ext/featurefmt/rpm/rpm.go | 2 +- ext/featurens/alpinerelease/alpinerelease.go | 2 +- ext/featurens/aptsources/aptsources.go | 2 +- ext/featurens/driver.go | 12 +++++++----- ext/featurens/lsbrelease/lsbrelease.go | 2 +- ext/featurens/osrelease/osrelease.go | 6 ++++-- ext/featurens/redhatrelease/redhatrelease.go | 6 ++++-- pkg/tarutil/tarutil.go | 2 +- pkg/tarutil/tarutil_test.go | 2 +- 12 files changed, 30 insertions(+), 22 deletions(-) diff --git a/ext/featurefmt/apk/apk.go b/ext/featurefmt/apk/apk.go index d245de5d..bc07c16c 100644 --- a/ext/featurefmt/apk/apk.go +++ b/ext/featurefmt/apk/apk.go @@ -90,5 +90,5 @@ func (l lister) ListFeatures(files tarutil.FilesMap) ([]database.LayerFeature, e } func (l lister) RequiredFilenames() []string { - return []string{"lib/apk/db/installed"} + return []string{"^lib/apk/db/installed"} } diff --git a/ext/featurefmt/dpkg/dpkg.go b/ext/featurefmt/dpkg/dpkg.go index ea88c372..f30f3f33 100644 --- a/ext/featurefmt/dpkg/dpkg.go +++ b/ext/featurefmt/dpkg/dpkg.go @@ -38,7 +38,7 @@ var ( type lister struct{} func (l lister) RequiredFilenames() []string { - return []string{"var/lib/dpkg/status"} + return []string{"^var/lib/dpkg/status"} } func init() { diff --git a/ext/featurefmt/driver.go b/ext/featurefmt/driver.go index 524973dc..8f975c0c 100644 --- a/ext/featurefmt/driver.go +++ b/ext/featurefmt/driver.go @@ -35,10 +35,11 @@ type Lister interface { // ListFeatures produces a list of Features present in an image layer. ListFeatures(tarutil.FilesMap) ([]database.LayerFeature, error) - // RequiredFilenames returns the list of files required to be in the FilesMap - // provided to the ListFeatures method. + // RequiredFilenames returns a list of patterns for filenames that will + // be in the FilesMap provided to the ListFeatures method. // - // Filenames must not begin with "/". + // The patterns are expressed as regexps, and will be matched against + // full paths that do not include the leading "/". RequiredFilenames() []string } @@ -102,8 +103,9 @@ func ListFeatures(files tarutil.FilesMap, toUse []database.Detector) ([]database return features, nil } -// RequiredFilenames returns all files required by the give extensions. Any -// extension metadata that has non feature-detector type will be skipped. +// RequiredFilenames returns all file patterns that will be passed to the +// given extensions. These patterns are expressed as regexps. Any extension +// metadata that has non feature-detector type will be skipped. func RequiredFilenames(toUse []database.Detector) (files []string) { listersM.RLock() defer listersM.RUnlock() diff --git a/ext/featurefmt/rpm/rpm.go b/ext/featurefmt/rpm/rpm.go index f8d91055..71553fc3 100644 --- a/ext/featurefmt/rpm/rpm.go +++ b/ext/featurefmt/rpm/rpm.go @@ -46,7 +46,7 @@ func init() { } func (l lister) RequiredFilenames() []string { - return []string{"var/lib/rpm/Packages"} + return []string{"^var/lib/rpm/Packages"} } func isIgnored(packageName string) bool { diff --git a/ext/featurens/alpinerelease/alpinerelease.go b/ext/featurens/alpinerelease/alpinerelease.go index 8fe6f808..09163cb8 100644 --- a/ext/featurens/alpinerelease/alpinerelease.go +++ b/ext/featurens/alpinerelease/alpinerelease.go @@ -62,5 +62,5 @@ func (d detector) Detect(files tarutil.FilesMap) (*database.Namespace, error) { } func (d detector) RequiredFilenames() []string { - return []string{alpineReleasePath} + return []string{"^" + alpineReleasePath} } diff --git a/ext/featurens/aptsources/aptsources.go b/ext/featurens/aptsources/aptsources.go index 287073f4..7ae3f5cb 100644 --- a/ext/featurens/aptsources/aptsources.go +++ b/ext/featurens/aptsources/aptsources.go @@ -88,5 +88,5 @@ func (d detector) Detect(files tarutil.FilesMap) (*database.Namespace, error) { } func (d detector) RequiredFilenames() []string { - return []string{"etc/apt/sources.list"} + return []string{`^etc/apt/sources\.list`} } diff --git a/ext/featurens/driver.go b/ext/featurens/driver.go index 781ba5d4..e157cdbe 100644 --- a/ext/featurens/driver.go +++ b/ext/featurens/driver.go @@ -39,10 +39,11 @@ type Detector interface { // layer. Detect(tarutil.FilesMap) (*database.Namespace, error) - // RequiredFilenames returns the list of files required to be in the FilesMap - // provided to the Detect method. + // RequiredFilenames returns a list of patterns for filenames that will + // be in the FilesMap provided to the Detect method. // - // Filenames must not begin with "/". + // The patterns are expressed as regexps, and will be matched against + // full paths that do not include the leading "/". RequiredFilenames() []string } @@ -108,8 +109,9 @@ func Detect(files tarutil.FilesMap, toUse []database.Detector) ([]database.Layer return namespaces, nil } -// RequiredFilenames returns all files required by the give extensions. Any -// extension metadata that has non namespace-detector type will be skipped. +// RequiredFilenames returns all file patterns that will be passed to the +// given extensions. These patterns are expressed as regexps. Any extension +// metadata that has non namespace-detector type will be skipped. func RequiredFilenames(toUse []database.Detector) (files []string) { detectorsM.RLock() defer detectorsM.RUnlock() diff --git a/ext/featurens/lsbrelease/lsbrelease.go b/ext/featurens/lsbrelease/lsbrelease.go index de528327..3d4559be 100644 --- a/ext/featurens/lsbrelease/lsbrelease.go +++ b/ext/featurens/lsbrelease/lsbrelease.go @@ -94,5 +94,5 @@ func (d detector) Detect(files tarutil.FilesMap) (*database.Namespace, error) { } func (d *detector) RequiredFilenames() []string { - return []string{"etc/lsb-release"} + return []string{"^etc/lsb-release"} } diff --git a/ext/featurens/osrelease/osrelease.go b/ext/featurens/osrelease/osrelease.go index e52ff7b2..03c4bfc7 100644 --- a/ext/featurens/osrelease/osrelease.go +++ b/ext/featurens/osrelease/osrelease.go @@ -34,6 +34,8 @@ var ( osReleaseOSRegexp = regexp.MustCompile(`^ID=(.*)`) osReleaseVersionRegexp = regexp.MustCompile(`^VERSION_ID=(.*)`) + filenames = []string{"etc/os-release", "usr/lib/os-release"} + // blacklistFilenames are files that should exclude this detector. blacklistFilenames = []string{ "etc/oracle-release", @@ -57,7 +59,7 @@ func (d detector) Detect(files tarutil.FilesMap) (*database.Namespace, error) { } } - for _, filePath := range d.RequiredFilenames() { + for _, filePath := range filenames { f, hasFile := files[filePath] if !hasFile { continue @@ -100,5 +102,5 @@ func (d detector) Detect(files tarutil.FilesMap) (*database.Namespace, error) { } func (d detector) RequiredFilenames() []string { - return []string{"etc/os-release", "usr/lib/os-release"} + return []string{"^(etc|usr/lib)/os-release"} } diff --git a/ext/featurens/redhatrelease/redhatrelease.go b/ext/featurens/redhatrelease/redhatrelease.go index a8d27081..3610d435 100644 --- a/ext/featurens/redhatrelease/redhatrelease.go +++ b/ext/featurens/redhatrelease/redhatrelease.go @@ -33,6 +33,8 @@ var ( oracleReleaseRegexp = regexp.MustCompile(`(?POracle) (Linux Server release) (?P[\d]+)`) centosReleaseRegexp = regexp.MustCompile(`(?P[^\s]*) (Linux release|release) (?P[\d]+)`) redhatReleaseRegexp = regexp.MustCompile(`(?PRed Hat Enterprise Linux) (Client release|Server release|Workstation release) (?P[\d]+)`) + + filenames = []string{"etc/oracle-release", "etc/centos-release", "etc/redhat-release", "etc/system-release"} ) type detector struct{} @@ -42,7 +44,7 @@ func init() { } func (d detector) Detect(files tarutil.FilesMap) (*database.Namespace, error) { - for _, filePath := range d.RequiredFilenames() { + for _, filePath := range filenames { f, hasFile := files[filePath] if !hasFile { continue @@ -83,5 +85,5 @@ func (d detector) Detect(files tarutil.FilesMap) (*database.Namespace, error) { } func (d detector) RequiredFilenames() []string { - return []string{"etc/oracle-release", "etc/centos-release", "etc/redhat-release", "etc/system-release"} + return []string{`^etc/(oracle|centos|redhat|system)-release`} } diff --git a/pkg/tarutil/tarutil.go b/pkg/tarutil/tarutil.go index bf47bcb6..23d49fa3 100644 --- a/pkg/tarutil/tarutil.go +++ b/pkg/tarutil/tarutil.go @@ -80,7 +80,7 @@ func ExtractFiles(r io.Reader, filenames []string) (FilesMap, error) { // Determine if we should extract the element toBeExtracted := false for _, s := range filenames { - if match, err := regexp.MatchString("^"+s, filename); err == nil && match { + if match, err := regexp.MatchString(s, filename); err == nil && match { toBeExtracted = true break } diff --git a/pkg/tarutil/tarutil_test.go b/pkg/tarutil/tarutil_test.go index fd41cdc2..b809aee6 100644 --- a/pkg/tarutil/tarutil_test.go +++ b/pkg/tarutil/tarutil_test.go @@ -57,7 +57,7 @@ func TestExtract(t *testing.T) { } } -func TestExtractGlob(t *testing.T) { +func TestExtractRegex(t *testing.T) { for _, filename := range testTarballs { f, err := os.Open(testfilepath(filename)) assert.Nil(t, err)