clair/worker.go

227 lines
8.2 KiB
Go
Raw Normal View History

2017-01-13 07:08:52 +00:00
// Copyright 2017 clair authors
2015-11-13 19:11:28 +00:00
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clair
2015-11-13 19:11:28 +00:00
import (
2017-01-18 02:40:59 +00:00
"regexp"
log "github.com/sirupsen/logrus"
2015-11-13 19:11:28 +00:00
"github.com/coreos/clair/database"
2017-01-13 23:49:02 +00:00
"github.com/coreos/clair/ext/featurefmt"
2017-01-13 21:48:12 +00:00
"github.com/coreos/clair/ext/featurens"
2017-01-13 08:07:35 +00:00
"github.com/coreos/clair/ext/imagefmt"
2017-01-13 07:08:52 +00:00
"github.com/coreos/clair/pkg/commonerr"
2017-01-13 21:48:12 +00:00
"github.com/coreos/clair/pkg/tarutil"
2015-11-13 19:11:28 +00:00
)
// Version (integer) represents the worker version.
// It is increased each time the engine changes, and is compared against the
// EngineVersion recorded on a stored layer to decide whether that layer has to
// be analyzed again.
const Version = 3

// logLayerName is the structured-log field key under which the name of the
// layer being processed is recorded.
const logLayerName = "layer"
var (
// ErrUnsupported is the error that should be raised when an OS or package
// manager is not supported.
2017-01-13 07:08:52 +00:00
ErrUnsupported = commonerr.NewBadRequestError("worker: OS and/or package manager are not supported")
2015-11-13 19:11:28 +00:00
// ErrParentUnknown is the error that should be raised when a parent layer
// has yet to be processed for the current layer.
2017-01-13 07:08:52 +00:00
ErrParentUnknown = commonerr.NewBadRequestError("worker: parent layer is unknown, it must be processed first")
2017-01-18 02:40:59 +00:00
urlParametersRegexp = regexp.MustCompile(`(\?|\&)([^=]+)\=([^ &]+)`)
2015-11-13 19:11:28 +00:00
)
2017-01-18 02:40:59 +00:00
// cleanURL returns str with every URL parameter matched by
// urlParametersRegexp stripped out. In this file it is applied to layer paths
// before they are written to the log.
func cleanURL(str string) string {
	// The replacement is empty, so a literal replacement is equivalent to a
	// template-expanding one.
	return urlParametersRegexp.ReplaceAllLiteralString(str, "")
}
// ProcessLayer detects the Namespace of a layer, the features it adds/removes,
// and then stores everything in the database.
//
// TODO(Quentin-M): We could have a goroutine that looks for layers that have
// been analyzed with an older engine version and that processes them.
func ProcessLayer(datastore database.Datastore, imageFormat, name, parentName, path string, headers map[string]string) error {
// Verify parameters.
if name == "" {
2017-01-13 07:08:52 +00:00
return commonerr.NewBadRequestError("could not process a layer which does not have a name")
2015-11-13 19:11:28 +00:00
}
2015-11-13 19:11:28 +00:00
if path == "" {
2017-01-13 07:08:52 +00:00
return commonerr.NewBadRequestError("could not process a layer which does not have a path")
2015-11-13 19:11:28 +00:00
}
if imageFormat == "" {
2017-01-13 07:08:52 +00:00
return commonerr.NewBadRequestError("could not process a layer which does not have a format")
}
log.WithFields(log.Fields{logLayerName: name, "path": cleanURL(path), "engine version": Version, "parent layer": parentName, "format": imageFormat}).Debug("processing layer")
2015-11-13 19:11:28 +00:00
// Check to see if the layer is already in the database.
layer, err := datastore.FindLayer(name, false, false)
2017-01-13 07:08:52 +00:00
if err != nil && err != commonerr.ErrNotFound {
2015-11-13 19:11:28 +00:00
return err
}
2017-01-13 07:08:52 +00:00
if err == commonerr.ErrNotFound {
// New layer case.
layer = database.Layer{Name: name, EngineVersion: Version}
2015-11-13 19:11:28 +00:00
// Retrieve the parent if it has one.
// We need to get it with its Features in order to diff them.
if parentName != "" {
parent, err := datastore.FindLayer(parentName, true, false)
2017-01-13 07:08:52 +00:00
if err != nil && err != commonerr.ErrNotFound {
2015-11-13 19:11:28 +00:00
return err
}
2017-01-13 07:08:52 +00:00
if err == commonerr.ErrNotFound {
log.WithFields(log.Fields{logLayerName: name, "parent layer": parentName}).Warning("the parent layer is unknown. it must be processed first")
2015-11-13 19:11:28 +00:00
return ErrParentUnknown
}
layer.Parent = &parent
}
} else {
// The layer is already in the database, check if we need to update it.
if layer.EngineVersion >= Version {
log.WithFields(log.Fields{logLayerName: name, "past engine version": layer.EngineVersion, "current engine version": Version}).Debug("layer content has already been processed in the past with older engine. skipping analysis")
return nil
2015-11-13 19:11:28 +00:00
}
log.WithFields(log.Fields{logLayerName: name, "past engine version": layer.EngineVersion, "current engine version": Version}).Debug("layer content has already been processed in the past with older engine. analyzing again")
2015-11-13 19:11:28 +00:00
}
// Analyze the content.
layer.Namespaces, layer.Features, err = detectContent(imageFormat, name, path, headers, layer.Parent)
2015-11-13 19:11:28 +00:00
if err != nil {
return err
}
return datastore.InsertLayer(layer)
2015-11-13 19:11:28 +00:00
}
2017-01-13 21:48:12 +00:00
// detectContent downloads a layer's archive and extracts its Namespace and
// Features.
func detectContent(imageFormat, name, path string, headers map[string]string, parent *database.Layer) (namespaces []database.Namespace, featureVersions []database.FeatureVersion, err error) {
2017-01-13 23:49:02 +00:00
totalRequiredFiles := append(featurefmt.RequiredFilenames(), featurens.RequiredFilenames()...)
2017-01-13 21:48:12 +00:00
files, err := imagefmt.Extract(imageFormat, path, headers, totalRequiredFiles)
2015-11-13 19:11:28 +00:00
if err != nil {
log.WithError(err).WithFields(log.Fields{logLayerName: name, "path": cleanURL(path)}).Error("failed to extract data from path")
2015-11-13 19:11:28 +00:00
return
}
namespaces, err = detectNamespaces(name, files, parent)
2017-01-13 21:48:12 +00:00
if err != nil {
return
}
2015-11-13 19:11:28 +00:00
// Detect features.
var fv []database.FeatureVersion
// detect feature versions in all namespaces
for _, namespace := range namespaces {
fv, err = detectFeatureVersions(name, files, &namespace, parent)
if err != nil {
return
}
featureVersions = append(featureVersions, fv...)
2015-11-13 19:11:28 +00:00
}
if len(featureVersions) > 0 {
log.WithFields(log.Fields{logLayerName: name, "feature count": len(featureVersions)}).Debug("detected features")
}
2015-11-13 19:11:28 +00:00
return
}
// detectNamespaces returns a list of unique namespaces detected in a layer and its ancestry.
func detectNamespaces(name string, files tarutil.FilesMap, parent *database.Layer) (namespaces []database.Namespace, err error) {
nsSet := map[string]*database.Namespace{}
nsCurrent, err := featurens.Detect(files)
2017-01-13 21:48:12 +00:00
if err != nil {
return
}
for _, ns := range nsCurrent {
nsSet[ns.Name] = &ns
log.WithFields(log.Fields{logLayerName: name, "detected namespace": ns.Name}).Debug("detected namespace")
}
2015-11-13 19:11:28 +00:00
if parent != nil {
for _, ns := range parent.Namespaces {
nsSet[ns.Name] = &ns
log.WithFields(log.Fields{logLayerName: name, "detected namespace": ns.Name}).Debug("detected namespace (from parent)")
2015-11-13 19:11:28 +00:00
}
}
for _, ns := range nsSet {
namespaces = append(namespaces, *ns)
}
2015-11-13 19:11:28 +00:00
return
}
2017-01-13 23:49:02 +00:00
func detectFeatureVersions(name string, files tarutil.FilesMap, namespace *database.Namespace, parent *database.Layer) (features []database.FeatureVersion, err error) {
// TODO(Quentin-M): We need to pass the parent image to DetectFeatures because it's possible that
// some detectors would need it in order to produce the entire feature list (if they can only
// detect a diff). Also, we should probably pass the detected namespace so detectors could
// make their own decision.
2017-01-13 23:49:02 +00:00
features, err = featurefmt.ListFeatures(files)
2015-11-13 19:11:28 +00:00
if err != nil {
return
2015-11-13 19:11:28 +00:00
}
// If there are no FeatureVersions, use parent's FeatureVersions if possible.
// TODO(Quentin-M): We eventually want to give the choice to each detectors to use none/some of
// their parent's FeatureVersions. It would be useful for detectors that can't find their entire
// result using one Layer.
if len(features) == 0 && parent != nil {
features = parent.Features
return
}
// Build a map of the namespaces for each FeatureVersion in our parent layer.
parentFeatureNamespaces := make(map[string]database.Namespace)
if parent != nil {
for _, parentFeature := range parent.Features {
parentFeatureNamespaces[parentFeature.Feature.Name+":"+parentFeature.Version] = parentFeature.Feature.Namespace
}
}
// Ensure that each FeatureVersion has an associated Namespace.
for i, feature := range features {
if feature.Feature.Namespace.Name != "" {
// There is a Namespace associated.
continue
2015-11-13 19:11:28 +00:00
}
if parentFeatureNamespace, ok := parentFeatureNamespaces[feature.Feature.Name+":"+feature.Version]; ok {
// The FeatureVersion is present in the parent layer; associate with their Namespace.
features[i].Feature.Namespace = parentFeatureNamespace
continue
}
if namespace != nil {
// The Namespace has been detected in this layer; associate it.
features[i].Feature.Namespace = *namespace
continue
}
log.WithFields(log.Fields{"feature name": feature.Feature.Name, "feature version": feature.Version, logLayerName: name}).Warning("Namespace unknown")
err = ErrUnsupported
return
2015-11-13 19:11:28 +00:00
}
return
}