// Copyright 2017 clair authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package clair import ( "regexp" log "github.com/sirupsen/logrus" "github.com/coreos/clair/database" "github.com/coreos/clair/ext/featurefmt" "github.com/coreos/clair/ext/featurens" "github.com/coreos/clair/ext/imagefmt" "github.com/coreos/clair/pkg/commonerr" "github.com/coreos/clair/pkg/tarutil" ) const ( // Version (integer) represents the worker version. // Increased each time the engine changes. Version = 3 logLayerName = "layer" ) var ( // ErrUnsupported is the error that should be raised when an OS or package // manager is not supported. ErrUnsupported = commonerr.NewBadRequestError("worker: OS and/or package manager are not supported") // ErrParentUnknown is the error that should be raised when a parent layer // has yet to be processed for the current layer. ErrParentUnknown = commonerr.NewBadRequestError("worker: parent layer is unknown, it must be processed first") urlParametersRegexp = regexp.MustCompile(`(\?|\&)([^=]+)\=([^ &]+)`) ) // cleanURL removes all parameters from an URL. func cleanURL(str string) string { return urlParametersRegexp.ReplaceAllString(str, "") } // ProcessLayer detects the Namespace of a layer, the features it adds/removes, // and then stores everything in the database. // // TODO(Quentin-M): We could have a goroutine that looks for layers that have // been analyzed with an older engine version and that processes them. func ProcessLayer(datastore database.Datastore, imageFormat, name, parentName, path string, headers map[string]string) error { // Verify parameters. if name == "" { return commonerr.NewBadRequestError("could not process a layer which does not have a name") } if path == "" { return commonerr.NewBadRequestError("could not process a layer which does not have a path") } if imageFormat == "" { return commonerr.NewBadRequestError("could not process a layer which does not have a format") } log.WithFields(log.Fields{logLayerName: name, "path": cleanURL(path), "engine version": Version, "parent layer": parentName, "format": imageFormat}).Debug("processing layer") // Check to see if the layer is already in the database. layer, err := datastore.FindLayer(name, false, false) if err != nil && err != commonerr.ErrNotFound { return err } if err == commonerr.ErrNotFound { // New layer case. layer = database.Layer{Name: name, EngineVersion: Version} // Retrieve the parent if it has one. // We need to get it with its Features in order to diff them. if parentName != "" { parent, err := datastore.FindLayer(parentName, true, false) if err != nil && err != commonerr.ErrNotFound { return err } if err == commonerr.ErrNotFound { log.WithFields(log.Fields{logLayerName: name, "parent layer": parentName}).Warning("the parent layer is unknown. it must be processed first") return ErrParentUnknown } layer.Parent = &parent } } else { // The layer is already in the database, check if we need to update it. if layer.EngineVersion >= Version { log.WithFields(log.Fields{logLayerName: name, "past engine version": layer.EngineVersion, "current engine version": Version}).Debug("layer content has already been processed in the past with older engine. skipping analysis") return nil } log.WithFields(log.Fields{logLayerName: name, "past engine version": layer.EngineVersion, "current engine version": Version}).Debug("layer content has already been processed in the past with older engine. analyzing again") } // Analyze the content. layer.Namespaces, layer.Features, err = detectContent(imageFormat, name, path, headers, layer.Parent) if err != nil { return err } return datastore.InsertLayer(layer) } // detectContent downloads a layer's archive and extracts its Namespace and // Features. func detectContent(imageFormat, name, path string, headers map[string]string, parent *database.Layer) (namespaces []database.Namespace, featureVersions []database.FeatureVersion, err error) { totalRequiredFiles := append(featurefmt.RequiredFilenames(), featurens.RequiredFilenames()...) files, err := imagefmt.Extract(imageFormat, path, headers, totalRequiredFiles) if err != nil { log.WithError(err).WithFields(log.Fields{logLayerName: name, "path": cleanURL(path)}).Error("failed to extract data from path") return } namespaces, err = detectNamespaces(name, files, parent) if err != nil { return } featureVersions, err = detectFeatureVersions(name, files, namespaces, parent) if err != nil { return } if len(featureVersions) > 0 { log.WithFields(log.Fields{logLayerName: name, "feature count": len(featureVersions)}).Debug("detected features") } return } // detectNamespaces returns a list of unique namespaces detected in a layer and its ancestry. func detectNamespaces(name string, files tarutil.FilesMap, parent *database.Layer) (namespaces []database.Namespace, err error) { nsSet := map[string]*database.Namespace{} nsCurrent, err := featurens.Detect(files) if err != nil { return } if parent != nil { for _, ns := range parent.Namespaces { // Under assumption that one version format corresponds to one type // of namespace. nsSet[ns.VersionFormat] = &ns log.WithFields(log.Fields{logLayerName: name, "detected namespace": ns.Name, "version format": ns.VersionFormat}).Debug("detected namespace (from parent)") } } for _, ns := range nsCurrent { nsSet[ns.VersionFormat] = &ns log.WithFields(log.Fields{logLayerName: name, "detected namespace": ns.Name, "version format": ns.VersionFormat}).Debug("detected namespace") } for _, ns := range nsSet { namespaces = append(namespaces, *ns) } return } func detectFeatureVersions(name string, files tarutil.FilesMap, namespaces []database.Namespace, parent *database.Layer) (features []database.FeatureVersion, err error) { // Build a map of the namespaces for each FeatureVersion in our parent layer. parentFeatureNamespaces := make(map[string]database.Namespace) if parent != nil { for _, parentFeature := range parent.Features { parentFeatureNamespaces[parentFeature.Feature.Name+":"+parentFeature.Version] = parentFeature.Feature.Namespace } } for _, ns := range namespaces { // TODO(Quentin-M): We need to pass the parent image to DetectFeatures because it's possible that // some detectors would need it in order to produce the entire feature list (if they can only // detect a diff). Also, we should probably pass the detected namespace so detectors could // make their own decision. detectedFeatures, err := featurefmt.ListFeatures(files, &ns) if err != nil { return features, err } // Ensure that each FeatureVersion has an associated Namespace. for i, feature := range detectedFeatures { if feature.Feature.Namespace.Name != "" { // There is a Namespace associated. continue } if parentFeatureNamespace, ok := parentFeatureNamespaces[feature.Feature.Name+":"+feature.Version]; ok { // The FeatureVersion is present in the parent layer; associate // with their Namespace. // This might cause problem because a package with same feature // name and version could be different in parent layer's // namespace and current layer's namespace detectedFeatures[i].Feature.Namespace = parentFeatureNamespace continue } detectedFeatures[i].Feature.Namespace = ns } features = append(features, detectedFeatures...) } // If there are no FeatureVersions, use parent's FeatureVersions if possible. // TODO(Quentin-M): We eventually want to give the choice to each detectors to use none/some of // their parent's FeatureVersions. It would be useful for detectors that can't find their entire // result using one Layer. if len(features) == 0 && parent != nil { features = parent.Features } return }