diff --git a/analyzer.go b/analyzer.go new file mode 100644 index 00000000..b6c62cc2 --- /dev/null +++ b/analyzer.go @@ -0,0 +1,145 @@ +// Copyright 2019 clair authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package clair + +import ( + "context" + + log "github.com/sirupsen/logrus" + + "github.com/coreos/clair/database" + "github.com/coreos/clair/ext/featurefmt" + "github.com/coreos/clair/ext/featurens" + "github.com/coreos/clair/ext/imagefmt" +) + +// AnalyzeError represents an failure when analyzing layer or constructing +// ancestry. +type AnalyzeError string + +func (e AnalyzeError) Error() string { + return string(e) +} + +var ( + // StorageError represents an analyze error caused by the storage + StorageError = AnalyzeError("failed to query the database.") + // RetrieveBlobError represents an analyze error caused by failure of + // downloading or extracting layer blobs. + RetrieveBlobError = AnalyzeError("failed to download layer blob.") + // ExtractBlobError represents an analyzer error caused by failure of + // extracting a layer blob by imagefmt. + ExtractBlobError = AnalyzeError("failed to extract files from layer blob.") + // FeatureDetectorError is an error caused by failure of feature listing by + // featurefmt. + FeatureDetectorError = AnalyzeError("failed to scan feature from layer blob files.") + // NamespaceDetectorError is an error caused by failure of namespace + // detection by featurens. + NamespaceDetectorError = AnalyzeError("failed to scan namespace from layer blob files.") +) + +// AnalyzeLayer retrieves the clair layer with all extracted features and namespaces. +// If a layer is already scanned by all enabled detectors in the Clair instance, it returns directly. +// Otherwise, it re-download the layer blob and scan the features and namespaced again. +func AnalyzeLayer(ctx context.Context, store database.Datastore, blobSha256 string, blobFormat string, downloadURI string, downloadHeaders map[string]string) (*database.Layer, error) { + layer, found, err := database.FindLayerAndRollback(store, blobSha256) + logFields := log.Fields{"layer.Hash": blobSha256} + if err != nil { + log.WithError(err).WithFields(logFields).Error("failed to find layer in the storage") + return nil, StorageError + } + + var scannedBy []database.Detector + if found { + scannedBy = layer.By + } + + // layer will be scanned by detectors not scanned the layer already. + toScan := database.DiffDetectors(EnabledDetectors(), scannedBy) + if len(toScan) != 0 { + log.WithFields(logFields).Debug("scan layer blob not already scanned") + newLayerScanResult := &database.Layer{Hash: blobSha256, By: toScan} + blob, err := retrieveLayerBlob(ctx, downloadURI, downloadHeaders) + if err != nil { + log.WithError(err).WithFields(logFields).Error("failed to retrieve layer blob") + return nil, RetrieveBlobError + } + + defer func() { + if err := blob.Close(); err != nil { + log.WithFields(logFields).Error("failed to close layer blob reader") + } + }() + + files := append(featurefmt.RequiredFilenames(toScan), featurens.RequiredFilenames(toScan)...) + fileMap, err := imagefmt.Extract(blobFormat, blob, files) + if err != nil { + log.WithFields(logFields).WithError(err).Error("failed to extract layer blob") + return nil, ExtractBlobError + } + + newLayerScanResult.Features, err = featurefmt.ListFeatures(fileMap, toScan) + if err != nil { + log.WithFields(logFields).WithError(err).Error("failed to detect features") + return nil, FeatureDetectorError + } + + newLayerScanResult.Namespaces, err = featurens.Detect(fileMap, toScan) + if err != nil { + log.WithFields(logFields).WithError(err).Error("failed to detect namespaces") + return nil, NamespaceDetectorError + } + + if err = saveLayerChange(store, newLayerScanResult); err != nil { + log.WithFields(logFields).WithError(err).Error("failed to store layer change") + return nil, StorageError + } + + layer = database.MergeLayers(layer, newLayerScanResult) + } else { + log.WithFields(logFields).Debug("found scanned layer blob") + } + + return layer, nil +} + +// EnabledDetectors retrieves a list of all detectors installed in the Clair +// instance. +func EnabledDetectors() []database.Detector { + return append(featurefmt.ListListers(), featurens.ListDetectors()...) +} + +// RegisterConfiguredDetectors populates the database with registered detectors. +func RegisterConfiguredDetectors(store database.Datastore) { + if err := database.PersistDetectorsAndCommit(store, EnabledDetectors()); err != nil { + panic("failed to initialize Clair analyzer") + } +} + +func saveLayerChange(store database.Datastore, layer *database.Layer) error { + if err := database.PersistFeaturesAndCommit(store, layer.GetFeatures()); err != nil { + return err + } + + if err := database.PersistNamespacesAndCommit(store, layer.GetNamespaces()); err != nil { + return err + } + + if err := database.PersistPartialLayerAndCommit(store, layer); err != nil { + return err + } + + return nil +} diff --git a/ancestry.go b/ancestry.go new file mode 100644 index 00000000..db609406 --- /dev/null +++ b/ancestry.go @@ -0,0 +1,290 @@ +// Copyright 2019 clair authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package clair + +import ( + "crypto/sha256" + "encoding/hex" + "strings" + + log "github.com/sirupsen/logrus" + + "github.com/coreos/clair/database" +) + +type layerIndexedFeature struct { + feature *database.LayerFeature + namespace *layerIndexedNamespace + introducedIn int +} + +type layerIndexedNamespace struct { + namespace database.LayerNamespace + introducedIn int +} + +// AncestryBuilder builds an Ancestry, which contains an ordered list of layers +// and their features. +type AncestryBuilder struct { + layerIndex int + layerNames []string + detectors []database.Detector + namespaces map[database.Detector]*layerIndexedNamespace + features map[database.Detector][]layerIndexedFeature +} + +// NewAncestryBuilder creates a new ancestry builder. +// +// ancestry builder takes in the extracted layer information and produce a set of +// namespaces, features, and the relation between features for the whole image. +func NewAncestryBuilder(detectors []database.Detector) *AncestryBuilder { + return &AncestryBuilder{ + layerIndex: 0, + detectors: detectors, + namespaces: make(map[database.Detector]*layerIndexedNamespace), + features: make(map[database.Detector][]layerIndexedFeature), + } +} + +// AddLeafLayer adds a leaf layer to the ancestry builder, and computes the +// namespaced features. +func (b *AncestryBuilder) AddLeafLayer(layer *database.Layer) { + b.layerNames = append(b.layerNames, layer.Hash) + for i := range layer.Namespaces { + b.updateNamespace(&layer.Namespaces[i]) + } + + allFeatureMap := map[database.Detector][]database.LayerFeature{} + for i := range layer.Features { + layerFeature := layer.Features[i] + allFeatureMap[layerFeature.By] = append(allFeatureMap[layerFeature.By], layerFeature) + } + + // we only care about the ones specified by builder's detectors + featureMap := map[database.Detector][]database.LayerFeature{} + for i := range b.detectors { + detector := b.detectors[i] + featureMap[detector] = allFeatureMap[detector] + } + + for detector := range featureMap { + b.addLayerFeatures(detector, featureMap[detector]) + } + + b.layerIndex++ +} + +// Every detector inspects a set of files for the features +// therefore, if that set of files gives a different set of features, it +// should replace the existing features. +func (b *AncestryBuilder) addLayerFeatures(detector database.Detector, features []database.LayerFeature) { + if len(features) == 0 { + // TODO(sidac): we need to differentiate if the detector finds that all + // features are removed ( a file change ), or the package installer is + // removed ( a file deletion ), or there's no change in the file ( file + // does not exist in the blob ) Right now, we're just assuming that no + // change in the file because that's the most common case. + return + } + + existingFeatures := b.features[detector] + currentFeatures := make([]layerIndexedFeature, 0, len(features)) + // Features that are not in the current layer should be removed. + for i := range existingFeatures { + feature := existingFeatures[i] + for j := range features { + if features[j] == *feature.feature { + currentFeatures = append(currentFeatures, feature) + break + } + } + } + + // Features that newly introduced in the current layer should be added. + for i := range features { + found := false + for j := range existingFeatures { + if *existingFeatures[j].feature == features[i] { + found = true + break + } + } + + if !found { + namespace, found := b.lookupNamespace(&features[i]) + if !found { + log.WithField("Layer Hashes", b.layerNames).Error("skip, could not find the proper namespace for feature") + continue + } + + currentFeatures = append(currentFeatures, b.createLayerIndexedFeature(namespace, &features[i])) + } + } + + b.features[detector] = currentFeatures +} + +// updateNamespace update the namespaces for the ancestry. It does the following things: +// 1. when a detector detects a new namespace, it's added to the ancestry. +// 2. when a detector detects a difference in the detected namespace, it +// replaces the namespace, and also move all features under that namespace to +// the new namespace. +func (b *AncestryBuilder) updateNamespace(layerNamespace *database.LayerNamespace) { + var ( + previous *layerIndexedNamespace + ok bool + ) + + if previous, ok = b.namespaces[layerNamespace.By]; !ok { + b.namespaces[layerNamespace.By] = &layerIndexedNamespace{ + namespace: *layerNamespace, + introducedIn: b.layerIndex, + } + + return + } + + // All features referencing to this namespace are now pointing to the new namespace. + // Also those features are now treated as introduced in the same layer as + // when this new namespace is introduced. + previous.namespace = *layerNamespace + previous.introducedIn = b.layerIndex + + for _, features := range b.features { + for i, feature := range features { + if feature.namespace == previous { + features[i].introducedIn = previous.introducedIn + } + } + } +} + +func (b *AncestryBuilder) createLayerIndexedFeature(namespace *layerIndexedNamespace, feature *database.LayerFeature) layerIndexedFeature { + return layerIndexedFeature{ + feature: feature, + namespace: namespace, + introducedIn: b.layerIndex, + } +} + +func (b *AncestryBuilder) lookupNamespace(feature *database.LayerFeature) (*layerIndexedNamespace, bool) { + for _, namespace := range b.namespaces { + if namespace.namespace.VersionFormat == feature.VersionFormat { + return namespace, true + } + } + + return nil, false +} + +func (b *AncestryBuilder) ancestryFeatures(index int) []database.AncestryFeature { + ancestryFeatures := []database.AncestryFeature{} + for detector, features := range b.features { + for _, feature := range features { + if feature.introducedIn == index { + ancestryFeatures = append(ancestryFeatures, database.AncestryFeature{ + NamespacedFeature: database.NamespacedFeature{ + Feature: feature.feature.Feature, + Namespace: feature.namespace.namespace.Namespace, + }, + FeatureBy: detector, + NamespaceBy: feature.namespace.namespace.By, + }) + } + } + } + + return ancestryFeatures +} + +func (b *AncestryBuilder) ancestryLayers() []database.AncestryLayer { + layers := make([]database.AncestryLayer, 0, b.layerIndex) + for i := 0; i < b.layerIndex; i++ { + layers = append(layers, database.AncestryLayer{ + Hash: b.layerNames[i], + Features: b.ancestryFeatures(i), + }) + } + + return layers +} + +// Ancestry produces an Ancestry from the builder. +func (b *AncestryBuilder) Ancestry(name string) *database.Ancestry { + if name == "" { + // TODO(sidac): we'll use the computed ancestry name in the future. + // During the transition, it still requires the user to use the correct + // ancestry name. + name = ancestryName(b.layerNames) + log.WithField("ancestry.Name", name).Warn("generated ancestry name since it's not specified") + } + + return &database.Ancestry{ + Name: name, + By: b.detectors, + Layers: b.ancestryLayers(), + } +} + +// SaveAncestry saves an ancestry to the datastore. +func SaveAncestry(store database.Datastore, ancestry *database.Ancestry) error { + log.WithField("ancestry.Name", ancestry.Name).Debug("saving ancestry") + features := []database.NamespacedFeature{} + for _, layer := range ancestry.Layers { + features = append(features, layer.GetFeatures()...) + } + + if err := database.PersistNamespacedFeaturesAndCommit(store, features); err != nil { + return StorageError + } + + if err := database.UpsertAncestryAndCommit(store, ancestry); err != nil { + return StorageError + } + + if err := database.CacheRelatedVulnerabilityAndCommit(store, features); err != nil { + return StorageError + } + + return nil +} + +// IsAncestryCached checks if the ancestry is already cached in the database with the current set of detectors. +func IsAncestryCached(store database.Datastore, name string, layerHashes []string) (bool, error) { + if name == "" { + // TODO(sidac): we'll use the computed ancestry name in the future. + // During the transition, it still requires the user to use the correct + // ancestry name. + name = ancestryName(layerHashes) + log.WithField("ancestry.Name", name).Warn("generated ancestry name since it's not specified") + } + + ancestry, found, err := database.FindAncestryAndRollback(store, name) + if err != nil { + log.WithError(err).WithField("ancestry.Name", name).Error("failed to query ancestry in database") + return false, StorageError + } + + if found { + log.WithField("ancestry.Name", name).Debug("found cached ancestry") + } + + return found && len(database.DiffDetectors(EnabledDetectors(), ancestry.By)) == 0, nil +} + +func ancestryName(layerHashes []string) string { + tag := sha256.Sum256([]byte(strings.Join(layerHashes, ","))) + return hex.EncodeToString(tag[:]) +} diff --git a/ancestry_test.go b/ancestry_test.go new file mode 100644 index 00000000..2884d6e1 --- /dev/null +++ b/ancestry_test.go @@ -0,0 +1,267 @@ +// Copyright 2019 clair authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package clair + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/coreos/clair/database" +) + +var ( + dpkg = database.NewFeatureDetector("dpkg", "1.0") + rpm = database.NewFeatureDetector("rpm", "1.0") + pip = database.NewFeatureDetector("pip", "1.0") + python = database.NewNamespaceDetector("python", "1.0") + osrelease = database.NewNamespaceDetector("os-release", "1.0") + ubuntu = *database.NewNamespace("ubuntu:14.04", "dpkg") + ubuntu16 = *database.NewNamespace("ubuntu:16.04", "dpkg") + python2 = *database.NewNamespace("python:2", "pip") + sed = *database.NewSourcePackage("sed", "4.4-2", "dpkg") + sedBin = *database.NewBinaryPackage("sed", "4.4-2", "dpkg") + tar = *database.NewBinaryPackage("tar", "1.29b-2", "dpkg") + scipy = *database.NewSourcePackage("scipy", "3.0.0", "pip") + + detectors = []database.Detector{dpkg, osrelease, rpm} + multinamespaceDetectors = []database.Detector{dpkg, osrelease, pip} +) + +// layerBuilder is for helping constructing the layer test artifacts. +type layerBuilder struct { + layer *database.Layer +} + +func newLayerBuilder(hash string) *layerBuilder { + return &layerBuilder{&database.Layer{Hash: hash, By: detectors}} +} + +func (b *layerBuilder) addNamespace(detector database.Detector, ns database.Namespace) *layerBuilder { + b.layer.Namespaces = append(b.layer.Namespaces, database.LayerNamespace{ + Namespace: ns, + By: detector, + }) + return b +} + +func (b *layerBuilder) addFeature(detector database.Detector, f database.Feature) *layerBuilder { + b.layer.Features = append(b.layer.Features, database.LayerFeature{ + Feature: f, + By: detector, + }) + + return b +} + +var testImage = []*database.Layer{ + // empty layer + newLayerBuilder("0").layer, + // ubuntu namespace + newLayerBuilder("1").addNamespace(osrelease, ubuntu).layer, + // install sed + newLayerBuilder("2").addFeature(dpkg, sed).layer, + // install tar + newLayerBuilder("3").addFeature(dpkg, sed).addFeature(dpkg, tar).layer, + // remove tar + newLayerBuilder("4").addFeature(dpkg, sed).layer, + // upgrade ubuntu + newLayerBuilder("5").addNamespace(osrelease, ubuntu16).layer, + // no change to the detectable files + newLayerBuilder("6").layer, + // change to the package installer database but no features are affected. + newLayerBuilder("7").addFeature(dpkg, sed).layer, +} + +var clairLimit = []*database.Layer{ + // TODO(sidac): how about install rpm package under ubuntu? + newLayerBuilder("1").addNamespace(osrelease, ubuntu).layer, + newLayerBuilder("2").addFeature(rpm, sed).layer, +} + +var multipleNamespace = []*database.Layer{ + // TODO(sidac): support for multiple namespaces +} + +var invalidNamespace = []*database.Layer{ + // add package without namespace, this indicates that the namespace detector + // could not detect the namespace. + newLayerBuilder("0").addFeature(dpkg, sed).layer, +} + +var multiplePackagesOnFirstLayer = []*database.Layer{ + newLayerBuilder("0").addFeature(dpkg, sed).addFeature(dpkg, tar).addFeature(dpkg, sedBin).addNamespace(osrelease, ubuntu16).layer, +} + +func TestAddLayer(t *testing.T) { + cases := []struct { + title string + image []*database.Layer + + expectedAncestry database.Ancestry + }{ + { + title: "empty image", + expectedAncestry: database.Ancestry{Name: ancestryName([]string{}), By: detectors}, + }, + { + title: "empty layer", + image: testImage[:1], + expectedAncestry: database.Ancestry{Name: ancestryName([]string{"0"}), By: detectors, Layers: []database.AncestryLayer{{Hash: "0"}}}, + }, + { + title: "ubuntu", + image: testImage[:2], + expectedAncestry: database.Ancestry{ + Name: ancestryName([]string{"0", "1"}), + By: detectors, + Layers: []database.AncestryLayer{{Hash: "0"}, {Hash: "1"}}, + }, + }, + { + title: "ubuntu install sed", + image: testImage[:3], + expectedAncestry: database.Ancestry{ + Name: ancestryName([]string{"0", "1", "2"}), + By: detectors, + Layers: []database.AncestryLayer{{Hash: "0"}, {Hash: "1"}, {Hash: "2", Features: []database.AncestryFeature{ + { + NamespacedFeature: database.NamespacedFeature{Feature: sed, Namespace: ubuntu}, + FeatureBy: dpkg, + NamespaceBy: osrelease, + }, + }}}, + }, + }, + { + title: "ubuntu install tar", + image: testImage[:4], + expectedAncestry: database.Ancestry{ + Name: ancestryName([]string{"0", "1", "2", "3"}), + By: detectors, + Layers: []database.AncestryLayer{{Hash: "0"}, {Hash: "1"}, {Hash: "2", Features: []database.AncestryFeature{ + { + NamespacedFeature: database.NamespacedFeature{Feature: sed, Namespace: ubuntu}, + FeatureBy: dpkg, + NamespaceBy: osrelease, + }, + }}, { + Hash: "3", Features: []database.AncestryFeature{ + { + NamespacedFeature: database.NamespacedFeature{Feature: tar, Namespace: ubuntu}, + FeatureBy: dpkg, + NamespaceBy: osrelease, + }, + }, + }}, + }, + }, { + title: "ubuntu uninstall tar", + image: testImage[:5], + expectedAncestry: database.Ancestry{ + Name: ancestryName([]string{"0", "1", "2", "3", "4"}), + By: detectors, + Layers: []database.AncestryLayer{{Hash: "0"}, {Hash: "1"}, {Hash: "2", Features: []database.AncestryFeature{ + { + NamespacedFeature: database.NamespacedFeature{Feature: sed, Namespace: ubuntu}, + FeatureBy: dpkg, + NamespaceBy: osrelease, + }, + }}, {Hash: "3"}, {Hash: "4"}}, + }, + }, { + title: "ubuntu upgrade", + image: testImage[:6], + expectedAncestry: database.Ancestry{ + Name: ancestryName([]string{"0", "1", "2", "3", "4", "5"}), + By: detectors, + Layers: []database.AncestryLayer{{Hash: "0"}, {Hash: "1"}, {Hash: "2"}, {Hash: "3"}, {Hash: "4"}, {Hash: "5", Features: []database.AncestryFeature{ + { + NamespacedFeature: database.NamespacedFeature{Feature: sed, Namespace: ubuntu16}, + FeatureBy: dpkg, + NamespaceBy: osrelease, + }}}, + }, + }, + }, { + title: "no change to the detectable files", + image: testImage[:7], + expectedAncestry: database.Ancestry{ + Name: ancestryName([]string{"0", "1", "2", "3", "4", "5", "6"}), + By: detectors, + Layers: []database.AncestryLayer{{Hash: "0"}, {Hash: "1"}, {Hash: "2"}, {Hash: "3"}, {Hash: "4"}, {Hash: "5", Features: []database.AncestryFeature{ + { + NamespacedFeature: database.NamespacedFeature{Feature: sed, Namespace: ubuntu16}, + FeatureBy: dpkg, + NamespaceBy: osrelease, + }}}, {Hash: "6"}}, + }, + }, { + title: "change to the package installer database but no features are affected.", + image: testImage[:8], + expectedAncestry: database.Ancestry{ + Name: ancestryName([]string{"0", "1", "2", "3", "4", "5", "6", "7"}), + By: detectors, + Layers: []database.AncestryLayer{{Hash: "0"}, {Hash: "1"}, {Hash: "2"}, {Hash: "3"}, {Hash: "4"}, {Hash: "5", Features: []database.AncestryFeature{ + { + NamespacedFeature: database.NamespacedFeature{Feature: sed, Namespace: ubuntu16}, + FeatureBy: dpkg, + NamespaceBy: osrelease, + }}}, {Hash: "6"}, {Hash: "7"}}, + }, + }, { + title: "layers with features and namespace.", + image: multiplePackagesOnFirstLayer, + expectedAncestry: database.Ancestry{ + Name: ancestryName([]string{"0"}), + By: detectors, + Layers: []database.AncestryLayer{ + { + Hash: "0", + Features: []database.AncestryFeature{ + { + NamespacedFeature: database.NamespacedFeature{Feature: sed, Namespace: ubuntu16}, + FeatureBy: dpkg, + NamespaceBy: osrelease, + }, + { + NamespacedFeature: database.NamespacedFeature{Feature: sedBin, Namespace: ubuntu16}, + FeatureBy: dpkg, + NamespaceBy: osrelease, + }, + { + NamespacedFeature: database.NamespacedFeature{Feature: tar, Namespace: ubuntu16}, + FeatureBy: dpkg, + NamespaceBy: osrelease, + }, + }, + }, + }, + }, + }, + } + + for _, test := range cases { + t.Run(test.title, func(t *testing.T) { + builder := NewAncestryBuilder(detectors) + for _, layer := range test.image { + builder.AddLeafLayer(layer) + } + + ancestry := builder.Ancestry("") + require.True(t, database.AssertAncestryEqual(t, &test.expectedAncestry, ancestry)) + }) + } +} diff --git a/api/v3/rpc.go b/api/v3/rpc.go index 51502c2c..34a9d0d3 100644 --- a/api/v3/rpc.go +++ b/api/v3/rpc.go @@ -17,6 +17,8 @@ package v3 import ( "fmt" + "github.com/coreos/clair/ext/imagefmt" + "golang.org/x/net/context" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -28,6 +30,10 @@ import ( "github.com/coreos/clair/pkg/pagination" ) +func newRPCErrorWithClairError(code codes.Code, err error) error { + return status.Errorf(code, "clair error reason: '%s'", err.Error()) +} + // NotificationServer implements NotificationService interface for serving RPC. type NotificationServer struct { Store database.Datastore @@ -55,23 +61,34 @@ func (s *StatusServer) GetStatus(ctx context.Context, req *pb.GetStatusRequest) // PostAncestry implements posting an ancestry via the Clair gRPC service. func (s *AncestryServer) PostAncestry(ctx context.Context, req *pb.PostAncestryRequest) (*pb.PostAncestryResponse, error) { - ancestryName := req.GetAncestryName() - if ancestryName == "" { - return nil, status.Error(codes.InvalidArgument, "ancestry name should not be empty") + // validate request + blobFormat := req.GetFormat() + if !imagefmt.IsSupported(blobFormat) { + return nil, status.Error(codes.InvalidArgument, "image blob format is not supported") + } + + clairStatus, err := GetClairStatus(s.Store) + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + + // check if the ancestry is already processed, if not we build the ancestry again. + layerHashes := make([]string, len(req.Layers)) + for i, layer := range req.Layers { + layerHashes[i] = layer.GetHash() } - layers := req.GetLayers() - if len(layers) == 0 { - return nil, status.Error(codes.InvalidArgument, "ancestry should have at least one layer") + found, err := clair.IsAncestryCached(s.Store, req.AncestryName, layerHashes) + if err != nil { + return nil, newRPCErrorWithClairError(codes.Internal, err) } - ancestryFormat := req.GetFormat() - if ancestryFormat == "" { - return nil, status.Error(codes.InvalidArgument, "ancestry format should not be empty") + if found { + return &pb.PostAncestryResponse{Status: clairStatus}, nil } - ancestryLayers := []clair.LayerRequest{} - for _, layer := range layers { + builder := clair.NewAncestryBuilder(clair.EnabledDetectors()) + for _, layer := range req.Layers { if layer == nil { err := status.Error(codes.InvalidArgument, "ancestry layer is invalid") return nil, err @@ -85,21 +102,22 @@ func (s *AncestryServer) PostAncestry(ctx context.Context, req *pb.PostAncestryR return nil, status.Error(codes.InvalidArgument, "ancestry layer path should not be empty") } - ancestryLayers = append(ancestryLayers, clair.LayerRequest{ - Hash: layer.Hash, - Headers: layer.Headers, - Path: layer.Path, - }) - } + // TODO(sidac): make AnalyzeLayer to be async to ensure + // non-blocking downloads. + // We'll need to deal with two layers post by the same or different + // requests that may have the same hash. In that case, since every + // layer/feature/namespace is unique in the database, it may introduce + // deadlock. + clairLayer, err := clair.AnalyzeLayer(ctx, s.Store, layer.Hash, req.Format, layer.Path, layer.Headers) + if err != nil { + return nil, newRPCErrorWithClairError(codes.Internal, err) + } - err := clair.ProcessAncestry(s.Store, ancestryFormat, ancestryName, ancestryLayers) - if err != nil { - return nil, status.Error(codes.Internal, "ancestry is failed to be processed: "+err.Error()) + builder.AddLeafLayer(clairLayer) } - clairStatus, err := GetClairStatus(s.Store) - if err != nil { - return nil, status.Error(codes.Internal, err.Error()) + if err := clair.SaveAncestry(s.Store, builder.Ancestry(req.AncestryName)); err != nil { + return nil, newRPCErrorWithClairError(codes.Internal, err) } return &pb.PostAncestryResponse{Status: clairStatus}, nil diff --git a/api/v3/util.go b/api/v3/util.go index 392a147e..fa0ff3bc 100644 --- a/api/v3/util.go +++ b/api/v3/util.go @@ -13,7 +13,7 @@ import ( // protobuf struct. func GetClairStatus(store database.Datastore) (*pb.ClairStatus, error) { status := &pb.ClairStatus{ - Detectors: pb.DetectorsFromDatabaseModel(clair.EnabledDetectors), + Detectors: pb.DetectorsFromDatabaseModel(clair.EnabledDetectors()), } t, firstUpdate, err := clair.GetLastUpdateTime(store) diff --git a/blob.go b/blob.go index afcfcb02..160f0cb7 100644 --- a/blob.go +++ b/blob.go @@ -1,61 +1,43 @@ +// Copyright 2019 clair authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package clair import ( "context" - "crypto/tls" "io" "net/http" "os" "strings" - log "github.com/sirupsen/logrus" + "github.com/coreos/clair/pkg/httputil" ) -func retrieveLayerBlob(ctx context.Context, blobSha256 string, path string, headers map[string]string) (io.ReadCloser, error) { +func retrieveLayerBlob(ctx context.Context, path string, headers map[string]string) (io.ReadCloser, error) { if strings.HasPrefix(path, "http://") || strings.HasPrefix(path, "https://") { - return downloadLayerBlob(ctx, blobSha256, path, headers) - } - - return loadLayerBlobFromFS(blobSha256) -} - -func downloadLayerBlob(ctx context.Context, blobSha256 string, uri string, headers map[string]string) (io.ReadCloser, error) { - request, err := http.NewRequest("GET", uri, nil) - if err != nil { - return nil, RetrieveBlobError - } - - if headers != nil { - for k, v := range headers { - request.Header.Set(k, v) + httpHeaders := make(http.Header) + for key, value := range headers { + httpHeaders[key] = []string{value} } - } - tr := &http.Transport{ - TLSClientConfig: &tls.Config{}, - Proxy: http.ProxyFromEnvironment, - } - - client := &http.Client{Transport: tr} - r, err := client.Do(request) - if err != nil { - log.WithError(err).Error("could not download layer") - return nil, RetrieveBlobError - } + reader, err := httputil.GetWithContext(ctx, path, httpHeaders) + if err != nil { + return nil, err + } - // Fail if we don't receive a 2xx HTTP status code. - if is2xx(r.StatusCode) { - log.WithField("status", r.StatusCode).Error("could not download layer: expected 2XX") - return nil, RetrieveBlobError + return reader, nil } - return r.Body, nil -} - -func is2xx(statusCode int) bool { - return statusCode >= 200 && statusCode < 300 -} - -func loadLayerBlobFromFS(path string) (io.ReadCloser, error) { return os.Open(path) } diff --git a/cmd/clair/main.go b/cmd/clair/main.go index 62f5d00c..221489f3 100644 --- a/cmd/clair/main.go +++ b/cmd/clair/main.go @@ -30,9 +30,6 @@ import ( "github.com/coreos/clair" "github.com/coreos/clair/api" "github.com/coreos/clair/database" - "github.com/coreos/clair/ext/featurefmt" - "github.com/coreos/clair/ext/featurens" - "github.com/coreos/clair/ext/imagefmt" "github.com/coreos/clair/ext/vulnsrc" "github.com/coreos/clair/pkg/formatter" "github.com/coreos/clair/pkg/stopper" @@ -103,11 +100,10 @@ func stopCPUProfiling(f *os.File) { } func configClairVersion(config *Config) { - clair.EnabledDetectors = append(featurefmt.ListListers(), featurens.ListDetectors()...) clair.EnabledUpdaters = strutil.Intersect(config.Updater.EnabledUpdaters, vulnsrc.ListUpdaters()) log.WithFields(log.Fields{ - "Detectors": database.SerializeDetectors(clair.EnabledDetectors), + "Detectors": database.SerializeDetectors(clair.EnabledDetectors()), "Updaters": clair.EnabledUpdaters, }).Info("enabled Clair extensions") } @@ -134,7 +130,8 @@ func Boot(config *Config) { defer db.Close() - clair.InitWorker(db) + clair.RegisterConfiguredDetectors(db) + // Start notifier st.Begin() go clair.RunNotifier(config.Notifier, db, st) @@ -173,7 +170,6 @@ func main() { flagConfigPath := flag.String("config", "/etc/clair/config.yaml", "Load configuration from the specified file.") flagCPUProfilePath := flag.String("cpu-profile", "", "Write a CPU profile to the specified file before exiting.") flagLogLevel := flag.String("log-level", "info", "Define the logging level.") - flagInsecureTLS := flag.Bool("insecure-tls", false, "Disable TLS server's certificate chain and hostname verification when pulling layers.") flag.Parse() configureLogger(flagLogLevel) @@ -195,12 +191,6 @@ func main() { defer stopCPUProfiling(startCPUProfiling(*flagCPUProfilePath)) } - // Enable TLS server's certificate chain and hostname verification - // when pulling layers if specified - if *flagInsecureTLS { - imagefmt.SetInsecureTLS(*flagInsecureTLS) - } - // configure updater and worker configClairVersion(config) diff --git a/database/dbutil.go b/database/dbutil.go index 31d48503..dece4027 100644 --- a/database/dbutil.go +++ b/database/dbutil.go @@ -15,8 +15,11 @@ package database import ( + "encoding/json" "time" + log "github.com/sirupsen/logrus" + "github.com/deckarep/golang-set" ) @@ -94,8 +97,11 @@ func PersistFeaturesAndCommit(datastore Datastore, features []Feature) error { defer tx.Rollback() if err := tx.PersistFeatures(features); err != nil { + serialized, _ := json.Marshal(features) + log.WithError(err).WithField("feature", string(serialized)).Error("failed to store features") return err } + return tx.Commit() } @@ -129,14 +135,18 @@ func FindAncestryAndRollback(datastore Datastore, name string) (Ancestry, bool, } // FindLayerAndRollback wraps session FindLayer function with begin and rollback. -func FindLayerAndRollback(datastore Datastore, hash string) (layer Layer, ok bool, err error) { +func FindLayerAndRollback(datastore Datastore, hash string) (layer *Layer, ok bool, err error) { var tx Session if tx, err = datastore.Begin(); err != nil { return } defer tx.Rollback() - layer, ok, err = tx.FindLayer(hash) + // TODO(sidac): In order to make the session interface more idiomatic, we'll + // return the pointer value in the future. + var dereferencedLayer Layer + dereferencedLayer, ok, err = tx.FindLayer(hash) + layer = &dereferencedLayer return } @@ -168,13 +178,17 @@ func GetAncestryFeatures(ancestry Ancestry) []NamespacedFeature { } // UpsertAncestryAndCommit wraps session UpsertAncestry function with begin and commit. -func UpsertAncestryAndCommit(datastore Datastore, ancestry Ancestry) error { +func UpsertAncestryAndCommit(datastore Datastore, ancestry *Ancestry) error { tx, err := datastore.Begin() if err != nil { return err } - if err = tx.UpsertAncestry(ancestry); err != nil { + if err = tx.UpsertAncestry(*ancestry); err != nil { + log.WithError(err).Error("failed to upsert the ancestry") + serialized, _ := json.Marshal(ancestry) + log.Debug(string(serialized)) + tx.Rollback() return err } @@ -350,3 +364,22 @@ func ReleaseLock(datastore Datastore, name, owner string) { return } } + +// PersistDetectorsAndCommit stores the detectors in the data store. +func PersistDetectorsAndCommit(store Datastore, detectors []Detector) error { + tx, err := store.Begin() + if err != nil { + return err + } + + defer tx.Rollback() + if err := tx.PersistDetectors(detectors); err != nil { + return err + } + + if err := tx.Commit(); err != nil { + return err + } + + return nil +} diff --git a/database/detector.go b/database/detector.go index e1295340..e440da64 100644 --- a/database/detector.go +++ b/database/detector.go @@ -93,12 +93,12 @@ func (s DetectorType) Valid() bool { type Detector struct { // Name of an extension should be non-empty and uniquely identifies the // extension. - Name string + Name string `json:"name"` // Version of an extension should be non-empty. - Version string + Version string `json:"version"` // DType is the type of the extension and should be one of the types in // DetectorTypes. - DType DetectorType + DType DetectorType `json:"dtype"` } // Valid checks if all fields in the detector satisfies the spec. diff --git a/database/models.go b/database/models.go index f1c7a3c0..9abba3c0 100644 --- a/database/models.go +++ b/database/models.go @@ -26,13 +26,13 @@ import ( type Ancestry struct { // Name is a globally unique value for a set of layers. This is often the // sha256 digest of an OCI/Docker manifest. - Name string + Name string `json:"name"` // By contains the processors that are used when computing the // content of this ancestry. - By []Detector + By []Detector `json:"by"` // Layers should be ordered and i_th layer is the parent of i+1_th layer in // the slice. - Layers []AncestryLayer + Layers []AncestryLayer `json:"layers"` } // Valid checks if the ancestry is compliant to spec. @@ -63,10 +63,10 @@ func (a *Ancestry) Valid() bool { // AncestryLayer is a layer with all detected namespaced features. type AncestryLayer struct { // Hash is the sha-256 tarsum on the layer's blob content. - Hash string + Hash string `json:"hash"` // Features are the features introduced by this layer when it was // processed. - Features []AncestryFeature + Features []AncestryFeature `json:"features"` } // Valid checks if the Ancestry Layer is compliant to the spec. @@ -95,22 +95,22 @@ func (l *AncestryLayer) GetFeatures() []NamespacedFeature { // AncestryFeature is a namespaced feature with the detectors used to // find this feature. type AncestryFeature struct { - NamespacedFeature + NamespacedFeature `json:"namespacedFeature"` // FeatureBy is the detector that detected the feature. - FeatureBy Detector + FeatureBy Detector `json:"featureBy"` // NamespaceBy is the detector that detected the namespace. - NamespaceBy Detector + NamespaceBy Detector `json:"namespaceBy"` } // Layer is a layer with all the detected features and namespaces. type Layer struct { // Hash is the sha-256 tarsum on the layer's blob content. - Hash string + Hash string `json:"hash"` // By contains a list of detectors scanned this Layer. - By []Detector - Namespaces []LayerNamespace - Features []LayerFeature + By []Detector `json:"by"` + Namespaces []LayerNamespace `json:"namespaces"` + Features []LayerFeature `json:"features"` } func (l *Layer) GetFeatures() []Feature { @@ -133,26 +133,26 @@ func (l *Layer) GetNamespaces() []Namespace { // LayerNamespace is a namespace with detection information. type LayerNamespace struct { - Namespace + Namespace `json:"namespace"` // By is the detector found the namespace. - By Detector + By Detector `json:"by"` } // LayerFeature is a feature with detection information. type LayerFeature struct { - Feature + Feature `json:"feature"` // By is the detector found the feature. - By Detector + By Detector `json:"by"` } // Namespace is the contextual information around features. // // e.g. Debian:7, NodeJS. type Namespace struct { - Name string - VersionFormat string + Name string `json:"name"` + VersionFormat string `json:"versionFormat"` } func NewNamespace(name string, versionFormat string) *Namespace { @@ -166,10 +166,10 @@ func NewNamespace(name string, versionFormat string) *Namespace { // dpkg is the version format of the installer package manager, which in this // case could be dpkg or apk. type Feature struct { - Name string - Version string - VersionFormat string - Type FeatureType + Name string `json:"name"` + Version string `json:"version"` + VersionFormat string `json:"versionFormat"` + Type FeatureType `json:"type"` } func NewFeature(name string, version string, versionFormat string, featureType FeatureType) *Feature { @@ -189,9 +189,9 @@ func NewSourcePackage(name string, version string, versionFormat string) *Featur // // e.g. OpenSSL 1.0 dpkg Debian:7. type NamespacedFeature struct { - Feature + Feature `json:"feature"` - Namespace Namespace + Namespace Namespace `json:"namespace"` } func NewNamespacedFeature(namespace *Namespace, feature *Feature) *NamespacedFeature { diff --git a/pkg/httputil/httputil.go b/pkg/httputil/httputil.go index b0ae2004..da26f518 100644 --- a/pkg/httputil/httputil.go +++ b/pkg/httputil/httputil.go @@ -16,6 +16,10 @@ package httputil import ( + "context" + "crypto/tls" + "fmt" + "io" "net" "net/http" "strings" @@ -62,6 +66,38 @@ func GetClientAddr(r *http.Request) string { return addr } +// GetWithContext do HTTP GET to the URI with headers and returns response blob +// reader. +func GetWithContext(ctx context.Context, uri string, headers http.Header) (io.ReadCloser, error) { + request, err := http.NewRequest("GET", uri, nil) + if err != nil { + return nil, err + } + + if headers != nil { + request.Header = headers + } + + tr := &http.Transport{ + TLSClientConfig: &tls.Config{}, + Proxy: http.ProxyFromEnvironment, + } + + client := &http.Client{Transport: tr} + request = request.WithContext(ctx) + r, err := client.Do(request) + if err != nil { + return nil, err + } + + // Fail if we don't receive a 2xx HTTP status code. + if !Status2xx(r) { + return nil, fmt.Errorf("failed HTTP GET: expected 2XX, got %d", r.StatusCode) + } + + return r.Body, nil +} + // Status2xx returns true if the response's status code is success (2xx) func Status2xx(resp *http.Response) bool { return resp.StatusCode/100 == 2 diff --git a/worker.go b/worker.go deleted file mode 100644 index 07a547b3..00000000 --- a/worker.go +++ /dev/null @@ -1,478 +0,0 @@ -// Copyright 2018 clair authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package clair - -import ( - "errors" - "sync" - - "github.com/deckarep/golang-set" - log "github.com/sirupsen/logrus" - - "github.com/coreos/clair/database" - "github.com/coreos/clair/ext/featurefmt" - "github.com/coreos/clair/ext/featurens" - "github.com/coreos/clair/ext/imagefmt" - "github.com/coreos/clair/pkg/commonerr" - "github.com/coreos/clair/pkg/strutil" - "github.com/coreos/clair/pkg/tarutil" -) - -var ( - // ErrUnsupported is the error that should be raised when an OS or package - // manager is not supported. - ErrUnsupported = commonerr.NewBadRequestError("worker: OS and/or package manager are not supported") - - // EnabledDetectors are detectors to be used to scan the layers. - EnabledDetectors []database.Detector -) - -// LayerRequest represents all information necessary to download and process a -// layer. -type LayerRequest struct { - Hash string - Path string - Headers map[string]string -} - -type processResult struct { - existingLayer *database.Layer - newLayerContent *database.Layer - err error -} - -// processRequest stores parameters used for processing a layer. -type processRequest struct { - LayerRequest - - existingLayer *database.Layer - detectors []database.Detector -} - -type introducedFeature struct { - feature database.AncestryFeature - layerIndex int -} - -// processRequests in parallel processes a set of requests for unique set of layers -// and returns sets of unique namespaces, features and layers to be inserted -// into the database. -func processRequests(imageFormat string, toDetect map[string]*processRequest) (map[string]*processResult, error) { - wg := &sync.WaitGroup{} - wg.Add(len(toDetect)) - - results := map[string]*processResult{} - for i := range toDetect { - results[i] = nil - } - - for i := range toDetect { - result := processResult{} - results[i] = &result - go func(req *processRequest, res *processResult) { - *res = *detectContent(imageFormat, req) - wg.Done() - }(toDetect[i], &result) - } - - wg.Wait() - errs := []error{} - for _, r := range results { - errs = append(errs, r.err) - } - - if err := commonerr.CombineErrors(errs...); err != nil { - return nil, err - } - - return results, nil -} - -func getProcessRequest(datastore database.Datastore, req LayerRequest) (preq *processRequest, err error) { - layer, ok, err := database.FindLayerAndRollback(datastore, req.Hash) - if err != nil { - return - } - - if !ok { - log.WithField("layer", req.Hash).Debug("found no existing layer in database") - preq = &processRequest{ - LayerRequest: req, - existingLayer: &database.Layer{Hash: req.Hash}, - detectors: EnabledDetectors, - } - } else { - log.WithFields(log.Fields{ - "layer": layer.Hash, - "detectors": layer.By, - "feature count": len(layer.Features), - "namespace count": len(layer.Namespaces), - }).Debug("found existing layer in database") - - preq = &processRequest{ - LayerRequest: req, - existingLayer: &layer, - detectors: database.DiffDetectors(EnabledDetectors, layer.By), - } - } - - return -} - -func persistProcessResult(datastore database.Datastore, results map[string]*processResult) error { - features := []database.Feature{} - namespaces := []database.Namespace{} - for _, r := range results { - features = append(features, r.newLayerContent.GetFeatures()...) - namespaces = append(namespaces, r.newLayerContent.GetNamespaces()...) - } - - features = database.DeduplicateFeatures(features...) - namespaces = database.DeduplicateNamespaces(namespaces...) - if err := database.PersistNamespacesAndCommit(datastore, namespaces); err != nil { - return err - } - - if err := database.PersistFeaturesAndCommit(datastore, features); err != nil { - return err - } - - for _, layer := range results { - if err := database.PersistPartialLayerAndCommit(datastore, layer.newLayerContent); err != nil { - return err - } - } - - return nil -} - -// processLayers processes a set of post layer requests, stores layers and -// returns an ordered list of processed layers with detected features and -// namespaces. -func processLayers(datastore database.Datastore, imageFormat string, requests []LayerRequest) ([]database.Layer, error) { - var ( - reqMap = make(map[string]*processRequest) - err error - ) - - for _, r := range requests { - reqMap[r.Hash], err = getProcessRequest(datastore, r) - if err != nil { - return nil, err - } - } - - results, err := processRequests(imageFormat, reqMap) - if err != nil { - return nil, err - } - - if err := persistProcessResult(datastore, results); err != nil { - return nil, err - } - - completeLayers := getProcessResultLayers(results) - layers := make([]database.Layer, 0, len(requests)) - for _, r := range requests { - layers = append(layers, completeLayers[r.Hash]) - } - - return layers, nil -} - -func getProcessResultLayers(results map[string]*processResult) map[string]database.Layer { - layers := map[string]database.Layer{} - for name, r := range results { - layers[name] = *database.MergeLayers(r.existingLayer, r.newLayerContent) - } - - return layers -} - -func isAncestryProcessed(datastore database.Datastore, name string) (bool, error) { - ancestry, ok, err := database.FindAncestryAndRollback(datastore, name) - if err != nil || !ok { - return ok, err - } - - return len(database.DiffDetectors(EnabledDetectors, ancestry.By)) == 0, nil -} - -// ProcessAncestry downloads and scans an ancestry if it's not scanned by all -// enabled processors in this instance of Clair. -func ProcessAncestry(datastore database.Datastore, imageFormat, name string, layerRequest []LayerRequest) error { - var ( - err error - ok bool - layers []database.Layer - ) - - if name == "" { - return commonerr.NewBadRequestError("could not process a layer which does not have a name") - } - - if imageFormat == "" { - return commonerr.NewBadRequestError("could not process a layer which does not have a format") - } - - log.WithField("ancestry", name).Debug("start processing ancestry...") - if ok, err = isAncestryProcessed(datastore, name); err != nil { - log.WithError(err).Error("could not determine if ancestry is processed") - return err - } else if ok { - log.WithField("ancestry", name).Debug("ancestry is already processed") - return nil - } - - if layers, err = processLayers(datastore, imageFormat, layerRequest); err != nil { - return err - } - - return processAncestry(datastore, name, layers) -} - -func processAncestry(datastore database.Datastore, name string, layers []database.Layer) error { - var ( - ancestry = database.Ancestry{Name: name} - err error - ) - - ancestry.Layers, ancestry.By, err = computeAncestryLayers(layers) - if err != nil { - return err - } - - ancestryFeatures := database.GetAncestryFeatures(ancestry) - log.WithFields(log.Fields{ - "ancestry": name, - "processed by": EnabledDetectors, - "features count": len(ancestryFeatures), - "layer count": len(ancestry.Layers), - }).Debug("compute ancestry features") - - if err := database.PersistNamespacedFeaturesAndCommit(datastore, ancestryFeatures); err != nil { - log.WithField("ancestry", name).WithError(err).Error("could not persist namespaced features for ancestry") - return err - } - - if err := database.CacheRelatedVulnerabilityAndCommit(datastore, ancestryFeatures); err != nil { - log.WithField("ancestry", name).WithError(err).Error("failed to cache feature related vulnerability") - return err - } - - if err := database.UpsertAncestryAndCommit(datastore, ancestry); err != nil { - log.WithField("ancestry", name).WithError(err).Error("could not upsert ancestry") - return err - } - - return nil -} - -func getCommonDetectors(layers []database.Layer) mapset.Set { - // find the common detector for all layers and filter the namespaces and - // features based on that. - commonDetectors := mapset.NewSet() - for _, d := range layers[0].By { - commonDetectors.Add(d) - } - - for _, l := range layers { - detectors := mapset.NewSet() - for _, d := range l.By { - detectors.Add(d) - } - - commonDetectors = commonDetectors.Intersect(detectors) - } - - return commonDetectors -} - -// computeAncestryLayers computes ancestry's layers along with what features are -// introduced. -func computeAncestryLayers(layers []database.Layer) ([]database.AncestryLayer, []database.Detector, error) { - if len(layers) == 0 { - return nil, nil, nil - } - - commonDetectors := getCommonDetectors(layers) - // version format -> namespace - namespaces := map[string]database.LayerNamespace{} - // version format -> feature ID -> feature - features := map[string]map[string]introducedFeature{} - ancestryLayers := []database.AncestryLayer{} - for index, layer := range layers { - initializedLayer := database.AncestryLayer{Hash: layer.Hash} - ancestryLayers = append(ancestryLayers, initializedLayer) - - // Precondition: namespaces and features contain the result from union - // of all parents. - for _, ns := range layer.Namespaces { - if !commonDetectors.Contains(ns.By) { - continue - } - - namespaces[ns.VersionFormat] = ns - } - - // version format -> feature ID -> feature - currentFeatures := map[string]map[string]introducedFeature{} - for _, f := range layer.Features { - if !commonDetectors.Contains(f.By) { - continue - } - - if ns, ok := namespaces[f.VersionFormat]; ok { - var currentMap map[string]introducedFeature - if currentMap, ok = currentFeatures[f.VersionFormat]; !ok { - currentFeatures[f.VersionFormat] = make(map[string]introducedFeature) - currentMap = currentFeatures[f.VersionFormat] - } - - inherited := false - if mapF, ok := features[f.VersionFormat]; ok { - if parentFeature, ok := mapF[f.Name+":"+f.Version]; ok { - currentMap[f.Name+":"+f.Version] = parentFeature - inherited = true - } - } - - if !inherited { - currentMap[f.Name+":"+f.Version] = introducedFeature{ - feature: database.AncestryFeature{ - NamespacedFeature: database.NamespacedFeature{ - Feature: f.Feature, - Namespace: ns.Namespace, - }, - NamespaceBy: ns.By, - FeatureBy: f.By, - }, - layerIndex: index, - } - } - - } else { - return nil, nil, errors.New("No corresponding version format") - } - } - - // NOTE(Sida): we update the feature map in some version format - // only if there's at least one feature with that version format. This - // approach won't differentiate feature file removed vs all detectable - // features removed from that file vs feature file not changed. - // - // One way to differentiate (feature file removed or not changed) vs - // all detectable features removed is to pass in the file status. - for vf, mapF := range currentFeatures { - features[vf] = mapF - } - } - - for _, featureMap := range features { - for _, feature := range featureMap { - ancestryLayers[feature.layerIndex].Features = append( - ancestryLayers[feature.layerIndex].Features, - feature.feature, - ) - } - } - - detectors := make([]database.Detector, 0, commonDetectors.Cardinality()) - for d := range commonDetectors.Iter() { - detectors = append(detectors, d.(database.Detector)) - } - - return ancestryLayers, detectors, nil -} - -func extractRequiredFiles(imageFormat string, req *processRequest) (tarutil.FilesMap, error) { - requiredFiles := append(featurefmt.RequiredFilenames(req.detectors), featurens.RequiredFilenames(req.detectors)...) - if len(requiredFiles) == 0 { - log.WithFields(log.Fields{ - "layer": req.Hash, - "detectors": req.detectors, - }).Info("layer requires no file to extract") - return make(tarutil.FilesMap), nil - } - - files, err := imagefmt.Extract(imageFormat, req.Path, req.Headers, requiredFiles) - if err != nil { - log.WithError(err).WithFields(log.Fields{ - "layer": req.Hash, - "path": strutil.CleanURL(req.Path), - }).Error("failed to extract data from path") - return nil, err - } - - return files, err -} - -// detectContent downloads a layer and detects all features and namespaces. -func detectContent(imageFormat string, req *processRequest) (res *processResult) { - var ( - files tarutil.FilesMap - layer = database.Layer{Hash: req.Hash, By: req.detectors} - ) - - res = &processResult{req.existingLayer, &layer, nil} - log.WithFields(log.Fields{ - "layer": req.Hash, - "detectors": req.detectors, - }).Info("detecting layer content...") - - files, res.err = extractRequiredFiles(imageFormat, req) - if res.err != nil { - return - } - - if layer.Namespaces, res.err = featurens.Detect(files, req.detectors); res.err != nil { - return - } - - if layer.Features, res.err = featurefmt.ListFeatures(files, req.detectors); res.err != nil { - return - } - - log.WithFields(log.Fields{ - "layer": req.Hash, - "detectors": req.detectors, - "namespace count": len(layer.Namespaces), - "feature count": len(layer.Features), - }).Info("processed layer") - - return -} - -// InitWorker initializes the worker. -func InitWorker(datastore database.Datastore) { - if len(EnabledDetectors) == 0 { - log.Warn("no enabled detector, and therefore, no ancestry will be processed.") - return - } - - tx, err := datastore.Begin() - if err != nil { - log.WithError(err).Fatal("cannot connect to database to initialize worker") - } - - defer tx.Rollback() - if err := tx.PersistDetectors(EnabledDetectors); err != nil { - log.WithError(err).Fatal("cannot insert detectors to initialize worker") - } - - if err := tx.Commit(); err != nil { - log.WithError(err).Fatal("cannot commit detector changes to initialize worker") - } -} diff --git a/worker_test.go b/worker_test.go deleted file mode 100644 index 2ad86272..00000000 --- a/worker_test.go +++ /dev/null @@ -1,587 +0,0 @@ -// Copyright 2017 clair authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package clair - -import ( - "errors" - "path/filepath" - "runtime" - "strings" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/coreos/clair/database" - "github.com/coreos/clair/ext/featurefmt" - "github.com/coreos/clair/ext/featurens" - "github.com/coreos/clair/ext/versionfmt/dpkg" - - // Register the required detectors. - _ "github.com/coreos/clair/ext/featurefmt/dpkg" - _ "github.com/coreos/clair/ext/featurefmt/rpm" - _ "github.com/coreos/clair/ext/featurens/aptsources" - _ "github.com/coreos/clair/ext/featurens/osrelease" - _ "github.com/coreos/clair/ext/imagefmt/docker" -) - -type mockDatastore struct { - database.MockDatastore - - layers map[string]database.Layer - ancestry map[string]database.Ancestry - namespaces map[string]database.Namespace - features map[string]database.Feature - namespacedFeatures map[string]database.NamespacedFeature -} - -type mockSession struct { - database.MockSession - - store *mockDatastore - copy mockDatastore - terminated bool -} - -func copyDatastore(md *mockDatastore) mockDatastore { - layers := map[string]database.Layer{} - for k, l := range md.layers { - layers[k] = database.Layer{ - Hash: l.Hash, - By: append([]database.Detector{}, l.By...), - Features: append([]database.LayerFeature{}, l.Features...), - Namespaces: append([]database.LayerNamespace{}, l.Namespaces...), - } - } - - ancestry := map[string]database.Ancestry{} - for k, a := range md.ancestry { - ancestryLayers := []database.AncestryLayer{} - for _, layer := range a.Layers { - ancestryLayers = append(ancestryLayers, database.AncestryLayer{ - Hash: layer.Hash, - Features: append([]database.AncestryFeature{}, layer.Features...), - }) - } - - ancestry[k] = database.Ancestry{ - Name: a.Name, - By: append([]database.Detector{}, a.By...), - Layers: ancestryLayers, - } - } - - namespaces := map[string]database.Namespace{} - for k, n := range md.namespaces { - namespaces[k] = n - } - - features := map[string]database.Feature{} - for k, f := range md.features { - features[k] = f - } - - namespacedFeatures := map[string]database.NamespacedFeature{} - for k, f := range md.namespacedFeatures { - namespacedFeatures[k] = f - } - - return mockDatastore{ - layers: layers, - ancestry: ancestry, - namespaces: namespaces, - namespacedFeatures: namespacedFeatures, - features: features, - } -} - -func newMockDatastore() *mockDatastore { - errSessionDone := errors.New("Session Done") - md := &mockDatastore{ - layers: make(map[string]database.Layer), - ancestry: make(map[string]database.Ancestry), - namespaces: make(map[string]database.Namespace), - features: make(map[string]database.Feature), - namespacedFeatures: make(map[string]database.NamespacedFeature), - } - - md.FctBegin = func() (database.Session, error) { - session := &mockSession{ - store: md, - copy: copyDatastore(md), - terminated: false, - } - - session.FctCommit = func() error { - if session.terminated { - return nil - } - session.store.layers = session.copy.layers - session.store.ancestry = session.copy.ancestry - session.store.namespaces = session.copy.namespaces - session.store.features = session.copy.features - session.store.namespacedFeatures = session.copy.namespacedFeatures - session.terminated = true - return nil - } - - session.FctRollback = func() error { - if session.terminated { - return nil - } - session.terminated = true - session.copy = mockDatastore{} - return nil - } - - session.FctFindAncestry = func(name string) (database.Ancestry, bool, error) { - if session.terminated { - return database.Ancestry{}, false, errSessionDone - } - ancestry, ok := session.copy.ancestry[name] - return ancestry, ok, nil - } - - session.FctFindLayer = func(name string) (database.Layer, bool, error) { - if session.terminated { - return database.Layer{}, false, errSessionDone - } - layer, ok := session.copy.layers[name] - return layer, ok, nil - } - - session.FctPersistNamespaces = func(ns []database.Namespace) error { - if session.terminated { - return errSessionDone - } - for _, n := range ns { - session.copy.namespaces[NamespaceKey(&n)] = n - } - return nil - } - - session.FctPersistFeatures = func(fs []database.Feature) error { - if session.terminated { - return errSessionDone - } - for _, f := range fs { - session.copy.features[FeatureKey(&f)] = f - } - - return nil - } - - session.FctPersistLayer = func(hash string, features []database.LayerFeature, namespaces []database.LayerNamespace, by []database.Detector) error { - if session.terminated { - return errSessionDone - } - - for _, ns := range namespaces { - if _, ok := session.copy.namespaces[NamespaceKey(&ns.Namespace)]; !ok { - panic("") - } - } - - for _, f := range features { - if _, ok := session.copy.features[FeatureKey(&f.Feature)]; !ok { - panic("") - } - } - - layer, _ := session.copy.layers[hash] - database.MergeLayers(&layer, &database.Layer{ - Hash: hash, - By: by, - Namespaces: namespaces, - Features: features, - }) - - session.copy.layers[hash] = layer - return nil - } - - session.FctUpsertAncestry = func(ancestry database.Ancestry) error { - if session.terminated { - return errSessionDone - } - - // ensure the namespaces features are in the code base - for _, l := range ancestry.Layers { - for _, f := range l.GetFeatures() { - if _, ok := session.copy.namespacedFeatures[NamespacedFeatureKey(&f)]; !ok { - panic("") - } - } - } - - session.copy.ancestry[ancestry.Name] = ancestry - return nil - } - - session.FctPersistNamespacedFeatures = func(namespacedFeatures []database.NamespacedFeature) error { - for i, f := range namespacedFeatures { - if _, ok := session.copy.features[FeatureKey(&f.Feature)]; !ok { - panic("") - } - - if _, ok := session.copy.namespaces[NamespaceKey(&f.Namespace)]; !ok { - panic("") - } - - session.copy.namespacedFeatures[NamespacedFeatureKey(&f)] = namespacedFeatures[i] - } - return nil - } - - session.FctCacheAffectedNamespacedFeatures = func(namespacedFeatures []database.NamespacedFeature) error { - // The function does nothing because we don't care about the vulnerability cache in worker_test. - return nil - } - - return session, nil - } - return md -} - -func TestMain(m *testing.M) { - EnabledDetectors = append(featurefmt.ListListers(), featurens.ListDetectors()...) - m.Run() -} - -func FeatureKey(f *database.Feature) string { - return strings.Join([]string{f.Name, f.VersionFormat, f.Version}, "__") -} - -func NamespaceKey(ns *database.Namespace) string { - return strings.Join([]string{ns.Name, ns.VersionFormat}, "__") -} - -func NamespacedFeatureKey(f *database.NamespacedFeature) string { - return strings.Join([]string{f.Name, f.Namespace.Name}, "__") -} - -func TestProcessAncestryWithDistUpgrade(t *testing.T) { - // TODO(sidac): Change to use table driven tests. - // Create the list of Features that should not been upgraded from one layer to another. - nonUpgradedFeatures := []database.Feature{ - {Name: "libtext-wrapi18n-perl", Version: "0.06-7"}, - {Name: "libtext-charwidth-perl", Version: "0.04-7"}, - {Name: "libtext-iconv-perl", Version: "1.7-5"}, - {Name: "mawk", Version: "1.3.3-17"}, - {Name: "insserv", Version: "1.14.0-5"}, - {Name: "db", Version: "5.1.29-5"}, - {Name: "ustr", Version: "1.0.4-3"}, - {Name: "xz-utils", Version: "5.1.1alpha+20120614-2"}, - {Name: "libdb5.1", Version: "5.1.29-5"}, - } - - nonUpgradedMap := map[database.Feature]struct{}{} - for _, f := range nonUpgradedFeatures { - f.VersionFormat = "dpkg" - f.Type = database.SourcePackage - nonUpgradedMap[f] = struct{}{} - f.Type = database.BinaryPackage - nonUpgradedMap[f] = struct{}{} - } - - // Process test layers. - // - // blank.tar: MAINTAINER Quentin MACHU - // wheezy.tar: FROM debian:wheezy - // jessie.tar: RUN sed -i "s/precise/trusty/" /etc/apt/sources.list && apt-get update && - // apt-get -y dist-upgrade - _, f, _, _ := runtime.Caller(0) - testDataPath := filepath.Join(filepath.Dir(f)) + "/testdata/DistUpgrade/" - - datastore := newMockDatastore() - - layers := []LayerRequest{ - {Hash: "blank", Path: testDataPath + "blank.tar.gz"}, - {Hash: "wheezy", Path: testDataPath + "wheezy.tar.gz"}, - {Hash: "jessie", Path: testDataPath + "jessie.tar.gz"}, - } - - assert.Nil(t, ProcessAncestry(datastore, "Docker", "Mock", layers)) - - // check the ancestry features - features := []database.AncestryFeature{} - for i, l := range datastore.ancestry["Mock"].Layers { - assert.Equal(t, layers[i].Hash, l.Hash) - features = append(features, l.Features...) - } - - assert.Len(t, features, 161) - for _, f := range features { - if _, ok := nonUpgradedMap[f.Feature]; ok { - assert.Equal(t, "debian:7", f.Namespace.Name, "%#v", f) - } else { - assert.Equal(t, "debian:8", f.Namespace.Name, "#%v", f) - } - } -} - -func TestProcessLayers(t *testing.T) { - _, f, _, _ := runtime.Caller(0) - testDataPath := filepath.Join(filepath.Dir(f)) + "/testdata/DistUpgrade/" - - datastore := newMockDatastore() - - layers := []LayerRequest{ - {Hash: "blank", Path: testDataPath + "blank.tar.gz"}, - {Hash: "wheezy", Path: testDataPath + "wheezy.tar.gz"}, - {Hash: "jessie", Path: testDataPath + "jessie.tar.gz"}, - } - - LayerWithContents, err := processLayers(datastore, "Docker", layers) - assert.Nil(t, err) - assert.Len(t, LayerWithContents, 3) - // ensure resubmit won't break the stuff - LayerWithContents, err = processLayers(datastore, "Docker", layers) - assert.Nil(t, err) - assert.Len(t, LayerWithContents, 3) - // Ensure each processed layer is correct - assert.Len(t, LayerWithContents[0].Namespaces, 0) - assert.Len(t, LayerWithContents[1].Namespaces, 1) - assert.Len(t, LayerWithContents[2].Namespaces, 1) - assert.Len(t, LayerWithContents[0].Features, 0) - assert.Len(t, LayerWithContents[1].Features, 132) - assert.Len(t, LayerWithContents[2].Features, 191) - - // Ensure each layer has expected namespaces and features detected - if blank, ok := datastore.layers["blank"]; ok { - database.AssertDetectorsEqual(t, EnabledDetectors, blank.By) - assert.Len(t, blank.Namespaces, 0) - assert.Len(t, blank.Features, 0) - } else { - assert.Fail(t, "blank is not stored") - return - } - - if wheezy, ok := datastore.layers["wheezy"]; ok { - database.AssertDetectorsEqual(t, EnabledDetectors, wheezy.By) - assert.Equal(t, []database.LayerNamespace{ - {database.Namespace{"debian:7", dpkg.ParserName}, database.NewNamespaceDetector("os-release", "1.0")}, - }, wheezy.Namespaces) - - assert.Len(t, wheezy.Features, 132) - } else { - assert.Fail(t, "wheezy is not stored") - return - } - - if jessie, ok := datastore.layers["jessie"]; ok { - database.AssertDetectorsEqual(t, EnabledDetectors, jessie.By) - assert.Equal(t, []database.LayerNamespace{ - {database.Namespace{"debian:8", dpkg.ParserName}, database.NewNamespaceDetector("os-release", "1.0")}, - }, jessie.Namespaces) - assert.Len(t, jessie.Features, 191) - } else { - assert.Fail(t, "jessie is not stored") - return - } -} - -func getFeatures(a database.Ancestry) []database.AncestryFeature { - features := []database.AncestryFeature{} - for _, l := range a.Layers { - features = append(features, l.Features...) - } - - return features -} - -func TestComputeAncestryFeatures(t *testing.T) { - vf1 := "format 1" - vf2 := "format 2" - - nd1 := database.NewNamespaceDetector("apk", "1.0") - fd1 := database.NewFeatureDetector("fd1", "1.0") - // this detector only scans one layer with one extra feature, this one - // should be omitted. - fd2 := database.NewFeatureDetector("fd2", "1.0") - - ns1a := database.LayerNamespace{ - database.Namespace{ - Name: "namespace 1:a", - VersionFormat: vf1, - }, nd1, - } - - ns1b := database.LayerNamespace{ - database.Namespace{ - Name: "namespace 1:b", - VersionFormat: vf1, - }, nd1} - - ns2a := database.LayerNamespace{ - database.Namespace{ - Name: "namespace 2:a", - VersionFormat: vf2, - }, nd1} - - ns2b := database.LayerNamespace{ - database.Namespace{ - Name: "namespace 2:b", - VersionFormat: vf2, - }, nd1} - - f1 := database.LayerFeature{ - database.Feature{ - Name: "feature 1", - Version: "0.1", - VersionFormat: vf1, - }, fd1} - - f2 := database.LayerFeature{database.Feature{ - Name: "feature 2", - Version: "0.2", - VersionFormat: vf1, - }, fd2} - - f3 := database.LayerFeature{ - database.Feature{ - Name: "feature 1", - Version: "0.3", - VersionFormat: vf2, - }, fd1} - - f4 := database.LayerFeature{ - database.Feature{ - Name: "feature 2", - Version: "0.3", - VersionFormat: vf2, - }, fd1} - - f5 := database.LayerFeature{ - database.Feature{ - Name: "feature 3", - Version: "0.3", - VersionFormat: vf2, - }, - fd2, - } - - // Suppose Clair is watching two files for namespaces one containing ns1 - // changes e.g. os-release and the other one containing ns2 changes e.g. - // node. - blank := database.Layer{ - Hash: "blank", - By: []database.Detector{nd1, fd1, fd1}, - } - initNS1a := database.Layer{ - Hash: "initNS1a", - By: []database.Detector{nd1, fd1, fd1}, - Namespaces: []database.LayerNamespace{ns1a}, - Features: []database.LayerFeature{f1, f2}, - } - - upgradeNS2b := database.Layer{ - Hash: "upgradeNS2b", - By: []database.Detector{nd1, fd1, fd1}, - Namespaces: []database.LayerNamespace{ns2b}, - } - - upgradeNS1b := database.Layer{ - Hash: "upgradeNS1b", - By: []database.Detector{nd1, fd1, fd1, fd2}, - Namespaces: []database.LayerNamespace{ns1b}, - Features: []database.LayerFeature{f1, f2, f5}, - } - - initNS2a := database.Layer{ - Hash: "initNS2a", - By: []database.Detector{nd1, fd1, fd1}, - Namespaces: []database.LayerNamespace{ns2a}, - Features: []database.LayerFeature{f3, f4}, - } - - removeF2 := database.Layer{ - Hash: "removeF2", - By: []database.Detector{nd1, fd1, fd1}, - Features: []database.LayerFeature{f1}, - } - - // blank -> ns1:a, f1 f2 (init) - // -> f1 (feature change) - // -> ns2:a, f3, f4 (init ns2a) - // -> ns2:b (ns2 upgrade without changing features) - // -> blank (empty) - // -> ns1:b, f1 f2 (ns1 upgrade and add f2) - // -> f1 (remove f2) - // -> blank (empty) - - layers := []database.Layer{ - blank, // empty - initNS1a, // namespace: NS1a, features: f1, f2 - removeF2, // namespace: , features: f1 - initNS2a, // namespace: NS2a, features: f3, f4 ( under NS2a ) - upgradeNS2b, // namespace: NS2b, ( f3, f4 are now under NS2b ) - blank, // empty - upgradeNS1b, // namespace: NS1b, ( f1, f2 are now under NS1b, and they are introduced in this layer. ) - removeF2, // namespace: , features: f1 - blank, - } - - expected := []database.AncestryLayer{ - { - "blank", - []database.AncestryFeature{}, - }, - { - "initNS1a", - []database.AncestryFeature{{database.NamespacedFeature{f1.Feature, ns1a.Namespace}, f1.By, ns1a.By}}, - }, - { - "removeF2", - []database.AncestryFeature{}, - }, - { - "initNS2a", - []database.AncestryFeature{ - {database.NamespacedFeature{f3.Feature, ns2a.Namespace}, f3.By, ns2a.By}, - {database.NamespacedFeature{f4.Feature, ns2a.Namespace}, f4.By, ns2a.By}, - }, - }, - { - "upgradeNS2b", - []database.AncestryFeature{}, - }, - { - "blank", - []database.AncestryFeature{}, - }, - { - "upgradeNS1b", - []database.AncestryFeature{}, - }, - { - "removeF2", - []database.AncestryFeature{}, - }, - { - "blank", - []database.AncestryFeature{}, - }, - } - - expectedDetectors := []database.Detector{nd1, fd1} - ancestryLayers, detectors, err := computeAncestryLayers(layers) - require.Nil(t, err) - - database.AssertDetectorsEqual(t, expectedDetectors, detectors) - for i := range expected { - database.AssertAncestryLayerEqual(t, &expected[i], &ancestryLayers[i]) - } -}