clair/ancestry.go
2019-02-26 10:26:49 -05:00

297 lines
9.2 KiB
Go

// Copyright 2019 clair authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clair
import (
"crypto/sha256"
"encoding/hex"
"strings"
log "github.com/sirupsen/logrus"
"github.com/coreos/clair/database"
)
type layerIndexedFeature struct {
feature *database.LayerFeature
namespace *layerIndexedNamespace
introducedIn int
}
type layerIndexedNamespace struct {
namespace database.LayerNamespace
introducedIn int
}
// AncestryBuilder builds an Ancestry, which contains an ordered list of layers
// and their features.
type AncestryBuilder struct {
layerIndex int
layerNames []string
detectors []database.Detector
namespaces map[database.Detector]*layerIndexedNamespace
features map[database.Detector][]layerIndexedFeature
}
// NewAncestryBuilder creates a new ancestry builder.
//
// ancestry builder takes in the extracted layer information and produce a set of
// namespaces, features, and the relation between features for the whole image.
func NewAncestryBuilder(detectors []database.Detector) *AncestryBuilder {
return &AncestryBuilder{
layerIndex: 0,
detectors: detectors,
namespaces: make(map[database.Detector]*layerIndexedNamespace),
features: make(map[database.Detector][]layerIndexedFeature),
}
}
// AddLeafLayer adds a leaf layer to the ancestry builder, and computes the
// namespaced features.
func (b *AncestryBuilder) AddLeafLayer(layer *database.Layer) {
b.layerNames = append(b.layerNames, layer.Hash)
for i := range layer.Namespaces {
b.updateNamespace(&layer.Namespaces[i])
}
allFeatureMap := map[database.Detector][]database.LayerFeature{}
for i := range layer.Features {
layerFeature := layer.Features[i]
allFeatureMap[layerFeature.By] = append(allFeatureMap[layerFeature.By], layerFeature)
}
// we only care about the ones specified by builder's detectors
featureMap := map[database.Detector][]database.LayerFeature{}
for i := range b.detectors {
detector := b.detectors[i]
featureMap[detector] = allFeatureMap[detector]
}
for detector := range featureMap {
b.addLayerFeatures(detector, featureMap[detector])
}
b.layerIndex++
}
// Every detector inspects a set of files for the features
// therefore, if that set of files gives a different set of features, it
// should replace the existing features.
func (b *AncestryBuilder) addLayerFeatures(detector database.Detector, features []database.LayerFeature) {
if len(features) == 0 {
// TODO(sidac): we need to differentiate if the detector finds that all
// features are removed ( a file change ), or the package installer is
// removed ( a file deletion ), or there's no change in the file ( file
// does not exist in the blob ) Right now, we're just assuming that no
// change in the file because that's the most common case.
return
}
existingFeatures := b.features[detector]
currentFeatures := make([]layerIndexedFeature, 0, len(features))
// Features that are not in the current layer should be removed.
for i := range existingFeatures {
feature := existingFeatures[i]
for j := range features {
if features[j] == *feature.feature {
currentFeatures = append(currentFeatures, feature)
break
}
}
}
// Features that newly introduced in the current layer should be added.
for i := range features {
found := false
for j := range existingFeatures {
if *existingFeatures[j].feature == features[i] {
found = true
break
}
}
if !found {
namespace, found := b.lookupNamespace(&features[i])
if !found {
log.WithField("Layer Hashes", b.layerNames).Error("skip, could not find the proper namespace for feature")
continue
}
currentFeatures = append(currentFeatures, b.createLayerIndexedFeature(namespace, &features[i]))
}
}
b.features[detector] = currentFeatures
}
// updateNamespace update the namespaces for the ancestry. It does the following things:
// 1. when a detector detects a new namespace, it's added to the ancestry.
// 2. when a detector detects a difference in the detected namespace, it
// replaces the namespace, and also move all features under that namespace to
// the new namespace.
func (b *AncestryBuilder) updateNamespace(layerNamespace *database.LayerNamespace) {
var (
previous *layerIndexedNamespace
ok bool
)
if previous, ok = b.namespaces[layerNamespace.By]; !ok {
b.namespaces[layerNamespace.By] = &layerIndexedNamespace{
namespace: *layerNamespace,
introducedIn: b.layerIndex,
}
return
}
// All features referencing to this namespace are now pointing to the new namespace.
// Also those features are now treated as introduced in the same layer as
// when this new namespace is introduced.
previous.namespace = *layerNamespace
previous.introducedIn = b.layerIndex
for _, features := range b.features {
for i, feature := range features {
if feature.namespace == previous {
features[i].introducedIn = previous.introducedIn
}
}
}
}
func (b *AncestryBuilder) createLayerIndexedFeature(namespace *layerIndexedNamespace, feature *database.LayerFeature) layerIndexedFeature {
return layerIndexedFeature{
feature: feature,
namespace: namespace,
introducedIn: b.layerIndex,
}
}
func (b *AncestryBuilder) lookupNamespace(feature *database.LayerFeature) (*layerIndexedNamespace, bool) {
for _, namespace := range b.namespaces {
if namespace.namespace.VersionFormat == feature.VersionFormat {
return namespace, true
}
}
return nil, false
}
func (b *AncestryBuilder) ancestryFeatures(index int) []database.AncestryFeature {
ancestryFeatures := []database.AncestryFeature{}
for detector, features := range b.features {
for _, feature := range features {
if feature.introducedIn == index {
ancestryFeatures = append(ancestryFeatures, database.AncestryFeature{
NamespacedFeature: database.NamespacedFeature{
Feature: feature.feature.Feature,
Namespace: feature.namespace.namespace.Namespace,
},
FeatureBy: detector,
NamespaceBy: feature.namespace.namespace.By,
})
}
}
}
return ancestryFeatures
}
func (b *AncestryBuilder) ancestryLayers() []database.AncestryLayer {
layers := make([]database.AncestryLayer, 0, b.layerIndex)
for i := 0; i < b.layerIndex; i++ {
layers = append(layers, database.AncestryLayer{
Hash: b.layerNames[i],
Features: b.ancestryFeatures(i),
})
}
return layers
}
// Ancestry produces an Ancestry from the builder.
func (b *AncestryBuilder) Ancestry(name string) *database.Ancestry {
if name == "" {
// TODO(sidac): we'll use the computed ancestry name in the future.
// During the transition, it still requires the user to use the correct
// ancestry name.
name = ancestryName(b.layerNames)
log.WithField("ancestry.Name", name).Warn("generated ancestry name since it's not specified")
}
return &database.Ancestry{
Name: name,
By: b.detectors,
Layers: b.ancestryLayers(),
}
}
// SaveAncestry saves an ancestry to the datastore.
func SaveAncestry(store database.Datastore, ancestry *database.Ancestry) error {
log.WithField("ancestry.Name", ancestry.Name).Debug("saving ancestry")
features := []database.NamespacedFeature{}
for _, layer := range ancestry.Layers {
features = append(features, layer.GetFeatures()...)
}
if err := database.PersistNamespacedFeaturesAndCommit(store, features); err != nil {
return StorageError
}
if err := database.UpsertAncestryAndCommit(store, ancestry); err != nil {
return StorageError
}
if err := database.CacheRelatedVulnerabilityAndCommit(store, features); err != nil {
return StorageError
}
return nil
}
// IsAncestryCached checks if the ancestry is already cached in the database with the current set of detectors.
func IsAncestryCached(store database.Datastore, name string, layerHashes []string) (bool, error) {
if name == "" {
// TODO(sidac): we'll use the computed ancestry name in the future.
// During the transition, it still requires the user to use the correct
// ancestry name.
name = ancestryName(layerHashes)
log.WithField("ancestry.Name", name).Warn("generated ancestry name since it's not specified")
}
ancestry, found, err := database.FindAncestryAndRollback(store, name)
if err != nil {
log.WithError(err).WithField("ancestry.Name", name).Error("failed to query ancestry in database")
return false, StorageError
}
if found {
if len(database.DiffDetectors(EnabledDetectors(), ancestry.By)) == 0 {
log.WithField("ancestry.Name", name).Debug("found cached ancestry")
} else {
log.WithField("ancestry.Name", name).Debug("found outdated ancestry cache")
}
} else {
log.WithField("ancestry.Name", name).Debug("ancestry not cached")
}
return found && len(database.DiffDetectors(EnabledDetectors(), ancestry.By)) == 0, nil
}
func ancestryName(layerHashes []string) string {
tag := sha256.Sum256([]byte(strings.Join(layerHashes, ",")))
return hex.EncodeToString(tag[:])
}