You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
clair/ancestry.go

356 lines
11 KiB

// Copyright 2019 clair authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clair
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"strings"
log "github.com/sirupsen/logrus"
"github.com/coreos/clair/database"
)
type layerIndexedFeature struct {
Feature *database.LayerFeature
Namespace *layerIndexedNamespace
IntroducedIn int
}
type layerIndexedNamespace struct {
Namespace database.LayerNamespace `json:"namespace"`
IntroducedIn int `json:"introducedIn"`
}
// AncestryBuilder builds an Ancestry, which contains an ordered list of layers
// and their features.
type AncestryBuilder struct {
layerIndex int
layerNames []string
detectors []database.Detector
namespaces []layerIndexedNamespace // unique namespaces
features map[database.Detector][]layerIndexedFeature
}
// NewAncestryBuilder creates a new ancestry builder.
//
// ancestry builder takes in the extracted layer information and produce a set of
// namespaces, features, and the relation between features for the whole image.
func NewAncestryBuilder(detectors []database.Detector) *AncestryBuilder {
return &AncestryBuilder{
layerIndex: 0,
detectors: detectors,
namespaces: make([]layerIndexedNamespace, 0),
features: make(map[database.Detector][]layerIndexedFeature),
}
}
// AddLeafLayer adds a leaf layer to the ancestry builder, and computes the
// namespaced features.
func (b *AncestryBuilder) AddLeafLayer(layer *database.Layer) {
b.layerNames = append(b.layerNames, layer.Hash)
for i := range layer.Namespaces {
b.updateNamespace(&layer.Namespaces[i])
}
allFeatureMap := map[database.Detector][]database.LayerFeature{}
for i := range layer.Features {
layerFeature := layer.Features[i]
allFeatureMap[layerFeature.By] = append(allFeatureMap[layerFeature.By], layerFeature)
}
// we only care about the ones specified by builder's detectors
featureMap := map[database.Detector][]database.LayerFeature{}
for i := range b.detectors {
detector := b.detectors[i]
featureMap[detector] = allFeatureMap[detector]
}
for detector := range featureMap {
b.addLayerFeatures(detector, featureMap[detector])
}
b.layerIndex++
}
// Every detector inspects a set of files for the features
// therefore, if that set of files gives a different set of features, it
// should replace the existing features.
func (b *AncestryBuilder) addLayerFeatures(detector database.Detector, features []database.LayerFeature) {
if len(features) == 0 {
// TODO(sidac): we need to differentiate if the detector finds that all
// features are removed ( a file change ), or the package installer is
// removed ( a file deletion ), or there's no change in the file ( file
// does not exist in the blob ) Right now, we're just assuming that no
// change in the file because that's the most common case.
return
}
existingFeatures := b.features[detector]
currentFeatures := make([]layerIndexedFeature, 0, len(features))
// Features that are not in the current layer should be removed.
for i := range existingFeatures {
feature := existingFeatures[i]
for j := range features {
if features[j] == *feature.Feature {
currentFeatures = append(currentFeatures, feature)
break
}
}
}
// Features that newly introduced in the current layer should be added.
for i := range features {
found := false
for j := range existingFeatures {
if *existingFeatures[j].Feature == features[i] {
found = true
break
}
}
if !found {
namespace, found := b.lookupNamespace(&features[i])
if !found {
continue
}
currentFeatures = append(currentFeatures, b.createLayerIndexedFeature(namespace, &features[i]))
}
}
b.features[detector] = currentFeatures
}
// updateNamespace update the namespaces for the ancestry. It does the following things:
// 1. when a detector detects a new namespace, it's added to the ancestry.
// 2. when a detector detects a difference in the detected namespace, it
// replaces the namespace, and also move all features under that namespace to
// the new namespace.
func (b *AncestryBuilder) updateNamespace(layerNamespace *database.LayerNamespace) {
var (
previous *layerIndexedNamespace
foundUpgrade bool
)
newNSNames := strings.Split(layerNamespace.Name, ":")
if len(newNSNames) != 2 {
log.Error("invalid namespace name")
}
newNSName := newNSNames[0]
newNSVersion := newNSNames[1]
for i, ns := range b.namespaces {
nsNames := strings.Split(ns.Namespace.Name, ":")
if len(nsNames) != 2 {
log.Error("invalid namespace name")
continue
}
nsName := nsNames[0]
nsVersion := nsNames[1]
if ns.Namespace.VersionFormat == layerNamespace.VersionFormat && nsName == newNSName {
if nsVersion != newNSVersion {
previous = &b.namespaces[i]
foundUpgrade = true
break
} else {
// not changed
return
}
}
}
// we didn't found the namespace is a upgrade from another namespace, so we
// simply add it.
if !foundUpgrade {
b.namespaces = append(b.namespaces, layerIndexedNamespace{
Namespace: *layerNamespace,
IntroducedIn: b.layerIndex,
})
return
}
// All features referencing to this namespace are now pointing to the new namespace.
// Also those features are now treated as introduced in the same layer as
// when this new namespace is introduced.
previous.Namespace = *layerNamespace
previous.IntroducedIn = b.layerIndex
for _, features := range b.features {
for i, feature := range features {
if feature.Namespace == previous {
features[i].IntroducedIn = previous.IntroducedIn
}
}
}
}
func (b *AncestryBuilder) createLayerIndexedFeature(namespace *layerIndexedNamespace, feature *database.LayerFeature) layerIndexedFeature {
return layerIndexedFeature{
Feature: feature,
Namespace: namespace,
IntroducedIn: b.layerIndex,
}
}
func (b *AncestryBuilder) lookupNamespace(feature *database.LayerFeature) (*layerIndexedNamespace, bool) {
matchedNamespaces := []*layerIndexedNamespace{}
if feature.PotentialNamespace.Name != "" {
a := &layerIndexedNamespace{
Namespace: database.LayerNamespace{
Namespace: feature.PotentialNamespace,
},
IntroducedIn: b.layerIndex,
}
matchedNamespaces = append(matchedNamespaces, a)
} else {
for i, namespace := range b.namespaces {
if namespace.Namespace.VersionFormat == feature.VersionFormat {
matchedNamespaces = append(matchedNamespaces, &b.namespaces[i])
}
}
}
if len(matchedNamespaces) == 1 {
return matchedNamespaces[0], true
}
serialized, _ := json.Marshal(matchedNamespaces)
fields := log.Fields{
"feature.Name": feature.Name,
"feature.VersionFormat": feature.VersionFormat,
"ancestryBuilder.namespaces": string(serialized),
}
if len(matchedNamespaces) > 1 {
log.WithFields(fields).Warn("skip features with ambiguous namespaces")
} else {
log.WithFields(fields).Warn("skip features with no matching namespace")
}
return nil, false
}
func (b *AncestryBuilder) ancestryFeatures(index int) []database.AncestryFeature {
ancestryFeatures := []database.AncestryFeature{}
for detector, features := range b.features {
for _, feature := range features {
if feature.IntroducedIn == index {
ancestryFeatures = append(ancestryFeatures, database.AncestryFeature{
NamespacedFeature: database.NamespacedFeature{
Feature: feature.Feature.Feature,
Namespace: feature.Namespace.Namespace.Namespace,
},
FeatureBy: detector,
NamespaceBy: feature.Namespace.Namespace.By,
})
}
}
}
return ancestryFeatures
}
func (b *AncestryBuilder) ancestryLayers() []database.AncestryLayer {
layers := make([]database.AncestryLayer, 0, b.layerIndex)
for i := 0; i < b.layerIndex; i++ {
layers = append(layers, database.AncestryLayer{
Hash: b.layerNames[i],
Features: b.ancestryFeatures(i),
})
}
return layers
}
// Ancestry produces an Ancestry from the builder.
func (b *AncestryBuilder) Ancestry(name string) *database.Ancestry {
if name == "" {
// TODO(sidac): we'll use the computed ancestry name in the future.
// During the transition, it still requires the user to use the correct
// ancestry name.
name = ancestryName(b.layerNames)
log.WithField("ancestry.Name", name).Warn("generated ancestry name since it's not specified")
}
return &database.Ancestry{
Name: name,
By: b.detectors,
Layers: b.ancestryLayers(),
}
}
// SaveAncestry saves an ancestry to the datastore.
func SaveAncestry(store database.Datastore, ancestry *database.Ancestry) error {
log.WithField("ancestry.Name", ancestry.Name).Debug("saving ancestry")
features := []database.NamespacedFeature{}
for _, layer := range ancestry.Layers {
features = append(features, layer.GetFeatures()...)
}
if err := database.PersistNamespacedFeaturesAndCommit(store, features); err != nil {
return StorageError
}
if err := database.UpsertAncestryAndCommit(store, ancestry); err != nil {
return StorageError
}
if err := database.CacheRelatedVulnerabilityAndCommit(store, features); err != nil {
return StorageError
}
return nil
}
// IsAncestryCached checks if the ancestry is already cached in the database with the current set of detectors.
func IsAncestryCached(store database.Datastore, name string, layerHashes []string) (bool, error) {
if name == "" {
// TODO(sidac): we'll use the computed ancestry name in the future.
// During the transition, it still requires the user to use the correct
// ancestry name.
name = ancestryName(layerHashes)
log.WithField("ancestry.Name", name).Warn("generated ancestry name since it's not specified")
}
ancestry, found, err := database.FindAncestryAndRollback(store, name)
if err != nil {
log.WithError(err).WithField("ancestry.Name", name).Error("failed to query ancestry in database")
return false, StorageError
}
if found {
if len(database.DiffDetectors(EnabledDetectors(), ancestry.By)) == 0 {
log.WithField("ancestry.Name", name).Debug("found cached ancestry")
} else {
log.WithField("ancestry.Name", name).Debug("found outdated ancestry cache")
}
} else {
log.WithField("ancestry.Name", name).Debug("ancestry not cached")
}
return found && len(database.DiffDetectors(EnabledDetectors(), ancestry.By)) == 0, nil
}
func ancestryName(layerHashes []string) string {
tag := sha256.Sum256([]byte(strings.Join(layerHashes, ",")))
return hex.EncodeToString(tag[:])
}