// Copyright 2017 clair authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2017-01-26 23:24:04 +00:00
package clair
2015-11-13 19:11:28 +00:00
import (
2017-01-18 02:40:59 +00:00
"regexp"
2017-05-04 17:21:25 +00:00
log "github.com/sirupsen/logrus"
2015-11-13 19:11:28 +00:00
"github.com/coreos/clair/database"
2017-01-13 23:49:02 +00:00
"github.com/coreos/clair/ext/featurefmt"
2017-01-13 21:48:12 +00:00
"github.com/coreos/clair/ext/featurens"
2017-01-13 08:07:35 +00:00
"github.com/coreos/clair/ext/imagefmt"
2017-01-13 07:08:52 +00:00
"github.com/coreos/clair/pkg/commonerr"
2017-01-13 21:48:12 +00:00
"github.com/coreos/clair/pkg/tarutil"
2015-11-13 19:11:28 +00:00
)
const (
	// Version (integer) represents the worker version.
	// Increased each time the engine changes.
	Version = 3

	// logLayerName is the structured-log field key under which the name of
	// the layer being processed is recorded.
	logLayerName = "layer"
)
var (
// ErrUnsupported is the error that should be raised when an OS or package
// manager is not supported.
2017-01-13 07:08:52 +00:00
ErrUnsupported = commonerr . NewBadRequestError ( "worker: OS and/or package manager are not supported" )
2015-11-13 19:11:28 +00:00
// ErrParentUnknown is the error that should be raised when a parent layer
// has yet to be processed for the current layer.
2017-01-13 07:08:52 +00:00
ErrParentUnknown = commonerr . NewBadRequestError ( "worker: parent layer is unknown, it must be processed first" )
2017-01-18 02:40:59 +00:00
urlParametersRegexp = regexp . MustCompile ( ` (\?|\&)([^=]+)\=([^ &]+) ` )
2015-11-13 19:11:28 +00:00
)
2017-01-18 02:40:59 +00:00
// cleanURL returns str with every substring matched by urlParametersRegexp
// deleted, i.e. with its URL parameters stripped.
func cleanURL(str string) string {
	const replacement = ""
	stripped := urlParametersRegexp.ReplaceAllString(str, replacement)
	return stripped
}
2017-01-26 23:24:04 +00:00
// ProcessLayer detects the Namespace of a layer, the features it adds/removes,
// and then stores everything in the database.
//
// TODO(Quentin-M): We could have a goroutine that looks for layers that have
// been analyzed with an older engine version and that processes them.
func ProcessLayer ( datastore database . Datastore , imageFormat , name , parentName , path string , headers map [ string ] string ) error {
2015-12-28 20:03:29 +00:00
// Verify parameters.
if name == "" {
2017-01-13 07:08:52 +00:00
return commonerr . NewBadRequestError ( "could not process a layer which does not have a name" )
2015-11-13 19:11:28 +00:00
}
2015-12-28 20:03:29 +00:00
2015-11-13 19:11:28 +00:00
if path == "" {
2017-01-13 07:08:52 +00:00
return commonerr . NewBadRequestError ( "could not process a layer which does not have a path" )
2015-11-13 19:11:28 +00:00
}
2015-12-28 20:03:29 +00:00
2015-12-16 04:29:53 +00:00
if imageFormat == "" {
2017-01-13 07:08:52 +00:00
return commonerr . NewBadRequestError ( "could not process a layer which does not have a format" )
2015-12-28 20:03:29 +00:00
}
2017-05-04 17:21:25 +00:00
log . WithFields ( log . Fields { logLayerName : name , "path" : cleanURL ( path ) , "engine version" : Version , "parent layer" : parentName , "format" : imageFormat } ) . Debug ( "processing layer" )
2015-11-13 19:11:28 +00:00
// Check to see if the layer is already in the database.
2015-12-28 20:03:29 +00:00
layer , err := datastore . FindLayer ( name , false , false )
2017-01-13 07:08:52 +00:00
if err != nil && err != commonerr . ErrNotFound {
2015-11-13 19:11:28 +00:00
return err
}
2017-01-13 07:08:52 +00:00
if err == commonerr . ErrNotFound {
2015-12-28 20:03:29 +00:00
// New layer case.
layer = database . Layer { Name : name , EngineVersion : Version }
2015-11-13 19:11:28 +00:00
2015-12-28 20:03:29 +00:00
// Retrieve the parent if it has one.
// We need to get it with its Features in order to diff them.
if parentName != "" {
parent , err := datastore . FindLayer ( parentName , true , false )
2017-01-13 07:08:52 +00:00
if err != nil && err != commonerr . ErrNotFound {
2015-11-13 19:11:28 +00:00
return err
}
2017-01-13 07:08:52 +00:00
if err == commonerr . ErrNotFound {
2017-05-04 17:21:25 +00:00
log . WithFields ( log . Fields { logLayerName : name , "parent layer" : parentName } ) . Warning ( "the parent layer is unknown. it must be processed first" )
2015-11-13 19:11:28 +00:00
return ErrParentUnknown
}
2015-12-28 20:03:29 +00:00
layer . Parent = & parent
}
} else {
// The layer is already in the database, check if we need to update it.
if layer . EngineVersion >= Version {
2017-05-04 17:21:25 +00:00
log . WithFields ( log . Fields { logLayerName : name , "past engine version" : layer . EngineVersion , "current engine version" : Version } ) . Debug ( "layer content has already been processed in the past with older engine. skipping analysis" )
2015-12-28 20:03:29 +00:00
return nil
2015-11-13 19:11:28 +00:00
}
2017-05-04 17:21:25 +00:00
log . WithFields ( log . Fields { logLayerName : name , "past engine version" : layer . EngineVersion , "current engine version" : Version } ) . Debug ( "layer content has already been processed in the past with older engine. analyzing again" )
2015-11-13 19:11:28 +00:00
}
// Analyze the content.
2017-05-12 20:59:17 +00:00
layer . Namespaces , layer . Features , err = detectContent ( imageFormat , name , path , headers , layer . Parent )
2015-11-13 19:11:28 +00:00
if err != nil {
return err
}
2015-12-28 20:03:29 +00:00
return datastore . InsertLayer ( layer )
2015-11-13 19:11:28 +00:00
}
2017-01-13 21:48:12 +00:00
// detectContent downloads a layer's archive and extracts its Namespace and
// Features.
2017-05-12 20:59:17 +00:00
func detectContent ( imageFormat , name , path string , headers map [ string ] string , parent * database . Layer ) ( namespaces [ ] database . Namespace , featureVersions [ ] database . FeatureVersion , err error ) {
2017-01-13 23:49:02 +00:00
totalRequiredFiles := append ( featurefmt . RequiredFilenames ( ) , featurens . RequiredFilenames ( ) ... )
2017-01-13 21:48:12 +00:00
files , err := imagefmt . Extract ( imageFormat , path , headers , totalRequiredFiles )
2015-11-13 19:11:28 +00:00
if err != nil {
2017-05-04 17:21:25 +00:00
log . WithError ( err ) . WithFields ( log . Fields { logLayerName : name , "path" : cleanURL ( path ) } ) . Error ( "failed to extract data from path" )
2015-11-13 19:11:28 +00:00
return
}
2017-05-12 20:59:17 +00:00
namespaces , err = detectNamespaces ( name , files , parent )
2017-01-13 21:48:12 +00:00
if err != nil {
return
}
2015-11-13 19:11:28 +00:00
2017-06-22 18:01:41 +00:00
featureVersions , err = detectFeatureVersions ( name , files , namespaces , parent )
if err != nil {
return
2015-11-13 19:11:28 +00:00
}
2017-06-22 18:01:41 +00:00
2016-05-11 22:13:00 +00:00
if len ( featureVersions ) > 0 {
2017-05-04 17:21:25 +00:00
log . WithFields ( log . Fields { logLayerName : name , "feature count" : len ( featureVersions ) } ) . Debug ( "detected features" )
2016-01-08 15:27:30 +00:00
}
2015-11-13 19:11:28 +00:00
return
}
2017-05-30 17:45:14 +00:00
// detectNamespaces returns a list of unique namespaces detected in a layer and its ancestry.
2017-05-12 20:59:17 +00:00
func detectNamespaces ( name string , files tarutil . FilesMap , parent * database . Layer ) ( namespaces [ ] database . Namespace , err error ) {
2017-05-30 17:45:14 +00:00
nsSet := map [ string ] * database . Namespace { }
nsCurrent , err := featurens . Detect ( files )
2017-01-13 21:48:12 +00:00
if err != nil {
return
}
2017-05-30 17:45:14 +00:00
2016-05-11 22:13:00 +00:00
if parent != nil {
2017-05-12 20:59:17 +00:00
for _ , ns := range parent . Namespaces {
2017-06-22 18:01:41 +00:00
// Under assumption that one version format corresponds to one type
// of namespace.
nsSet [ ns . VersionFormat ] = & ns
log . WithFields ( log . Fields { logLayerName : name , "detected namespace" : ns . Name , "version format" : ns . VersionFormat } ) . Debug ( "detected namespace (from parent)" )
2015-11-13 19:11:28 +00:00
}
}
2017-06-22 18:01:41 +00:00
for _ , ns := range nsCurrent {
nsSet [ ns . VersionFormat ] = & ns
log . WithFields ( log . Fields { logLayerName : name , "detected namespace" : ns . Name , "version format" : ns . VersionFormat } ) . Debug ( "detected namespace" )
}
2017-05-30 17:45:14 +00:00
for _ , ns := range nsSet {
namespaces = append ( namespaces , * ns )
}
2015-11-13 19:11:28 +00:00
return
}
2017-06-22 18:01:41 +00:00
func detectFeatureVersions ( name string , files tarutil . FilesMap , namespaces [ ] database . Namespace , parent * database . Layer ) ( features [ ] database . FeatureVersion , err error ) {
2016-05-11 22:13:00 +00:00
// Build a map of the namespaces for each FeatureVersion in our parent layer.
parentFeatureNamespaces := make ( map [ string ] database . Namespace )
if parent != nil {
for _ , parentFeature := range parent . Features {
2016-12-28 01:45:11 +00:00
parentFeatureNamespaces [ parentFeature . Feature . Name + ":" + parentFeature . Version ] = parentFeature . Feature . Namespace
2016-05-11 22:13:00 +00:00
}
}
2017-06-22 18:01:41 +00:00
for _ , ns := range namespaces {
// TODO(Quentin-M): We need to pass the parent image to DetectFeatures because it's possible that
// some detectors would need it in order to produce the entire feature list (if they can only
// detect a diff). Also, we should probably pass the detected namespace so detectors could
// make their own decision.
detectedFeatures , err := featurefmt . ListFeatures ( files , & ns )
if err != nil {
return features , err
2015-11-13 19:11:28 +00:00
}
2016-05-11 22:13:00 +00:00
2017-06-22 18:01:41 +00:00
// Ensure that each FeatureVersion has an associated Namespace.
for i , feature := range detectedFeatures {
if feature . Feature . Namespace . Name != "" {
// There is a Namespace associated.
continue
}
if parentFeatureNamespace , ok := parentFeatureNamespaces [ feature . Feature . Name + ":" + feature . Version ] ; ok {
// The FeatureVersion is present in the parent layer; associate
// with their Namespace.
// This might cause problem because a package with same feature
// name and version could be different in parent layer's
// namespace and current layer's namespace
detectedFeatures [ i ] . Feature . Namespace = parentFeatureNamespace
continue
}
2016-05-11 22:13:00 +00:00
2017-06-22 18:01:41 +00:00
detectedFeatures [ i ] . Feature . Namespace = ns
2016-05-11 22:13:00 +00:00
}
2017-06-22 18:01:41 +00:00
features = append ( features , detectedFeatures ... )
}
2016-05-11 22:13:00 +00:00
2017-06-22 18:01:41 +00:00
// If there are no FeatureVersions, use parent's FeatureVersions if possible.
// TODO(Quentin-M): We eventually want to give the choice to each detectors to use none/some of
// their parent's FeatureVersions. It would be useful for detectors that can't find their entire
// result using one Layer.
if len ( features ) == 0 && parent != nil {
features = parent . Features
2015-11-13 19:11:28 +00:00
}
return
}