2017-01-13 07:08:52 +00:00
// Copyright 2017 clair authors
2015-11-13 19:11:28 +00:00
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2017-01-26 23:24:04 +00:00
package clair
2015-11-13 19:11:28 +00:00
import (
2017-01-18 02:40:59 +00:00
"regexp"
2017-05-04 17:21:25 +00:00
log "github.com/sirupsen/logrus"
2015-11-13 19:11:28 +00:00
"github.com/coreos/clair/database"
2017-01-13 23:49:02 +00:00
"github.com/coreos/clair/ext/featurefmt"
2017-01-13 21:48:12 +00:00
"github.com/coreos/clair/ext/featurens"
2017-01-13 08:07:35 +00:00
"github.com/coreos/clair/ext/imagefmt"
2017-01-13 07:08:52 +00:00
"github.com/coreos/clair/pkg/commonerr"
2017-01-13 21:48:12 +00:00
"github.com/coreos/clair/pkg/tarutil"
2015-11-13 19:11:28 +00:00
)
// Worker-wide constants.
const (
	// Version is the integer version of the worker engine. It is bumped
	// every time the engine changes; ProcessLayer compares it against the
	// engine version recorded on a stored layer to decide whether that layer
	// has to be analyzed again.
	Version = 3

	// logLayerName is the structured-log field key under which the name of
	// the layer being processed is recorded.
	logLayerName = "layer"
)
var (
// ErrUnsupported is the error that should be raised when an OS or package
// manager is not supported.
2017-01-13 07:08:52 +00:00
ErrUnsupported = commonerr . NewBadRequestError ( "worker: OS and/or package manager are not supported" )
2015-11-13 19:11:28 +00:00
// ErrParentUnknown is the error that should be raised when a parent layer
// has yet to be processed for the current layer.
2017-01-13 07:08:52 +00:00
ErrParentUnknown = commonerr . NewBadRequestError ( "worker: parent layer is unknown, it must be processed first" )
2017-01-18 02:40:59 +00:00
urlParametersRegexp = regexp . MustCompile ( ` (\?|\&)([^=]+)\=([^ &]+) ` )
2015-11-13 19:11:28 +00:00
)
2017-01-18 02:40:59 +00:00
// cleanURL returns str with every substring matched by urlParametersRegexp
// deleted, i.e. with its URL parameters removed. Callers in this file use it
// on layer paths before writing them to the log.
func cleanURL(str string) string {
	const withoutParameters = ""

	cleaned := urlParametersRegexp.ReplaceAllString(str, withoutParameters)
	return cleaned
}
2017-01-26 23:24:04 +00:00
// ProcessLayer detects the Namespace of a layer, the features it adds/removes,
// and then stores everything in the database.
//
// TODO(Quentin-M): We could have a goroutine that looks for layers that have
// been analyzed with an older engine version and that processes them.
func ProcessLayer ( datastore database . Datastore , imageFormat , name , parentName , path string , headers map [ string ] string ) error {
2015-12-28 20:03:29 +00:00
// Verify parameters.
if name == "" {
2017-01-13 07:08:52 +00:00
return commonerr . NewBadRequestError ( "could not process a layer which does not have a name" )
2015-11-13 19:11:28 +00:00
}
2015-12-28 20:03:29 +00:00
2015-11-13 19:11:28 +00:00
if path == "" {
2017-01-13 07:08:52 +00:00
return commonerr . NewBadRequestError ( "could not process a layer which does not have a path" )
2015-11-13 19:11:28 +00:00
}
2015-12-28 20:03:29 +00:00
2015-12-16 04:29:53 +00:00
if imageFormat == "" {
2017-01-13 07:08:52 +00:00
return commonerr . NewBadRequestError ( "could not process a layer which does not have a format" )
2015-12-28 20:03:29 +00:00
}
2017-05-04 17:21:25 +00:00
log . WithFields ( log . Fields { logLayerName : name , "path" : cleanURL ( path ) , "engine version" : Version , "parent layer" : parentName , "format" : imageFormat } ) . Debug ( "processing layer" )
2015-11-13 19:11:28 +00:00
// Check to see if the layer is already in the database.
2015-12-28 20:03:29 +00:00
layer , err := datastore . FindLayer ( name , false , false )
2017-01-13 07:08:52 +00:00
if err != nil && err != commonerr . ErrNotFound {
2015-11-13 19:11:28 +00:00
return err
}
2017-01-13 07:08:52 +00:00
if err == commonerr . ErrNotFound {
2015-12-28 20:03:29 +00:00
// New layer case.
layer = database . Layer { Name : name , EngineVersion : Version }
2015-11-13 19:11:28 +00:00
2015-12-28 20:03:29 +00:00
// Retrieve the parent if it has one.
// We need to get it with its Features in order to diff them.
if parentName != "" {
parent , err := datastore . FindLayer ( parentName , true , false )
2017-01-13 07:08:52 +00:00
if err != nil && err != commonerr . ErrNotFound {
2015-11-13 19:11:28 +00:00
return err
}
2017-01-13 07:08:52 +00:00
if err == commonerr . ErrNotFound {
2017-05-04 17:21:25 +00:00
log . WithFields ( log . Fields { logLayerName : name , "parent layer" : parentName } ) . Warning ( "the parent layer is unknown. it must be processed first" )
2015-11-13 19:11:28 +00:00
return ErrParentUnknown
}
2015-12-28 20:03:29 +00:00
layer . Parent = & parent
}
} else {
// The layer is already in the database, check if we need to update it.
if layer . EngineVersion >= Version {
2017-05-04 17:21:25 +00:00
log . WithFields ( log . Fields { logLayerName : name , "past engine version" : layer . EngineVersion , "current engine version" : Version } ) . Debug ( "layer content has already been processed in the past with older engine. skipping analysis" )
2015-12-28 20:03:29 +00:00
return nil
2015-11-13 19:11:28 +00:00
}
2017-05-04 17:21:25 +00:00
log . WithFields ( log . Fields { logLayerName : name , "past engine version" : layer . EngineVersion , "current engine version" : Version } ) . Debug ( "layer content has already been processed in the past with older engine. analyzing again" )
2015-11-13 19:11:28 +00:00
}
// Analyze the content.
2016-05-05 17:48:10 +00:00
layer . Namespace , layer . Features , err = detectContent ( imageFormat , name , path , headers , layer . Parent )
2015-11-13 19:11:28 +00:00
if err != nil {
return err
}
2015-12-28 20:03:29 +00:00
return datastore . InsertLayer ( layer )
2015-11-13 19:11:28 +00:00
}
2017-01-13 21:48:12 +00:00
// detectContent downloads a layer's archive and extracts its Namespace and
// Features.
2016-05-11 22:13:00 +00:00
func detectContent ( imageFormat , name , path string , headers map [ string ] string , parent * database . Layer ) ( namespace * database . Namespace , featureVersions [ ] database . FeatureVersion , err error ) {
2017-01-13 23:49:02 +00:00
totalRequiredFiles := append ( featurefmt . RequiredFilenames ( ) , featurens . RequiredFilenames ( ) ... )
2017-01-13 21:48:12 +00:00
files , err := imagefmt . Extract ( imageFormat , path , headers , totalRequiredFiles )
2015-11-13 19:11:28 +00:00
if err != nil {
2017-05-04 17:21:25 +00:00
log . WithError ( err ) . WithFields ( log . Fields { logLayerName : name , "path" : cleanURL ( path ) } ) . Error ( "failed to extract data from path" )
2015-11-13 19:11:28 +00:00
return
}
2017-01-13 21:48:12 +00:00
namespace , err = detectNamespace ( name , files , parent )
if err != nil {
return
}
2015-11-13 19:11:28 +00:00
2015-12-28 20:03:29 +00:00
// Detect features.
2017-01-13 23:49:02 +00:00
featureVersions , err = detectFeatureVersions ( name , files , namespace , parent )
2015-11-13 19:11:28 +00:00
if err != nil {
return
}
2016-05-11 22:13:00 +00:00
if len ( featureVersions ) > 0 {
2017-05-04 17:21:25 +00:00
log . WithFields ( log . Fields { logLayerName : name , "feature count" : len ( featureVersions ) } ) . Debug ( "detected features" )
2016-01-08 15:27:30 +00:00
}
2015-11-13 19:11:28 +00:00
return
}
2017-01-13 21:48:12 +00:00
func detectNamespace ( name string , files tarutil . FilesMap , parent * database . Layer ) ( namespace * database . Namespace , err error ) {
namespace , err = featurens . Detect ( files )
if err != nil {
return
}
2016-05-11 22:13:00 +00:00
if namespace != nil {
2017-05-04 17:21:25 +00:00
log . WithFields ( log . Fields { logLayerName : name , "detected namespace" : namespace . Name } ) . Debug ( "detected namespace" )
2016-05-11 22:13:00 +00:00
return
}
2015-11-13 19:11:28 +00:00
2017-01-13 21:48:12 +00:00
// Fallback to the parent's namespace.
2016-05-11 22:13:00 +00:00
if parent != nil {
2015-12-28 20:03:29 +00:00
namespace = parent . Namespace
2016-05-11 22:13:00 +00:00
if namespace != nil {
2017-05-04 17:21:25 +00:00
log . WithFields ( log . Fields { logLayerName : name , "detected namespace" : namespace . Name } ) . Debug ( "detected namespace (from parent)" )
2015-12-28 20:03:29 +00:00
return
2015-11-13 19:11:28 +00:00
}
}
return
}
2017-01-13 23:49:02 +00:00
func detectFeatureVersions ( name string , files tarutil . FilesMap , namespace * database . Namespace , parent * database . Layer ) ( features [ ] database . FeatureVersion , err error ) {
2016-05-11 22:13:00 +00:00
// TODO(Quentin-M): We need to pass the parent image to DetectFeatures because it's possible that
2015-12-28 20:03:29 +00:00
// some detectors would need it in order to produce the entire feature list (if they can only
// detect a diff). Also, we should probably pass the detected namespace so detectors could
// make their own decision.
2017-01-13 23:49:02 +00:00
features , err = featurefmt . ListFeatures ( files )
2015-11-13 19:11:28 +00:00
if err != nil {
2015-12-28 20:03:29 +00:00
return
2015-11-13 19:11:28 +00:00
}
2016-05-11 22:13:00 +00:00
// If there are no FeatureVersions, use parent's FeatureVersions if possible.
// TODO(Quentin-M): We eventually want to give the choice to each detectors to use none/some of
// their parent's FeatureVersions. It would be useful for detectors that can't find their entire
// result using one Layer.
if len ( features ) == 0 && parent != nil {
features = parent . Features
return
}
// Build a map of the namespaces for each FeatureVersion in our parent layer.
parentFeatureNamespaces := make ( map [ string ] database . Namespace )
if parent != nil {
for _ , parentFeature := range parent . Features {
2016-12-28 01:45:11 +00:00
parentFeatureNamespaces [ parentFeature . Feature . Name + ":" + parentFeature . Version ] = parentFeature . Feature . Namespace
2016-05-11 22:13:00 +00:00
}
}
// Ensure that each FeatureVersion has an associated Namespace.
for i , feature := range features {
if feature . Feature . Namespace . Name != "" {
// There is a Namespace associated.
continue
2015-11-13 19:11:28 +00:00
}
2016-05-11 22:13:00 +00:00
2016-12-28 01:45:11 +00:00
if parentFeatureNamespace , ok := parentFeatureNamespaces [ feature . Feature . Name + ":" + feature . Version ] ; ok {
2016-05-11 22:13:00 +00:00
// The FeatureVersion is present in the parent layer; associate with their Namespace.
features [ i ] . Feature . Namespace = parentFeatureNamespace
continue
}
if namespace != nil {
// The Namespace has been detected in this layer; associate it.
features [ i ] . Feature . Namespace = * namespace
continue
}
2017-05-04 17:21:25 +00:00
log . WithFields ( log . Fields { "feature name" : feature . Feature . Name , "feature version" : feature . Version , logLayerName : name } ) . Warning ( "Namespace unknown" )
2016-05-11 22:13:00 +00:00
err = ErrUnsupported
return
2015-11-13 19:11:28 +00:00
}
return
}