2015-11-13 19:11:28 +00:00
|
|
|
// Copyright 2015 clair authors
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
// Package worker implements the logic to extract useful information from a
|
|
|
|
// container layer and store it in the database.
|
|
|
|
package worker
|
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
|
|
|
|
"github.com/coreos/clair/database"
|
|
|
|
"github.com/coreos/clair/utils"
|
|
|
|
cerrors "github.com/coreos/clair/utils/errors"
|
|
|
|
"github.com/coreos/clair/worker/detectors"
|
2015-11-16 16:33:32 +00:00
|
|
|
"github.com/coreos/pkg/capnslog"
|
2015-11-13 19:11:28 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// Version is the integer version of the worker. It is bumped every time
	// the engine changes.
	Version = 1

	// maxFileSize caps the size of any single file we should extract
	// (200 MiB).
	maxFileSize = 200 << 20
)
|
|
|
|
|
|
|
|
var (
|
|
|
|
log = capnslog.NewPackageLogger("github.com/coreos/clair", "worker")
|
|
|
|
|
|
|
|
// ErrUnsupported is the error that should be raised when an OS or package
|
|
|
|
// manager is not supported.
|
|
|
|
ErrUnsupported = errors.New("worker: OS and/or package manager are not supported")
|
|
|
|
|
|
|
|
// ErrParentUnknown is the error that should be raised when a parent layer
|
|
|
|
// has yet to be processed for the current layer.
|
|
|
|
ErrParentUnknown = errors.New("worker: parent layer is unknown, it must be processed first")
|
|
|
|
)
|
|
|
|
|
2015-12-28 20:03:29 +00:00
|
|
|
// Process detects the Namespace of a layer, the features it adds/removes, and
|
2015-11-13 19:11:28 +00:00
|
|
|
// then stores everything in the database.
|
2016-01-25 19:50:48 +00:00
|
|
|
// TODO(Quentin-M): We could have a goroutine that looks for layers that have been analyzed with an
|
|
|
|
// older engine version and that processes them.
|
2015-12-28 20:03:29 +00:00
|
|
|
func Process(datastore database.Datastore, name, parentName, path, imageFormat string) error {
|
|
|
|
// Verify parameters.
|
|
|
|
if name == "" {
|
|
|
|
return cerrors.NewBadRequestError("could not process a layer which does not have a name")
|
2015-11-13 19:11:28 +00:00
|
|
|
}
|
2015-12-28 20:03:29 +00:00
|
|
|
|
2015-11-13 19:11:28 +00:00
|
|
|
if path == "" {
|
|
|
|
return cerrors.NewBadRequestError("could not process a layer which does not have a path")
|
|
|
|
}
|
2015-12-28 20:03:29 +00:00
|
|
|
|
2015-12-16 04:29:53 +00:00
|
|
|
if imageFormat == "" {
|
2015-12-28 20:03:29 +00:00
|
|
|
return cerrors.NewBadRequestError("could not process a layer which does not have a format")
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Debugf("layer %s: processing (Location: %s, Engine version: %d, Parent: %s, Format: %s)",
|
|
|
|
name, utils.CleanURL(path), Version, parentName, imageFormat)
|
2015-11-13 19:11:28 +00:00
|
|
|
|
|
|
|
// Check to see if the layer is already in the database.
|
2015-12-28 20:03:29 +00:00
|
|
|
layer, err := datastore.FindLayer(name, false, false)
|
2015-11-13 19:11:28 +00:00
|
|
|
if err != nil && err != cerrors.ErrNotFound {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2015-12-28 20:03:29 +00:00
|
|
|
if err == cerrors.ErrNotFound {
|
|
|
|
// New layer case.
|
|
|
|
layer = database.Layer{Name: name, EngineVersion: Version}
|
2015-11-13 19:11:28 +00:00
|
|
|
|
2015-12-28 20:03:29 +00:00
|
|
|
// Retrieve the parent if it has one.
|
|
|
|
// We need to get it with its Features in order to diff them.
|
|
|
|
if parentName != "" {
|
|
|
|
parent, err := datastore.FindLayer(parentName, true, false)
|
2015-11-13 19:11:28 +00:00
|
|
|
if err != nil && err != cerrors.ErrNotFound {
|
|
|
|
return err
|
|
|
|
}
|
2015-12-28 20:03:29 +00:00
|
|
|
if err == cerrors.ErrNotFound {
|
|
|
|
log.Warningf("layer %s: the parent layer (%s) is unknown. it must be processed first", name,
|
|
|
|
parentName)
|
2015-11-13 19:11:28 +00:00
|
|
|
return ErrParentUnknown
|
|
|
|
}
|
2015-12-28 20:03:29 +00:00
|
|
|
layer.Parent = &parent
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// The layer is already in the database, check if we need to update it.
|
|
|
|
if layer.EngineVersion >= Version {
|
|
|
|
log.Debugf(`layer %s: layer content has already been processed in the past with engine %d.
|
|
|
|
Current engine is %d. skipping analysis`, name, layer.EngineVersion, Version)
|
|
|
|
return nil
|
2015-11-13 19:11:28 +00:00
|
|
|
}
|
2015-12-28 20:03:29 +00:00
|
|
|
|
|
|
|
log.Debugf(`layer %s: layer content has been analyzed in the past with engine %d. Current
|
|
|
|
engine is %d. analyzing again`, name, layer.EngineVersion, Version)
|
2015-11-13 19:11:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Analyze the content.
|
2015-12-28 20:03:29 +00:00
|
|
|
layer.Namespace, layer.Features, err = detectContent(name, path, imageFormat, layer.Parent)
|
2015-11-13 19:11:28 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2015-12-28 20:03:29 +00:00
|
|
|
return datastore.InsertLayer(layer)
|
2015-11-13 19:11:28 +00:00
|
|
|
}
|
|
|
|
|
2015-12-28 20:03:29 +00:00
|
|
|
// detectContent downloads a layer's archive and extracts its Namespace and Features.
|
|
|
|
func detectContent(name, path, imageFormat string, parent *database.Layer) (namespace *database.Namespace, features []database.FeatureVersion, err error) {
|
|
|
|
data, err := detectors.DetectData(path, imageFormat, append(detectors.GetRequiredFilesFeatures(),
|
|
|
|
detectors.GetRequiredFilesNamespace()...), maxFileSize)
|
2015-11-13 19:11:28 +00:00
|
|
|
if err != nil {
|
2015-12-28 20:03:29 +00:00
|
|
|
log.Errorf("layer %s: failed to extract data from %s: %s", name, utils.CleanURL(path), err)
|
2015-11-13 19:11:28 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2015-12-28 20:03:29 +00:00
|
|
|
// Detect namespace.
|
|
|
|
namespace, err = detectNamespace(data, parent)
|
2015-11-13 19:11:28 +00:00
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
2016-01-19 20:07:19 +00:00
|
|
|
if namespace != nil {
|
2015-12-28 20:03:29 +00:00
|
|
|
log.Debugf("layer %s: Namespace is %s.", name, namespace.Name)
|
2015-11-16 16:33:32 +00:00
|
|
|
} else {
|
2015-12-28 20:03:29 +00:00
|
|
|
log.Debugf("layer %s: OS is unknown.", name)
|
2015-11-16 16:33:32 +00:00
|
|
|
}
|
2015-11-13 19:11:28 +00:00
|
|
|
|
2015-12-28 20:03:29 +00:00
|
|
|
// Detect features.
|
|
|
|
features, err = detectFeatures(name, data, namespace)
|
2015-11-13 19:11:28 +00:00
|
|
|
if err != nil {
|
2015-12-28 20:03:29 +00:00
|
|
|
log.Errorf("layer %s: package list could not be determined: %s", name, err)
|
2015-11-13 19:11:28 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2016-01-08 15:27:30 +00:00
|
|
|
// If there are no feature detected, use parent's features if possible.
|
|
|
|
// TODO(Quentin-M): We eventually want to give the choice to each detectors to use none/some
|
|
|
|
// parent's Features. It would be useful for dectectors that can't find their entire result using
|
|
|
|
// one Layer.
|
|
|
|
if len(features) == 0 && parent != nil {
|
|
|
|
features = parent.Features
|
|
|
|
}
|
|
|
|
|
2015-12-28 20:03:29 +00:00
|
|
|
log.Debugf("layer %s: detected %d features", name, len(features))
|
2015-11-13 19:11:28 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2015-12-28 20:03:29 +00:00
|
|
|
func detectNamespace(data map[string][]byte, parent *database.Layer) (namespace *database.Namespace, err error) {
|
|
|
|
namespace = detectors.DetectNamespace(data)
|
2015-11-13 19:11:28 +00:00
|
|
|
|
|
|
|
// Attempt to detect the OS from the parent layer.
|
2015-12-28 20:03:29 +00:00
|
|
|
if namespace == nil && parent != nil {
|
|
|
|
namespace = parent.Namespace
|
2015-11-13 19:11:28 +00:00
|
|
|
if err != nil {
|
2015-12-28 20:03:29 +00:00
|
|
|
return
|
2015-11-13 19:11:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2015-12-28 20:03:29 +00:00
|
|
|
func detectFeatures(name string, data map[string][]byte, namespace *database.Namespace) (features []database.FeatureVersion, err error) {
|
|
|
|
// TODO(Quentin-M): We need to pass the parent image DetectFeatures because it's possible that
|
|
|
|
// some detectors would need it in order to produce the entire feature list (if they can only
|
|
|
|
// detect a diff). Also, we should probably pass the detected namespace so detectors could
|
|
|
|
// make their own decision.
|
|
|
|
features, err = detectors.DetectFeatures(data)
|
2015-11-13 19:11:28 +00:00
|
|
|
if err != nil {
|
2015-12-28 20:03:29 +00:00
|
|
|
return
|
2015-11-13 19:11:28 +00:00
|
|
|
}
|
|
|
|
|
2015-12-28 20:03:29 +00:00
|
|
|
// Ensure that every feature has a Namespace associated, otherwise associate the detected
|
|
|
|
// namespace. If there is no detected namespace, we'll throw an error.
|
|
|
|
for i := 0; i < len(features); i++ {
|
|
|
|
if features[i].Feature.Namespace.Name == "" {
|
|
|
|
if namespace != nil {
|
|
|
|
features[i].Feature.Namespace = *namespace
|
|
|
|
} else {
|
|
|
|
log.Errorf("layer %s: Layer's namespace is unknown but non-namespaced features have been detected", name)
|
|
|
|
err = ErrUnsupported
|
|
|
|
return
|
|
|
|
}
|
2015-11-13 19:11:28 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|