clair/worker/worker.go
2015-11-13 14:11:28 -05:00

303 lines
9.7 KiB
Go

// Copyright 2015 clair authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package worker implements the logic to extract useful informations from a
// container layer and store it in the database.
package worker
import (
"errors"
"io"
"net/http"
"os"
"strings"
"github.com/coreos/pkg/capnslog"
"github.com/coreos/clair/database"
"github.com/coreos/clair/utils"
cerrors "github.com/coreos/clair/utils/errors"
"github.com/coreos/clair/worker/detectors"
)
const (
// Version (integer) represents the worker version.
// Increased each time the engine changes.
Version = 1
// maxFileSize is the maximum size of a single file we should extract.
maxFileSize = 200 * 1024 * 1024 // 200 MiB
)
var (
log = capnslog.NewPackageLogger("github.com/coreos/clair", "worker")
// ErrUnsupported is the error that should be raised when an OS or package
// manager is not supported.
ErrUnsupported = errors.New("worker: OS and/or package manager are not supported")
// ErrParentUnknown is the error that should be raised when a parent layer
// has yet to be processed for the current layer.
ErrParentUnknown = errors.New("worker: parent layer is unknown, it must be processed first")
// SupportedOS is the list of operating system names that the worker supports.
SupportedOS = []string{"debian", "ubuntu", "centos"}
)
// Process detects the OS of a layer, the packages it installs/removes, and
// then stores everything in the database.
func Process(ID, parentID, path string) error {
if ID == "" {
return cerrors.NewBadRequestError("could not process a layer which does not have ID")
}
if path == "" {
return cerrors.NewBadRequestError("could not process a layer which does not have a path")
}
log.Debugf("layer %s: processing (Location: %s, Engine version: %d, Parent: %s)", ID, utils.CleanURL(path), Version, parentID)
// Check to see if the layer is already in the database.
layer, err := database.FindOneLayerByID(ID, []string{database.FieldLayerEngineVersion})
if err != nil && err != cerrors.ErrNotFound {
return err
}
var parent *database.Layer
if layer != nil {
// The layer is already in the database, check if we need to update it.
if layer.EngineVersion >= Version {
log.Debugf("layer %s: layer content has already been processed in the past with engine %d. Current engine is %d. skipping analysis", ID, layer.EngineVersion, Version)
return nil
}
log.Debugf("layer %s: layer content has been analyzed in the past with engine %d. Current engine is %d. analyzing again", ID, layer.EngineVersion, Version)
} else {
// The layer is a new one, create a base struct that we will fill.
layer = &database.Layer{ID: ID, EngineVersion: Version}
// Check to make sure that the parent's layer has already been processed.
if parentID != "" {
parent, err = database.FindOneLayerByID(parentID, []string{database.FieldLayerOS, database.FieldLayerPackages, database.FieldLayerPackages})
if err != nil && err != cerrors.ErrNotFound {
return err
}
if parent == nil {
log.Warningf("layer %s: the parent layer (%s) is unknown. it must be processed first", ID, parentID)
return ErrParentUnknown
}
layer.ParentNode = parent.GetNode()
}
}
// Analyze the content.
layer.OS, layer.InstalledPackagesNodes, layer.RemovedPackagesNodes, err = detectContent(ID, path, parent)
if err != nil {
return err
}
return database.InsertLayer(layer)
}
// detectContent downloads a layer's archive, extracts info from it and returns
// an updated Layer struct.
//
// If parent is not nil, database.FieldLayerOS, database.FieldLayerPackages fields must be
// has been selectioned.
func detectContent(ID, path string, parent *database.Layer) (OS string, installedPackagesNodes, removedPackagesNodes []string, err error) {
data, err := getLayerData(path)
if err != nil {
log.Errorf("layer %s: failed to extract data from %s: %s", ID, path, err)
return
}
OS, err = detectOS(data, parent)
if err != nil {
return
}
log.Debugf("layer %s: OS is %s.", ID, OS)
packageList, err := detectors.DetectPackages(data)
if err != nil {
log.Errorf("layer %s: package list could not be determined: %s", ID, err)
return
}
// If there are any packages, that layer modified the package list.
if len(packageList) > 0 {
// It is possible that the OS could not be detected, in the case of a
// first layer setting MAINTAINER only for instance. However, if the OS
// is unknown and packages are detected, we have to return an error.
if OS == "" {
log.Errorf("layer %s: OS is unknown but %d packages have been detected", ID, len(packageList))
err = ErrUnsupported
return
}
// If the layer has no parent, it can only add packages, not remove them.
if parent == nil {
// Build a list of the layer packages' node values.
var installedPackages []*database.Package
for _, p := range packageList {
p.OS = OS
installedPackages = append(installedPackages, p)
}
// Insert that list into the database.
err = database.InsertPackages(installedPackages)
if err != nil {
return
}
// Set the InstalledPackageNodes field on content.
for _, p := range installedPackages {
if p.Node != "" {
installedPackagesNodes = append(installedPackagesNodes, p.Node)
}
}
} else {
installedPackagesNodes, removedPackagesNodes, err = detectAndInsertInstalledAndRemovedPackages(OS, packageList, parent)
if err != nil {
return
}
}
}
log.Debugf("layer %s: detected %d packages: installs %d and removes %d packages", ID, len(packageList), len(installedPackagesNodes), len(removedPackagesNodes))
return
}
// getLayerData downloads/opens a layer archive and extracts it into memory.
func getLayerData(path string) (data map[string][]byte, err error) {
var layerReader io.ReadCloser
if strings.HasPrefix(path, "http://") || strings.HasPrefix(path, "https://") {
r, err := http.Get(path)
if err != nil {
return nil, cerrors.ErrCouldNotDownload
}
layerReader = r.Body
} else {
layerReader, err = os.Open(path)
if err != nil {
return nil, cerrors.ErrNotFound
}
}
defer layerReader.Close()
data, err = utils.SelectivelyExtractArchive(layerReader, append(detectors.GetRequiredFilesPackages(), detectors.GetRequiredFilesOS()...), maxFileSize)
if err != nil {
return nil, err
}
return
}
func detectOS(data map[string][]byte, parent *database.Layer) (detectedOS string, err error) {
detectedOS = detectors.DetectOS(data)
// Attempt to detect the OS from the parent layer.
if detectedOS == "" && parent != nil {
detectedOS, err = parent.OperatingSystem()
if err != nil {
return "", err
}
}
// If the detectedOS is not in the supported OS list, the OS is unsupported.
if detectedOS != "" {
isSupported := false
for _, osPrefix := range SupportedOS {
if strings.HasPrefix(detectedOS, osPrefix) {
isSupported = true
break
}
}
if !isSupported {
return "", ErrUnsupported
}
}
return
}
// detectAndInsertInstalledAndRemovedPackages finds the installed and removed
// package nodes and inserts the installed packages into the database.
func detectAndInsertInstalledAndRemovedPackages(detectedOS string, packageList []*database.Package, parent *database.Layer) (installedNodes, removedNodes []string, err error) {
// Get the parent layer's packages.
parentPackageNodes, err := parent.AllPackages()
if err != nil {
return nil, nil, err
}
parentPackages, err := database.FindAllPackagesByNodes(parentPackageNodes, []string{database.FieldPackageName, database.FieldPackageVersion})
if err != nil {
return nil, nil, err
}
// Map detected packages (name:version) string to packages.
packagesNVMapToPackage := make(map[string]*database.Package)
for _, p := range packageList {
packagesNVMapToPackage[p.Name+":"+p.Version.String()] = p
}
// Map parent's packages (name:version) string to nodes.
parentPackagesNVMapToNodes := make(map[string]string)
for _, p := range parentPackages {
parentPackagesNVMapToNodes[p.Name+":"+p.Version.String()] = p.Node
}
// Build a list of the parent layer's packages' node values.
var parentPackagesNV []string
for _, p := range parentPackages {
parentPackagesNV = append(parentPackagesNV, p.Name+":"+p.Version.String())
}
// Build a list of the layer packages' node values.
var layerPackagesNV []string
for _, p := range packageList {
layerPackagesNV = append(layerPackagesNV, p.Name+":"+p.Version.String())
}
// Calculate the installed and removed packages.
removedPackagesNV := utils.CompareStringLists(parentPackagesNV, layerPackagesNV)
installedPackagesNV := utils.CompareStringLists(layerPackagesNV, parentPackagesNV)
// Build a list of all the installed packages.
var installedPackages []*database.Package
for _, nv := range installedPackagesNV {
p, _ := packagesNVMapToPackage[nv]
p.OS = detectedOS
installedPackages = append(installedPackages, p)
}
// Insert that list into the database.
err = database.InsertPackages(installedPackages)
if err != nil {
return nil, nil, err
}
// Build the list of installed package nodes.
for _, p := range installedPackages {
if p.Node != "" {
installedNodes = append(installedNodes, p.Node)
}
}
// Build the list of removed package nodes.
for _, nv := range removedPackagesNV {
node, _ := parentPackagesNVMapToNodes[nv]
removedNodes = append(removedNodes, node)
}
return
}