migration: added migration to ancestry schema
This commit is contained in:
parent
33c623427f
commit
318c85ed20
287
database/pgsql/migrations/00009_ancestry_schema.go
Normal file
287
database/pgsql/migrations/00009_ancestry_schema.go
Normal file
@ -0,0 +1,287 @@
|
||||
// Copyright 2016 clair authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package migrations
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/guregu/null/zero"
|
||||
lru "github.com/hashicorp/golang-lru"
|
||||
"github.com/lib/pq"
|
||||
"github.com/remind101/migrate"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// on error, the function will rollback every migration changes in database.
|
||||
func up(tx *sql.Tx) error {
|
||||
err := migrate.Queries([]string{
|
||||
`CREATE TABLE IF NOT EXISTS ancestry (
|
||||
id SERIAL PRIMARY KEY,
|
||||
name VARCHAR(128) NOT NULL,
|
||||
engineversion INT NOT NULL,
|
||||
unique(name, engineversion)
|
||||
)`,
|
||||
`INSERT INTO ancestry(name, engineversion) SELECT name, engineversion FROM layer`,
|
||||
`CREATE TABLE IF NOT EXISTS layer_ancestry(
|
||||
id SERIAL PRIMARY KEY,
|
||||
hash VARCHAR(128) NOT NULL,
|
||||
ancestry_id INT NOT NULL,
|
||||
ancestry_index INT NOT NULL,
|
||||
unique(ancestry_id, ancestry_index)
|
||||
)`,
|
||||
`CREATE INDEX ON ancestry(name)`,
|
||||
`CREATE INDEX ON layer_ancestry(ancestry_id)`,
|
||||
`CREATE INDEX ON layer_ancestry(hash)`,
|
||||
})(tx)
|
||||
countLayerNum := `SELECT count(id) FROM layer`
|
||||
selectLayer := `
|
||||
SELECT layer.name, layer.id, parent_id, ancestry.id, layer.engineversion
|
||||
FROM layer, ancestry
|
||||
WHERE ancestry.name = layer.name
|
||||
AND layer.id >= $1
|
||||
ORDER BY layer.id LIMIT $2`
|
||||
|
||||
selectAncestry := `
|
||||
SELECT layer_ancestry.hash, layer.engineversion
|
||||
FROM ancestry, layer_ancestry, layer
|
||||
WHERE layer.id = $1
|
||||
AND ancestry.name = layer.name
|
||||
AND layer_ancestry.ancestry_id = ancestry.id
|
||||
ORDER BY layer_ancestry.ancestry_index`
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
totalCount := 0
|
||||
count := 0
|
||||
err = tx.QueryRow(countLayerNum).Scan(&totalCount)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
logrus.Debugf("processing %d layers", totalCount)
|
||||
batchSize := 9600
|
||||
nextID := 0
|
||||
cache := newCache(960000)
|
||||
|
||||
// until no layer is selected from database
|
||||
for {
|
||||
// for statistics
|
||||
cachehit := 0
|
||||
startBatch := time.Now()
|
||||
|
||||
rows, err := tx.Query(selectLayer, nextID, batchSize)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// query a batch of layers to compute ancestries
|
||||
var (
|
||||
layerBatch = make([]layerBuffer, 0, batchSize)
|
||||
)
|
||||
|
||||
startLayerSel := time.Now()
|
||||
for rows.Next() {
|
||||
layer := layerBuffer{}
|
||||
err = rows.Scan(&layer.hash, &layer.id, &layer.parentID, &layer.ancestryID, &layer.engineversion)
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
return err
|
||||
}
|
||||
|
||||
// the migration is based on assumption that a layer's parent id
|
||||
// should be less than the layer's id
|
||||
if layer.parentID.Valid && int(layer.parentID.Int64) > layer.id {
|
||||
rows.Close()
|
||||
return errors.New("unsupported in migration: layer id > layer's parent id")
|
||||
}
|
||||
layerBatch = append(layerBatch, layer)
|
||||
}
|
||||
|
||||
err = rows.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
count += len(layerBatch)
|
||||
if len(layerBatch) == 0 {
|
||||
break
|
||||
} else {
|
||||
// set next layer batch's starting id
|
||||
nextID = layerBatch[len(layerBatch)-1].id + 1
|
||||
}
|
||||
layerSelCost := time.Since(startLayerSel)
|
||||
|
||||
ancestrySel := time.Now()
|
||||
// construct ancestries for every layer
|
||||
ancestryBatch := []ancestryBuffer{}
|
||||
for _, layer := range layerBatch {
|
||||
var (
|
||||
ancestry ancestryBuffer
|
||||
incache bool
|
||||
layerHash string
|
||||
layerEngineversion int
|
||||
)
|
||||
|
||||
if layer.parentID.Valid {
|
||||
if ancestry, incache = cache.get(int(layer.parentID.Int64)); !incache {
|
||||
ancestryRows, err := tx.Query(selectAncestry, layer.parentID.Int64)
|
||||
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
return err
|
||||
}
|
||||
|
||||
for ancestryRows.Next() {
|
||||
err := ancestryRows.Scan(&layerHash, &layerEngineversion)
|
||||
if err != nil {
|
||||
rows.Close()
|
||||
ancestryRows.Close()
|
||||
return err
|
||||
}
|
||||
|
||||
// enforce every layer to have the same
|
||||
// engineversion in an ancestry
|
||||
if layerEngineversion != layer.engineversion {
|
||||
rows.Close()
|
||||
ancestryRows.Close()
|
||||
return errors.New("Ancestry engineversion mismatch")
|
||||
}
|
||||
|
||||
ancestry.layers = append(ancestry.layers, layerHash)
|
||||
}
|
||||
|
||||
ancestryRows.Close()
|
||||
ancestry.layers = ancestry.layers[:]
|
||||
cache.add(int(layer.parentID.Int64), ancestry)
|
||||
} else {
|
||||
cachehit++
|
||||
}
|
||||
}
|
||||
ancestry.id = layer.ancestryID
|
||||
ancestry.layers = append(ancestry.layers, layer.hash)
|
||||
cache.add(layer.id, ancestry)
|
||||
ancestryBatch = append(ancestryBatch, ancestry)
|
||||
}
|
||||
ancestrySelCost := time.Since(ancestrySel)
|
||||
ancestryIns := time.Now()
|
||||
// batch insert ancestries
|
||||
insertAncestry, err := tx.Prepare(pq.CopyIn("layer_ancestry", "hash", "ancestry_id", "ancestry_index"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, ancestry := range ancestryBatch {
|
||||
for i, layerHash := range ancestry.layers {
|
||||
_, err := insertAncestry.Exec(layerHash, ancestry.id, i)
|
||||
if err != nil {
|
||||
insertAncestry.Close()
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_, err = insertAncestry.Exec()
|
||||
if err != nil {
|
||||
insertAncestry.Close()
|
||||
return err
|
||||
}
|
||||
|
||||
err = insertAncestry.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
ancestryInsCost := time.Since(ancestryIns)
|
||||
logrus.WithFields(logrus.Fields{
|
||||
"progress": fmt.Sprintf("%f%%(%d/%d)", 100*float32(count)/float32(totalCount), count, totalCount),
|
||||
"layer sel": fmt.Sprintf("%v", layerSelCost),
|
||||
"ancestry sel": fmt.Sprintf("%v", ancestrySelCost),
|
||||
"ancestry ins": fmt.Sprintf("%v", ancestryInsCost),
|
||||
"total cost": fmt.Sprint(time.Since(startBatch)),
|
||||
"cache hit": fmt.Sprintf("%f%%", 100*float32(cachehit)/float32(len(ancestryBatch))),
|
||||
}).Debugln("finished ancestry")
|
||||
}
|
||||
|
||||
err = migrate.Queries([]string{
|
||||
`ALTER TABLE layer_ancestry
|
||||
ADD CONSTRAINT layer_ancestry_ancestry_id_fkey
|
||||
FOREIGN KEY (ancestry_id)
|
||||
REFERENCES ancestry
|
||||
ON DELETE CASCADE`,
|
||||
})(tx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func newCache(size int) ancestryCache {
|
||||
cache, err := lru.NewARC(size)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return &arcCache{lru: cache}
|
||||
}
|
||||
|
||||
type layerBuffer struct {
|
||||
id int
|
||||
hash string
|
||||
parentID zero.Int
|
||||
ancestryID int
|
||||
engineversion int
|
||||
}
|
||||
|
||||
// ancestryBuffer is the in-memory form of an ancestry: the id of its ancestry
// row and the hashes of its layers, where a hash's position in the slice is
// used as its ancestry_index on insert.
type ancestryBuffer struct {
	// id is the ancestry row id written to layer_ancestry.ancestry_id.
	id int

	// layers lists the layer hashes in ancestry order.
	layers []string
}
|
||||
|
||||
type ancestryCache interface {
|
||||
add(id int, ancestry ancestryBuffer)
|
||||
get(id int) (ancestryBuffer, bool)
|
||||
}
|
||||
|
||||
type arcCache struct {
|
||||
lru *lru.ARCCache
|
||||
}
|
||||
|
||||
func (c *arcCache) add(layerID int, ancestry ancestryBuffer) {
|
||||
c.lru.Add(layerID, ancestry)
|
||||
}
|
||||
|
||||
func (c *arcCache) get(layerID int) (ancestryBuffer, bool) {
|
||||
v, ok := c.lru.Get(layerID)
|
||||
if !ok {
|
||||
return ancestryBuffer{}, ok
|
||||
}
|
||||
r, ok := v.(ancestryBuffer)
|
||||
if !ok {
|
||||
panic("invalid cache")
|
||||
}
|
||||
return r, ok
|
||||
}
|
||||
|
||||
func init() {
|
||||
RegisterMigration(migrate.Migration{
|
||||
ID: 9,
|
||||
Up: up,
|
||||
Down: migrate.Queries([]string{
|
||||
`DROP TABLE IF EXISTS layer_ancestry`,
|
||||
`DROP TABLE IF EXISTS ancestry`,
|
||||
}),
|
||||
})
|
||||
}
|
Loading…
Reference in New Issue
Block a user