feat(group-layers): Finish layering algorithm implementation
This commit adds the actual logic for extracting layer groupings and merging them until the layer budget is satisfied. The implementation conforms to the design doc as of the time of this commit.
This commit is contained in:
parent
590ce994bb
commit
56a426952c
1 changed files with 103 additions and 58 deletions
|
@ -65,12 +65,11 @@
|
||||||
//
|
//
|
||||||
// If the list of layers fits within the layer budget, it is returned.
|
// If the list of layers fits within the layer budget, it is returned.
|
||||||
//
|
//
|
||||||
// Otherwise layers are merged together in this order:
|
// Otherwise, a merge rating is calculated for each layer. This is the
|
||||||
|
// product of the layer's total size and its root node's popularity.
|
||||||
//
|
//
|
||||||
// * layers whose root meets neither condition above
|
// Layers are then merged in ascending order of merge ratings until
|
||||||
// * layers whose root is popular
|
// they fit into the layer budget.
|
||||||
// * layers whose root is big
|
|
||||||
// * layers whose root meets both conditions
|
|
||||||
//
|
//
|
||||||
// # Threshold values
|
// # Threshold values
|
||||||
//
|
//
|
||||||
|
@ -109,10 +108,10 @@ import (
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"log"
|
"log"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"sort"
|
||||||
|
|
||||||
"gonum.org/v1/gonum/graph/simple"
|
|
||||||
"gonum.org/v1/gonum/graph/flow"
|
"gonum.org/v1/gonum/graph/flow"
|
||||||
"gonum.org/v1/gonum/graph/encoding/dot"
|
"gonum.org/v1/gonum/graph/simple"
|
||||||
)
|
)
|
||||||
|
|
||||||
// closureGraph represents the structured attributes Nix outputs when asking it
|
// closureGraph represents the structured attributes Nix outputs when asking it
|
||||||
|
@ -123,7 +122,7 @@ type exportReferences struct {
|
||||||
} `json:"exportReferencesGraph"`
|
} `json:"exportReferencesGraph"`
|
||||||
|
|
||||||
Graph []struct {
|
Graph []struct {
|
||||||
Size uint64 `json:"closureSize`
|
Size uint64 `json:"closureSize"`
|
||||||
Path string `json:"path"`
|
Path string `json:"path"`
|
||||||
Refs []string `json:"references"`
|
Refs []string `json:"references"`
|
||||||
} `json:"graph"`
|
} `json:"graph"`
|
||||||
|
@ -136,14 +135,26 @@ type exportReferences struct {
|
||||||
// of the nixpkgs tree.
|
// of the nixpkgs tree.
|
||||||
type pkgsMetadata = map[string]int
|
type pkgsMetadata = map[string]int
|
||||||
|
|
||||||
|
// layer represents the data returned for each layer that Nix should
// build for the container image.
type layer struct {
	// Contents lists the store paths that make up this layer. It is
	// the only field that is serialised to JSON.
	Contents []string `json:"contents"`

	// mergeRating is the value layers are ordered by when they have
	// to be merged to fit the layer budget. It is unexported and
	// therefore never serialised.
	mergeRating uint64
}

// merge returns a layer containing the contents of a followed by the
// contents of b, with both merge ratings added together.
//
// Neither operand is modified. The receiver is passed by value, but
// its Contents slice still shares a backing array with the caller's
// copy; the capacity is therefore clamped before appending so that
// append has to allocate instead of writing into spare capacity that
// another merge result (or the caller) may also be using.
func (a layer) merge(b layer) layer {
	own := a.Contents[:len(a.Contents):len(a.Contents)]

	return layer{
		Contents:    append(own, b.Contents...),
		mergeRating: a.mergeRating + b.mergeRating,
	}
}
|
||||||
|
|
||||||
// closure is the payload attached to every node of the dependency
// graph: one entry of the image's closure plus the data needed to make
// layering decisions about it.
type closure struct {
	// GraphID is the numeric identifier of the node in the graph.
	GraphID int64

	// Path is the path of this closure entry.
	Path string

	// Size is the size recorded for this entry in the input graph.
	Size uint64

	// Refs lists the paths this entry references.
	Refs []string

	// Popularity is the popularity value assigned to this entry.
	Popularity int
}
|
||||||
|
|
||||||
func (c *closure) ID() int64 {
|
func (c *closure) ID() int64 {
|
||||||
|
@ -151,6 +162,7 @@ func (c *closure) ID() int64 {
|
||||||
}
|
}
|
||||||
|
|
||||||
var nixRegexp = regexp.MustCompile(`^/nix/store/[a-z0-9]+-`)
|
var nixRegexp = regexp.MustCompile(`^/nix/store/[a-z0-9]+-`)
|
||||||
|
|
||||||
func (c *closure) DOTID() string {
|
func (c *closure) DOTID() string {
|
||||||
return nixRegexp.ReplaceAllString(c.Path, "")
|
return nixRegexp.ReplaceAllString(c.Path, "")
|
||||||
}
|
}
|
||||||
|
@ -158,29 +170,30 @@ func (c *closure) DOTID() string {
|
||||||
// bigOrPopular checks whether this closure should be considered for
|
// bigOrPopular checks whether this closure should be considered for
|
||||||
// separation into its own layer, even if it would otherwise only
|
// separation into its own layer, even if it would otherwise only
|
||||||
// appear in a subtree of the dominator tree.
|
// appear in a subtree of the dominator tree.
|
||||||
func (c *closure) bigOrPopular(pkgs *pkgsMetadata) bool {
|
func (c *closure) bigOrPopular() bool {
|
||||||
const sizeThreshold = 100 * 1000000 // 100MB
|
const sizeThreshold = 100 * 1000000 // 100MB
|
||||||
|
|
||||||
if c.Size > sizeThreshold {
|
if c.Size > sizeThreshold {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(tazjin): After generating the full data, this should
|
// The threshold value used here is currently roughly the
|
||||||
// be changed to something other than a simple inclusion
|
// minimum number of references that only 1% of packages in
|
||||||
// (currently the test-data only contains the top 200
|
// the entire package set have.
|
||||||
// packages).
|
//
|
||||||
pop, ok := (*pkgs)[c.DOTID()]
|
// TODO(tazjin): Do this more elegantly by calculating
|
||||||
if ok {
|
// percentiles for each package and using those instead.
|
||||||
log.Printf("%q is popular!\n", c.DOTID())
|
if c.Popularity >= 1000 {
|
||||||
|
return true
|
||||||
}
|
}
|
||||||
c.Popularity = pop
|
|
||||||
return ok
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func insertEdges(graph *simple.DirectedGraph, pop *pkgsMetadata, cmap *map[string]*closure, node *closure) {
|
func insertEdges(graph *simple.DirectedGraph, cmap *map[string]*closure, node *closure) {
|
||||||
// Big or popular nodes get a separate edge from the top to
|
// Big or popular nodes get a separate edge from the top to
|
||||||
// flag them for their own layer.
|
// flag them for their own layer.
|
||||||
if node.bigOrPopular(pop) && !graph.HasEdgeFromTo(0, node.ID()) {
|
if node.bigOrPopular() && !graph.HasEdgeFromTo(0, node.ID()) {
|
||||||
edge := graph.NewEdge(graph.Node(0), node)
|
edge := graph.NewEdge(graph.Node(0), node)
|
||||||
graph.SetEdge(edge)
|
graph.SetEdge(edge)
|
||||||
}
|
}
|
||||||
|
@ -205,18 +218,24 @@ func buildGraph(refs *exportReferences, pop *pkgsMetadata) *simple.DirectedGraph
|
||||||
//
|
//
|
||||||
// A map from store paths to IDs is kept to actually insert
|
// A map from store paths to IDs is kept to actually insert
|
||||||
// edges below.
|
// edges below.
|
||||||
root := &closure {
|
root := &closure{
|
||||||
GraphID: 0,
|
GraphID: 0,
|
||||||
Path: "image_root",
|
Path: "image_root",
|
||||||
}
|
}
|
||||||
graph.AddNode(root)
|
graph.AddNode(root)
|
||||||
|
|
||||||
for idx, c := range refs.Graph {
|
for idx, c := range refs.Graph {
|
||||||
node := &closure {
|
node := &closure{
|
||||||
GraphID: int64(idx + 1), // inc because of root node
|
GraphID: int64(idx + 1), // inc because of root node
|
||||||
Path: c.Path,
|
Path: c.Path,
|
||||||
Size: c.Size,
|
Size: c.Size,
|
||||||
Refs: c.Refs,
|
Refs: c.Refs,
|
||||||
|
}
|
||||||
|
|
||||||
|
if p, ok := (*pop)[node.DOTID()]; ok {
|
||||||
|
node.Popularity = p
|
||||||
|
} else {
|
||||||
|
node.Popularity = 1
|
||||||
}
|
}
|
||||||
|
|
||||||
graph.AddNode(node)
|
graph.AddNode(node)
|
||||||
|
@ -231,49 +250,74 @@ func buildGraph(refs *exportReferences, pop *pkgsMetadata) *simple.DirectedGraph
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cmap {
|
for _, c := range cmap {
|
||||||
insertEdges(graph, pop, &cmap, c)
|
insertEdges(graph, &cmap, c)
|
||||||
}
|
}
|
||||||
|
|
||||||
// gv, err := dot.Marshal(graph, "deps", "", "")
|
|
||||||
// if err != nil {
|
|
||||||
// log.Fatalf("Could not encode graph: %s\n", err)
|
|
||||||
// }
|
|
||||||
// fmt.Print(string(gv))
|
|
||||||
// os.Exit(0)
|
|
||||||
|
|
||||||
return graph
|
return graph
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Extracts a subgraph starting at the specified root from the
|
||||||
|
// dominator tree. The subgraph is converted into a flat list of
|
||||||
|
// layers, each containing the store paths and merge rating.
|
||||||
|
func groupLayer(dt *flow.DominatorTree, root *closure) layer {
|
||||||
|
size := root.Size
|
||||||
|
contents := []string{root.Path}
|
||||||
|
children := dt.DominatedBy(root.ID())
|
||||||
|
|
||||||
|
// This iteration does not use 'range' because the list being
|
||||||
|
// iterated is modified during the iteration (yes, I'm sorry).
|
||||||
|
for i := 0; i < len(children); i++ {
|
||||||
|
child := children[i].(*closure)
|
||||||
|
size += child.Size
|
||||||
|
contents = append(contents, child.Path)
|
||||||
|
children = append(children, dt.DominatedBy(child.ID())...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return layer{
|
||||||
|
Contents: contents,
|
||||||
|
// TODO(tazjin): The point of this is to factor in
|
||||||
|
// both the size and the popularity when making merge
|
||||||
|
// decisions, but there might be a smarter way to do
|
||||||
|
// it than a plain multiplication.
|
||||||
|
mergeRating: uint64(root.Popularity) * size,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Calculate the dominator tree of the entire package set and group
|
// Calculate the dominator tree of the entire package set and group
|
||||||
// each top-level subtree into a layer.
|
// each top-level subtree into a layer.
|
||||||
func dominate(graph *simple.DirectedGraph) {
|
//
|
||||||
|
// Layers are merged together until they fit into the layer budget,
|
||||||
|
// based on their merge rating.
|
||||||
|
func dominate(budget int, graph *simple.DirectedGraph) []layer {
|
||||||
dt := flow.Dominators(graph.Node(0), graph)
|
dt := flow.Dominators(graph.Node(0), graph)
|
||||||
|
|
||||||
// convert dominator tree back into encodable graph
|
var layers []layer
|
||||||
dg := simple.NewDirectedGraph()
|
for _, n := range dt.DominatedBy(dt.Root().ID()) {
|
||||||
|
layers = append(layers, groupLayer(&dt, n.(*closure)))
|
||||||
for nodes := graph.Nodes(); nodes.Next(); {
|
|
||||||
dg.AddNode(nodes.Node())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for nodes := dg.Nodes(); nodes.Next(); {
|
sort.Slice(layers, func(i, j int) bool {
|
||||||
node := nodes.Node()
|
return layers[i].mergeRating < layers[j].mergeRating
|
||||||
for _, child := range dt.DominatedBy(node.ID()) {
|
})
|
||||||
edge := dg.NewEdge(node, child)
|
|
||||||
dg.SetEdge(edge)
|
if len(layers) > budget {
|
||||||
}
|
log.Printf("Ideal image has %v layers, but budget is %v\n", len(layers), budget)
|
||||||
}
|
}
|
||||||
|
|
||||||
gv, err := dot.Marshal(dg, "deps", "", "")
|
for len(layers) > budget {
|
||||||
if err != nil {
|
merged := layers[0].merge(layers[1])
|
||||||
log.Fatalf("Could not encode graph: %s\n", err)
|
layers[1] = merged
|
||||||
|
layers = layers[1:]
|
||||||
}
|
}
|
||||||
ioutil.WriteFile("graph.dot", gv, 0644)
|
|
||||||
|
return layers
|
||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
graphFile := flag.String("graph", ".attrs.json", "Input file containing graph")
|
graphFile := flag.String("graph", ".attrs.json", "Input file containing graph")
|
||||||
popFile := flag.String("pop", "popularity.json", "Package popularity data")
|
popFile := flag.String("pop", "popularity.json", "Package popularity data")
|
||||||
|
outFile := flag.String("out", "layers.json", "File to write layers to")
|
||||||
|
layerBudget := flag.Int("budget", 94, "Total layer budget available")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
// Parse graph data
|
// Parse graph data
|
||||||
|
@ -300,8 +344,9 @@ func main() {
|
||||||
log.Fatalf("Failed to deserialise input: %s\n", err)
|
log.Fatalf("Failed to deserialise input: %s\n", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Printf("%v\n", pop)
|
|
||||||
|
|
||||||
graph := buildGraph(&refs, &pop)
|
graph := buildGraph(&refs, &pop)
|
||||||
dominate(graph)
|
layers := dominate(*layerBudget, graph)
|
||||||
|
|
||||||
|
j, _ := json.Marshal(layers)
|
||||||
|
ioutil.WriteFile(*outFile, j, 0644)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue