c727b3ca9e
This uses a significantly larger percentage of the total available layers (125) than before, which means that cache hits for layers become more likely between images.
292 lines
11 KiB
Nix
292 lines
11 KiB
Nix
# Copyright 2019 Google LLC
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# https://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# This file contains a modified version of dockerTools.buildImage that, instead
|
|
# of outputting a single tarball which can be imported into a running Docker
|
|
# daemon, builds a manifest file that can be used for serving the image over a
|
|
# registry API.
|
|
|
|
{
  # Name of the image. Only its base name is used to name the derivations
  # built by this file.
  name,

  # Image tag; the Nix output hash will be used if this is null.
  # NOTE(review): this argument is not referenced in the visible part of
  # this file - confirm whether it is still needed.
  tag ? null,

  # Files to put into the image (a Nix store path or a list of paths).
  contents ? [],

  # Packages to install by name. Each name must refer to an attribute of the
  # selected package set. This is passed in as a JSON array in string form,
  # for example "[\"bash\", \"xorg.xev\"]".
  packages ? "[]",

  # Optional bash script to run on the files prior to fixturizing the layer.
  extraCommands ? "",

  # User and group ID handed to the customisation layer builder.
  uid ? 0,
  gid ? 0,

  # Docker's modern image storage mechanisms have a maximum of 125 layers.
  # The default is set somewhat below that limit to leave room for
  # extensibility via additional layers.
  maxLayers ? 96,

  # Selects the package set that is used for building the image.
  #
  # Channels of the public nixpkgs repository, arbitrary git repositories
  # (including private ones) and local paths are supported. The setting
  # accepts three formats:
  #
  # 1. nixpkgs!$channel (e.g. nixpkgs!nixos-19.03)
  # 2. git!$repo!$rev   (e.g. git!git@github.com:NixOS/nixpkgs.git!master)
  # 3. path!$path       (e.g. path!/var/local/nixpkgs)
  #
  # '!' was chosen as the separator because `builtins.split` does not
  # support regex escapes and there are few other candidates. The exact
  # choice matters little, as this value is constructed by the server.
  pkgSource ? "nixpkgs!nixos-19.03"
}:
|
|
|
|
let
|
|
# Imports the package set of a nixpkgs channel. The channel is downloaded
# as a tarball from the nixpkgs-channels repository on Github.
fetchImportChannel = channel:
  import (builtins.fetchTarball
    "https://github.com/NixOS/nixpkgs-channels/archive/${channel}.tar.gz") {};
|
|
|
|
# Imports a package set from a git repository.
#
# The repository is retrieved with builtins.fetchGit, which defaults to the
# git configuration of the outside environment. User-configured SSH
# credentials and similar settings therefore work as expected.
fetchImportGit = url: rev:
  let
    # builtins.fetchGit needs to be told whether 'rev' is a reference
    # (a branch or tag) or a revision (a commit hash).
    #
    # This value is derived from the supplied image tag, so the kind has
    # to be guessed to avoid requiring an explicit format: anything that
    # is exactly 40 characters long is treated as a commit hash.
    #
    # There are additional caveats around whether the default branch
    # contains the specified revision, which need to be explained to
    # users.
    isCommitHash = (builtins.stringLength rev) == 40;
    revision = if isCommitHash then { inherit rev; } else { ref = rev; };
  in import (builtins.fetchGit ({ inherit url; } // revision)) {};
|
|
|
|
# Imports a package set from a location on the local file system, given as
# a string.
# NOTE(review): the Nix manual lists builtins.toPath as deprecated; confirm
# whether it should be replaced (e.g. by `/. + location`).
importPath = location: import (builtins.toPath location) {};
|
|
|
|
# Fields of the package source specification, e.g. [ "git" $repo $rev ].
#
# `builtins.split` interleaves the (empty) lists of regex match groups
# between the pieces of the string, so only the string elements are kept.
source = builtins.filter builtins.isString (builtins.split "!" pkgSource);

# The first field selects how the package set is retrieved. It always
# exists, because splitting yields at least one (possibly empty) string.
sourceType = builtins.elemAt source 0;

# The package set used for building the image.
#
# The number of fields is checked before any of them is accessed, so that
# an incomplete specification (such as "git!$repo" without a revision)
# yields the descriptive error below instead of an out-of-bounds failure.
# Surplus fields are ignored.
pkgs = with builtins;
  let fieldCount = length source;
  in
    if sourceType == "nixpkgs" && fieldCount >= 2
    then fetchImportChannel (elemAt source 1)
    else if sourceType == "git" && fieldCount >= 3
    then fetchImportGit (elemAt source 1) (elemAt source 2)
    else if sourceType == "path" && fieldCount >= 2
    then importPath (elemAt source 1)
    else builtins.throw("Invalid package set source specification: ${pkgSource}");
|
|
in
|
|
|
|
# Since this is essentially a re-wrapping of some of the functionality that is
|
|
# implemented in the dockerTools, we need all of its components in our top-level
|
|
# namespace.
|
|
with builtins;
|
|
with pkgs;
|
|
with dockerTools;
|
|
|
|
let
|
|
# Media type that is assigned to every layer entry of the manifest.
tarLayer = "application/vnd.docker.image.rootfs.diff.tar";
|
|
# Last path component of the image name; used to name the layer
# derivations and the image configuration file.
baseName = baseNameOf name;
|
|
|
|
# deepFetch looks up an item in the package set 's' via a path 'n' that is
# specified in string form.
#
# Top-level items are addressed by their attribute name, nested items by
# using dot-syntax as in Nix itself.
#
# The registry API specification does not allow uppercase characters in
# image names, but the Nix package set uses camelCasing in many places
# (for example `haskellPackages`).
#
# To work around this, a second lookup is performed if the exact path does
# not exist. In that lookup, path elements that equal the lowercased name
# of a top-level attribute are replaced by the attribute's real name. The
# attribute names of nested sets are not considered, as those often have
# keys that only differ in case.
#
# For example, `deepFetch pkgs "xorg.xev"` retrieves `pkgs.xorg.xev` and
# `deepFetch pkgs "haskellpackages.stylish-haskell"` retrieves
# `pkgs.haskellPackages.stylish-haskell`.
#
# If neither lookup succeeds, an attribute set describing the error is
# returned: `{ error = "not_found"; pkg = n; }`.
deepFetch = with lib; s: n:
  let
    path = splitString "." n;
    notFound = { error = "not_found"; pkg = n; };

    # Looking up case-differing versions of an attribute is done by first
    # building a map from the lowercased name of every top-level attribute
    # that contains an uppercase character to its actual spelling.
    containsUpper = str: (match ".*[A-Z].*" str) != null;
    upperKeys = filter containsUpper (attrNames s);
    lowercased = listToAttrs (map (k: nameValuePair (toLower k) k) upperKeys);

    # The requested path with every element that has an entry in the map
    # replaced by its correctly cased name.
    caseAmendedPath = map (v: lowercased."${v}" or v) path;

    # Result of the fallback lookup; only evaluated if the exact
    # (case-sensitive) lookup does not find anything.
    fallback = attrByPath caseAmendedPath notFound s;
  in attrByPath path fallback s;
|
|
|
|
# allContents combines all derivations and store paths that were passed in
# directly with the packages that were requested by name.
#
# The result has two attributes:
#
# * contents: the passed-in contents followed by all packages that were
#   found, in the order in which they were requested
# * errors: the error sets returned by 'deepFetch' for all packages that
#   could not be found, which are reported back to the server
allContents =
  let
    # Sorts a single 'deepFetch' result into either the errors or the
    # contents of the accumulator.
    splitter = attrs: res:
      if hasAttr "error" res
      then attrs // { errors = attrs.errors ++ [ res ]; }
      else attrs // { contents = attrs.contents ++ [ res ]; };

    # 'contents' may be either a single store path or a list of paths. It
    # is normalised to a list here, because the list concatenation above
    # and the 'paths' argument of symlinkJoin both require a list.
    init = { contents = lib.toList contents; errors = []; };

    fetched = map (deepFetch pkgs) (fromJSON packages);
  in foldl' splitter init fetched;
|
|
|
|
# A single environment that links together everything that should end up
# in the image.
contentsEnv = symlinkJoin {
  paths = allContents.contents;
  name = "bulk-layers";
};
|
|
|
|
# The image build infrastructure expects to produce a slightly different
# format than the one that is served over the registry protocol. To satisfy
# its expectations it is handed an empty JSON file that it can write its
# data into.
emptyJson = writeText "empty.json" "{}";
|
|
|
|
# The layers that contain the image contents, built from the closure of
# 'contentsEnv'.
bulkLayers = mkManyPureLayers {
  name = baseName;
  configJson = emptyJson;
  closure = writeText "closure" "${contentsEnv} ${emptyJson}";

  # The customisationLayer occupies one layer of the budget, so one less
  # is available here.
  maxLayers = maxLayers - 1;
};
|
|
|
|
# The final layer of the image. It receives the user-supplied
# 'extraCommands' as well as the requested 'uid' and 'gid'.
customisationLayer = mkCustomisationLayer {
  inherit uid gid extraCommands;
  name = baseName;
  contents = contentsEnv;
  baseJson = emptyJson;
};
|
|
|
|
# Inspects the bulk layers and the customisation layer to determine which
# layers belong to the image and how to serve them.
#
# The result is a JSON list with one object per layer, containing the size,
# SHA256 hash, MD5 hash and path of the layer's tarball. The two hashes are
# used for different purposes, see the registry server implementation for
# details.
#
# The bulk layers are listed in numerical order of their directory names,
# followed by the customisation layer.
#
# Some of this logic is copied straight from `buildLayeredImage`.
allLayersJson = runCommand "fs-layer-list.json" {
  buildInputs = [ coreutils findutils jq openssl ];
} ''
  find ${bulkLayers} -mindepth 1 -maxdepth 1 | sort -t/ -k5 -n > layer-list
  echo ${customisationLayer} >> layer-list

  for layer in $(cat layer-list); do
    layerPath="$layer/layer.tar"
    layerSha256=$(sha256sum $layerPath | cut -d ' ' -f1)
    # The server application compares binary MD5 hashes and expects base64
    # encoding instead of hex.
    layerMd5=$(openssl dgst -md5 -binary $layerPath | openssl enc -base64)
    layerSize=$(wc -c $layerPath | cut -d ' ' -f1)

    jq -n -c --arg sha256 $layerSha256 --arg md5 $layerMd5 --arg size $layerSize --arg path $layerPath \
      '{ size: ($size | tonumber), sha256: $sha256, md5: $md5, path: $path }' >> fs-layers
  done

  cat fs-layers | jq -s -c '.' > $out
'';
|
|
# The layer list as a Nix value: a list of attribute sets with the keys
# size, sha256, md5 and path. The file is read from the output of the
# 'allLayersJson' derivation.
allLayers = fromJSON (readFile allLayersJson);
|
|
|
|
# Image configuration, corresponding to the OCI specification for the file
# type 'application/vnd.oci.image.config.v1+json'.
config = {
  architecture = "amd64";
  os = "linux";

  rootfs = {
    type = "layers";
    diff_ids = map (layer: "sha256:${layer.sha256}") allLayers;
  };

  # Required to let Kubernetes import Nixery images
  config = {};
};
|
|
# The image configuration serialised to a JSON file in the Nix store.
configJson = writeText "${baseName}-config.json" (toJSON config);
|
|
# Size, SHA256 hash (hex) and MD5 hash (base64) of the image configuration
# file. These are computed in a derivation and read back as a Nix value.
configMetadata =
  let
    metadataJson = runCommand "config-meta" {
      buildInputs = [ jq openssl ];
    } ''
      size=$(wc -c ${configJson} | cut -d ' ' -f1)
      sha256=$(sha256sum ${configJson} | cut -d ' ' -f1)
      md5=$(openssl dgst -md5 -binary ${configJson} | openssl enc -base64)
      jq -n -c --arg size $size --arg sha256 $sha256 --arg md5 $md5 \
        '{ size: ($size | tonumber), sha256: $sha256, md5: $md5 }' \
        >> $out
    '';
  in fromJSON (readFile metadataJson);
|
|
|
|
# The manifest JSON expected by the Registry API, which references the
# image configuration and all layers by their SHA256 digests.
#
# This is Docker's "Image Manifest V2, Schema 2":
#   https://docs.docker.com/registry/spec/manifest-v2-2/
manifest = {
  schemaVersion = 2;
  mediaType = "application/vnd.docker.distribution.manifest.v2+json";

  config = {
    inherit (configMetadata) size;
    mediaType = "application/vnd.docker.container.image.v1+json";
    digest = "sha256:${configMetadata.sha256}";
  };

  layers = map (layer: {
    inherit (layer) size;
    mediaType = tarLayer;
    digest = "sha256:${layer.sha256}";
  }) allLayers;
};
|
|
|
|
# Maps each digest (of the image configuration and of every layer) to the
# file that needs to be served for it, together with that file's MD5 hash.
# It is used by the controller to cache the paths during a pull.
layerLocations =
  let
    configLocation = {
      "${configMetadata.sha256}" = {
        path = configJson;
        md5 = configMetadata.md5;
      };
    };

    toLocation = layer: {
      name = "${layer.sha256}";
      value = { inherit (layer) path md5; };
    };
  in configLocation // (listToAttrs (map toLocation allLayers));
|
|
|
|
# Structure that is returned to the controller after a successful build.
manifestOutput = { inherit manifest layerLocations; };
|
|
|
|
# Structure that is returned if errors occurred during the build. Currently
# the only error type that is returned in a structured way is 'not_found',
# accompanied by the names of all packages that could not be found.
errorOutput = {
  error = "not_found";
  pkgs = map (failure: failure.pkg) allContents.errors;
};
|
|
in
  # Writes the manifest structure if all packages were found, and the error
  # structure otherwise. The selected structure is also passed to 'trace'
  # before being serialised.
  let
    output =
      if (length allContents.errors) == 0
      then manifestOutput
      else errorOutput;
  in writeText "manifest-output.json" (toJSON (trace output output))
|