diff --git a/tools/nixery/README.md b/tools/nixery/README.md new file mode 100644 index 000000000..6b1db4696 --- /dev/null +++ b/tools/nixery/README.md @@ -0,0 +1,99 @@ +# Nixery + +This package implements a Docker-compatible container registry that is capable +of transparently building and serving container images using [Nix][]. + +The project started out with the intention of becoming a Kubernetes controller +that can serve declarative image specifications specified in CRDs as container +images. The design for this is outlined in [a public gist][gist]. + +Currently it focuses on the ad-hoc creation of container images as outlined +below with an example instance available at +[nixery.appspot.com](https://nixery.appspot.com). + +This is not an officially supported Google project. + +## Ad-hoc container images + +Nixery supports building images on-demand based on the *image name*. Every +package that the user intends to include in the image is specified as a path +component of the image name. + +The path components refer to top-level keys in `nixpkgs` and are used to build a +container image using Nix's [buildLayeredImage][] functionality. + +The special meta-package `shell` provides an image base with many core +components (such as `bash` and `coreutils`) that users commonly expect in +interactive images. + +## Usage example + +Using the publicly available Nixery instance at `nixery.appspot.com`, one could +retrieve a container image containing `curl` and an interactive shell like this: + +```shell +tazjin@tazbox:~$ sudo docker run -ti nixery.appspot.com/shell/curl bash +Unable to find image 'nixery.appspot.com/shell/curl:latest' locally +latest: Pulling from shell/curl +7734b79e1ba1: Already exists +b0d2008d18cd: Pull complete +< ... some layers omitted ...> +Digest: sha256:178270bfe84f74548b6a43347d73524e5c2636875b673675db1547ec427cf302 +Status: Downloaded newer image for nixery.appspot.com/shell/curl:latest +bash-4.4# curl --version +curl 7.64.0 (x86_64-pc-linux-gnu) libcurl/7.64.0 OpenSSL/1.0.2q zlib/1.2.11 libssh2/1.8.0 nghttp2/1.35.1 +``` + +## Known issues + +* Initial build times for an image can be somewhat slow while Nixery retrieves + the required derivations from the Nix cache under-the-hood. + + Due to how the Docker Registry API works, there is no way to provide + feedback to the user during this period - hence the UX (in interactive mode) + is currently that "nothing is happening" for a while after the `Unable to + find image` message is printed. + +* For some reason these images do not currently work in GKE clusters. + Launching a Kubernetes pod that uses a Nixery image results in an error + stating `unable to convert a nil pointer to a runtime API image: + ImageInspectError`. + + This error comes from + [here](https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/dockershim/convert.go#L35) + and it occurs *after* the Kubernetes node has retrieved the image from + Nixery (as per the Nixery logs). + +## Kubernetes integration (in the future) + +**Note**: The Kubernetes integration is not yet implemented. + +The basic idea of the Kubernetes integration is to provide a way for users to +specify the contents of a container image as an API object in Kubernetes which +will be transparently built by Nix when the container is started up. + +For example, given a resource that looks like this: + +```yaml +--- +apiVersion: k8s.nixos.org/v1alpha +kind: NixImage +metadata: + name: curl-and-jq +data: + tag: v1 + contents: + - curl + - jq + - bash +``` + +One could create a container that references the `curl-and-jq` image, which will +then be created by Nix when the container image is pulled. + +The controller itself runs as a daemonset on every node in the cluster, +providing a host-mounted `/nix/store`-folder for caching purposes. + +[Nix]: https://nixos.org/ +[gist]: https://gist.github.com/tazjin/08f3d37073b3590aacac424303e6f745 +[buildLayeredImage]: https://grahamc.com/blog/nix-and-layered-docker-images diff --git a/tools/nixery/app.yaml b/tools/nixery/app.yaml new file mode 100644 index 000000000..223fa7582 --- /dev/null +++ b/tools/nixery/app.yaml @@ -0,0 +1,14 @@ +env: flex +runtime: custom + +resources: + cpu: 2 + memory_gb: 4 + disk_size_gb: 50 + +automatic_scaling: + max_num_instances: 3 + cool_down_period_sec: 60 + +env_variables: + BUCKET: "nixery-layers" diff --git a/tools/nixery/build-registry-image.nix b/tools/nixery/build-registry-image.nix new file mode 100644 index 000000000..11030d38a --- /dev/null +++ b/tools/nixery/build-registry-image.nix @@ -0,0 +1,167 @@ +# This file contains a modified version of dockerTools.buildImage that, instead +# of outputting a single tarball which can be imported into a running Docker +# daemon, builds a manifest file that can be used for serving the image over a +# registry API. + +{ + # Image Name + name, + # Image tag, the Nix's output hash will be used if null + tag ? null, + # Files to put on the image (a nix store path or list of paths). + contents ? [], + # Packages to install by name (which must refer to top-level attributes of + # nixpkgs). This is passed in as a JSON-array in string form. + packages ? "[]", + # Optional bash script to run on the files prior to fixturizing the layer. + extraCommands ? "", uid ? 0, gid ? 0, + # Docker's lowest maximum layer limit is 42-layers for an old + # version of the AUFS graph driver. We pick 24 to ensure there is + # plenty of room for extension. I believe the actual maximum is + # 128. + maxLayers ? 24, + # Nix package set to use + pkgs ? (import {}) +}: + +# Since this is essentially a re-wrapping of some of the functionality that is +# implemented in the dockerTools, we need all of its components in our top-level +# namespace. +with pkgs; +with dockerTools; + +let + tarLayer = "application/vnd.docker.image.rootfs.diff.tar"; + baseName = baseNameOf name; + + # deepFetch traverses the top-level Nix package set to retrieve an item via a + # path specified in string form. + # + # For top-level items, the name of the key yields the result directly. Nested + # items are fetched by using dot-syntax, as in Nix itself. + # + # For example, `deepFetch pkgs "xorg.xev"` retrieves `pkgs.xorg.xev`. + deepFetch = s: n: + let path = lib.strings.splitString "." n; + err = builtins.throw "Could not find '${n}' in package set"; + in lib.attrsets.attrByPath path err s; + + # allContents is the combination of all derivations and store paths passed in + # directly, as well as packages referred to by name. + allContents = contents ++ (map (deepFetch pkgs) (builtins.fromJSON packages)); + + contentsEnv = symlinkJoin { + name = "bulk-layers"; + paths = allContents; + }; + + # The image build infrastructure expects to be outputting a slightly different + # format than the one we serve over the registry protocol. To work around its + # expectations we need to provide an empty JSON file that it can write some + # fun data into. + emptyJson = writeText "empty.json" "{}"; + + bulkLayers = mkManyPureLayers { + name = baseName; + configJson = emptyJson; + closure = writeText "closure" "${contentsEnv} ${emptyJson}"; + # One layer will be taken up by the customisationLayer, so + # take up one less. + maxLayers = maxLayers - 1; + }; + + customisationLayer = mkCustomisationLayer { + name = baseName; + contents = contentsEnv; + baseJson = emptyJson; + inherit uid gid extraCommands; + }; + + # Inspect the returned bulk layers to determine which layers belong to the + # image and how to serve them. + # + # This computes both an MD5 and a SHA256 hash of each layer, which are used + # for different purposes. See the registry server implementation for details. + # + # Some of this logic is copied straight from `buildLayeredImage`. + allLayersJson = runCommand "fs-layer-list.json" { + buildInputs = [ coreutils findutils jq openssl ]; + } '' + find ${bulkLayers} -mindepth 1 -maxdepth 1 | sort -t/ -k5 -n > layer-list + echo ${customisationLayer} >> layer-list + + for layer in $(cat layer-list); do + layerPath="$layer/layer.tar" + layerSha256=$(sha256sum $layerPath | cut -d ' ' -f1) + # The server application compares binary MD5 hashes and expects base64 + # encoding instead of hex. + layerMd5=$(openssl dgst -md5 -binary $layerPath | openssl enc -base64) + layerSize=$(wc -c $layerPath | cut -d ' ' -f1) + + jq -n -c --arg sha256 $layerSha256 --arg md5 $layerMd5 --arg size $layerSize --arg path $layerPath \ + '{ size: ($size | tonumber), sha256: $sha256, md5: $md5, path: $path }' >> fs-layers + done + + cat fs-layers | jq -s -c '.' > $out + ''; + allLayers = builtins.fromJSON (builtins.readFile allLayersJson); + + # Image configuration corresponding to the OCI specification for the file type + # 'application/vnd.oci.image.config.v1+json' + config = { + architecture = "amd64"; + os = "linux"; + rootfs.type = "layers"; + rootfs.diff_ids = map (layer: "sha256:${layer.sha256}") allLayers; + }; + configJson = writeText "${baseName}-config.json" (builtins.toJSON config); + configMetadata = with builtins; fromJSON (readFile (runCommand "config-meta" { + buildInputs = [ jq openssl ]; + } '' + size=$(wc -c ${configJson} | cut -d ' ' -f1) + sha256=$(sha256sum ${configJson} | cut -d ' ' -f1) + md5=$(openssl dgst -md5 -binary $layerPath | openssl enc -base64) + jq -n -c --arg size $size --arg sha256 $sha256 --arg md5 $md5 \ + '{ size: ($size | tonumber), sha256: $sha256, md5: $md5 }' \ + >> $out + '')); + + # Corresponds to the manifest JSON expected by the Registry API. + # + # This is Docker's "Image Manifest V2, Schema 2": + # https://docs.docker.com/registry/spec/manifest-v2-2/ + manifest = { + schemaVersion = 2; + mediaType = "application/vnd.docker.distribution.manifest.v2+json"; + + config = { + mediaType = "application/vnd.docker.container.image.v1+json"; + size = configMetadata.size; + digest = "sha256:${configMetadata.sha256}"; + }; + + layers = map (layer: { + mediaType = tarLayer; + digest = "sha256:${layer.sha256}"; + size = layer.size; + }) allLayers; + }; + + # This structure maps each layer digest to the actual tarball that will need + # to be served. It is used by the controller to cache the paths during a pull. + layerLocations = { + "${configMetadata.sha256}" = { + path = configJson; + md5 = configMetadata.md5; + }; + } // (builtins.listToAttrs (map (layer: { + name = "${layer.sha256}"; + value = { + path = layer.path; + md5 = layer.md5; + }; + }) allLayers)); + +in writeText "manifest-output.json" (builtins.toJSON { + inherit manifest layerLocations; +}) diff --git a/tools/nixery/index.html b/tools/nixery/index.html new file mode 100644 index 000000000..ebec9968c --- /dev/null +++ b/tools/nixery/index.html @@ -0,0 +1,90 @@ + + + + + + Nixery + + + +
+

Nixery

+ +
+

What is this?

+

+ Nixery provides the ability to pull ad-hoc container images from a Docker-compatible registry + server. The image names specify the contents the image should contain, which are then + retrieved and built by the Nix package manager. +

+

+ Nix is also responsible for the creation of the container images themselves. To do this it + uses an interesting layering strategy described in + this blog post. +

+

How does it work?

+

+ Simply point your local Docker installation (or other compatible registry client) at Nixery + and ask for an image with the contents you desire. Image contents are path separated in the + name, so for example if you needed an image that contains a shell and emacs you + could pull it as such: +

+

+ nixery.appspot.com/shell/emacs25-nox +

+

+ Image tags are currently ignored. Every package name needs to correspond to a key in the + nixpkgs package set. +

+

+ There are some special meta-packages which you must specify as the + first package in an image. These are: +

+ +

+ Hence if you needed an interactive image with, for example, htop installed you + could run docker run -ti nixery.appspot.com/shell/htop bash. +

+

FAQ

+

+ Technically speaking none of these are frequently-asked questions (because no questions have + been asked so far), but I'm going to take a guess at a few anyways: +

+ + + diff --git a/tools/nixery/main.go b/tools/nixery/main.go new file mode 100644 index 000000000..29b22f301 --- /dev/null +++ b/tools/nixery/main.go @@ -0,0 +1,309 @@ +// Package main provides the implementation of a container registry that +// transparently builds container images based on Nix derivations. +// +// The Nix derivation used for image creation is responsible for creating +// objects that are compatible with the registry API. The targeted registry +// protocol is currently Docker's. +// +// When an image is requested, the required contents are parsed out of the +// request and a Nix-build is initiated that eventually responds with the +// manifest as well as information linking each layer digest to a local +// filesystem path. +// +// Nixery caches the filesystem paths and returns the manifest to the client. +// Subsequent requests for layer content per digest are then fulfilled by +// serving the files from disk. +package main + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "log" + "net/http" + "os" + "os/exec" + "regexp" + "strings" + + "cloud.google.com/go/storage" +) + +// ManifestMediaType stores the Content-Type used for the manifest itself. This +// corresponds to the "Image Manifest V2, Schema 2" described on this page: +// +// https://docs.docker.com/registry/spec/manifest-v2-2/ +const ManifestMediaType string = "application/vnd.docker.distribution.manifest.v2+json" + +// Image represents the information necessary for building a container image. This can +// be either a list of package names (corresponding to keys in the nixpkgs set) or a +// Nix expression that results in a *list* of derivations. +type image struct { + // Name of the container image. + name string + + // Names of packages to include in the image. These must correspond directly to + // top-level names of Nix packages in the nixpkgs tree. + packages []string +} + +// BuildResult represents the output of calling the Nix derivation responsible for building +// registry images. +// +// The `layerLocations` field contains the local filesystem paths to each individual image layer +// that will need to be served, while the `manifest` field contains the JSON-representation of +// the manifest that needs to be served to the client. +// +// The later field is simply treated as opaque JSON and passed through. +type BuildResult struct { + Manifest json.RawMessage `json:"manifest"` + LayerLocations map[string]struct { + Path string `json:"path"` + Md5 []byte `json:"md5"` + } `json:"layerLocations"` +} + +// imageFromName parses an image name into the corresponding structure which can +// be used to invoke Nix. +// +// It will expand convenience names under the hood (see the `convenienceNames` function below). +func imageFromName(name string) image { + packages := strings.Split(name, "/") + return image{ + name: name, + packages: convenienceNames(packages), + } +} + +// convenienceNames expands convenience package names defined by Nixery which let users +// include commonly required sets of tools in a container quickly. +// +// Convenience names must be specified as the first package in an image. +// +// Currently defined convenience names are: +// +// * `shell`: Includes bash, coreutils and other common command-line tools +// * `builder`: Includes the standard build environment, as well as everything from `shell` +func convenienceNames(packages []string) []string { + shellPackages := []string{"bashInteractive", "coreutils", "moreutils", "nano"} + builderPackages := append(shellPackages, "stdenv") + + if packages[0] == "shell" { + return append(packages[1:], shellPackages...) + } else if packages[0] == "builder" { + return append(packages[1:], builderPackages...) + } else { + return packages + } +} + +// Call out to Nix and request that an image be built. Nix will, upon success, return +// a manifest for the container image. +func buildImage(image *image, ctx *context.Context, bucket *storage.BucketHandle) ([]byte, error) { + // This file is made available at runtime via Blaze. See the `data` declaration in `BUILD` + nixPath := "experimental/users/tazjin/nixery/build-registry-image.nix" + + packages, err := json.Marshal(image.packages) + if err != nil { + return nil, err + } + + cmd := exec.Command("nix-build", "--no-out-link", "--show-trace", "--argstr", "name", image.name, "--argstr", "packages", string(packages), nixPath) + + outpipe, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + + errpipe, err := cmd.StderrPipe() + if err != nil { + return nil, err + } + + if err = cmd.Start(); err != nil { + log.Println("Error starting nix-build:", err) + return nil, err + } + log.Printf("Started Nix image build for ''%s'", image.name) + + stdout, _ := ioutil.ReadAll(outpipe) + stderr, _ := ioutil.ReadAll(errpipe) + + if err = cmd.Wait(); err != nil { + // TODO(tazjin): Propagate errors upwards in a usable format. + log.Printf("nix-build execution error: %s\nstdout: %s\nstderr: %s\n", err, stdout, stderr) + return nil, err + } + + log.Println("Finished Nix image build") + + buildOutput, err := ioutil.ReadFile(strings.TrimSpace(string(stdout))) + if err != nil { + return nil, err + } + + // The build output returned by Nix is deserialised to add all contained layers to the + // bucket. Only the manifest itself is re-serialised to JSON and returned. + var result BuildResult + err = json.Unmarshal(buildOutput, &result) + if err != nil { + return nil, err + } + + for layer, meta := range result.LayerLocations { + err = uploadLayer(ctx, bucket, layer, meta.Path, meta.Md5) + if err != nil { + return nil, err + } + } + + return json.Marshal(result.Manifest) +} + +// uploadLayer uploads a single layer to Cloud Storage bucket. Before writing any data +// the bucket is probed to see if the file already exists. +// +// If the file does exist, its MD5 hash is verified to ensure that the stored file is +// not - for example - a fragment of a previous, incomplete upload. +func uploadLayer(ctx *context.Context, bucket *storage.BucketHandle, layer string, path string, md5 []byte) error { + layerKey := fmt.Sprintf("layers/%s", layer) + obj := bucket.Object(layerKey) + + // Before uploading a layer to the bucket, probe whether it already exists. + // + // If it does and the MD5 checksum matches the expected one, the layer upload + // can be skipped. + attrs, err := obj.Attrs(*ctx) + + if err == nil && bytes.Equal(attrs.MD5, md5) { + log.Printf("Layer sha256:%s already exists in bucket, skipping upload", layer) + } else { + writer := obj.NewWriter(*ctx) + file, err := os.Open(path) + + if err != nil { + return fmt.Errorf("failed to open layer %s from path %s: %v", layer, path, err) + } + + size, err := io.Copy(writer, file) + if err != nil { + return fmt.Errorf("failed to write layer %s to Cloud Storage: %v", layer, err) + } + + if err = writer.Close(); err != nil { + return fmt.Errorf("failed to write layer %s to Cloud Storage: %v", layer, err) + } + + log.Printf("Uploaded layer sha256:%s (%v bytes written)\n", layer, size) + } + + return nil +} + +// layerRedirect constructs the public URL of the layer object in the Cloud Storage bucket +// and redirects the client there. +// +// The Docker client is known to follow redirects, but this might not be true for all other +// registry clients. +func layerRedirect(w http.ResponseWriter, bucket string, digest string) { + log.Printf("Redirecting layer '%s' request to bucket '%s'\n", digest, bucket) + url := fmt.Sprintf("https://storage.googleapis.com/%s/layers/%s", bucket, digest) + w.Header().Set("Location", url) + w.WriteHeader(303) +} + +// prepareBucket configures the handle to a Cloud Storage bucket in which individual layers will be +// stored after Nix builds. Nixery does not directly serve layers to registry clients, instead it +// redirects them to the public URLs of the Cloud Storage bucket. +// +// The bucket is required for Nixery to function correctly, hence fatal errors are generated in case +// it fails to be set up correctly. +func prepareBucket(ctx *context.Context, bucket string) *storage.BucketHandle { + client, err := storage.NewClient(*ctx) + if err != nil { + log.Fatalln("Failed to set up Cloud Storage client:", err) + } + + bkt := client.Bucket(bucket) + + if _, err := bkt.Attrs(*ctx); err != nil { + log.Fatalln("Could not access configured bucket", err) + } + + return bkt +} + +var manifestRegex = regexp.MustCompile(`^/v2/([\w|\-|\.|\_|\/]+)/manifests/(\w+)$`) +var layerRegex = regexp.MustCompile(`^/v2/([\w|\-|\.|\_|\/]+)/blobs/sha256:(\w+)$`) + +func main() { + bucketName := os.Getenv("BUCKET") + if bucketName == "" { + log.Fatalln("GCS bucket for layer storage must be specified") + } + + port := os.Getenv("PORT") + if port == "" { + port = "5726" + } + + ctx := context.Background() + bucket := prepareBucket(&ctx, bucketName) + + log.Printf("Starting Kubernetes Nix controller on port %s\n", port) + + log.Fatal(http.ListenAndServe(":"+port, http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // When running on AppEngine, HTTP traffic should be redirected to HTTPS. + // + // This is achieved here by enforcing HSTS (with a one week duration) on responses. + if r.Header.Get("X-Forwarded-Proto") == "http" && strings.Contains(r.Host, "appspot.com") { + w.Header().Add("Strict-Transport-Security", "max-age=604800") + } + + // Serve an index page to anyone who visits the registry's base URL: + if r.RequestURI == "/" { + index, _ := ioutil.ReadFile("experimental/users/tazjin/nixery/index.html") + w.Header().Add("Content-Type", "text/html") + w.Write(index) + return + } + + // Acknowledge that we speak V2 + if r.RequestURI == "/v2/" { + fmt.Fprintln(w) + return + } + + // Serve the manifest (straight from Nix) + manifestMatches := manifestRegex.FindStringSubmatch(r.RequestURI) + if len(manifestMatches) == 3 { + imageName := manifestMatches[1] + log.Printf("Requesting manifest for image '%s'", imageName) + image := imageFromName(manifestMatches[1]) + manifest, err := buildImage(&image, &ctx, bucket) + + if err != nil { + log.Println("Failed to build image manifest", err) + return + } + + w.Header().Add("Content-Type", ManifestMediaType) + w.Write(manifest) + return + } + + // Serve an image layer. For this we need to first ask Nix for the + // manifest, then proceed to extract the correct layer from it. + layerMatches := layerRegex.FindStringSubmatch(r.RequestURI) + if len(layerMatches) == 3 { + digest := layerMatches[2] + layerRedirect(w, bucketName, digest) + return + } + + w.WriteHeader(404) + }))) +}