refactor: Remove remaining MD5-hash mentions and computations

This commit is contained in:
Vincent Ambo 2019-10-03 12:11:46 +01:00 committed by Vincent Ambo
parent f6b40ed6c7
commit 53906024ff
5 changed files with 18 additions and 30 deletions

View file

@ -137,11 +137,10 @@ let
buildInputs = with pkgs; [ coreutils jq openssl ]; buildInputs = with pkgs; [ coreutils jq openssl ];
}'' }''
layerSha256=$(sha256sum ${symlinkLayer} | cut -d ' ' -f1) layerSha256=$(sha256sum ${symlinkLayer} | cut -d ' ' -f1)
layerMd5=$(openssl dgst -md5 -binary ${symlinkLayer} | openssl enc -base64)
layerSize=$(stat --printf '%s' ${symlinkLayer}) layerSize=$(stat --printf '%s' ${symlinkLayer})
jq -n -c --arg sha256 $layerSha256 --arg md5 $layerMd5 --arg size $layerSize --arg path ${symlinkLayer} \ jq -n -c --arg sha256 $layerSha256 --arg size $layerSize --arg path ${symlinkLayer} \
'{ size: ($size | tonumber), sha256: $sha256, md5: $md5, path: $path }' >> $out '{ size: ($size | tonumber), sha256: $sha256, path: $path }' >> $out
'')); ''));
# Final output structure returned to Nixery if the build succeeded # Final output structure returned to Nixery if the build succeeded

View file

@ -46,9 +46,8 @@ They are stored content-addressably at `$BUCKET/layers/$SHA256HASH` and layer
requests sent to Nixery will redirect directly to this storage location. requests sent to Nixery will redirect directly to this storage location.
The effect of this cache is that Nixery does not need to upload identical layers The effect of this cache is that Nixery does not need to upload identical layers
repeatedly. When Nixery notices that a layer already exists in GCS, it will use repeatedly. When Nixery notices that a layer already exists in GCS it will skip
the object metadata to compare its MD5-hash with the locally computed one and uploading this layer.
skip uploading.
Removing layers from the cache is *potentially problematic* if there are cached Removing layers from the cache is *potentially problematic* if there are cached
manifests or layer builds referencing those layers. manifests or layer builds referencing those layers.
@ -61,8 +60,8 @@ reference these layers.
Layer builds are cached at `$BUCKET/builds/$HASH`, where `$HASH` is a SHA1 of Layer builds are cached at `$BUCKET/builds/$HASH`, where `$HASH` is a SHA1 of
the Nix store paths included in the layer. the Nix store paths included in the layer.
The content of the cached entries is a JSON-object that contains the MD5 and The content of the cached entries is a JSON-object that contains the SHA256
SHA256 hashes of the built layer. hashes and sizes of the built layer.
The effect of this cache is that different instances of Nixery will not build, The effect of this cache is that different instances of Nixery will not build,
hash and upload layers that have identical contents across different instances. hash and upload layers that have identical contents across different instances.

View file

@ -67,8 +67,7 @@ just ... hang, for a moment.
Nixery inspects the returned manifest and uploads each layer to the configured Nixery inspects the returned manifest and uploads each layer to the configured
[Google Cloud Storage][gcs] bucket. To avoid unnecessary uploading, it will [Google Cloud Storage][gcs] bucket. To avoid unnecessary uploading, it will
first check whether layers are already present in the bucket and - just to be check whether layers are already present in the bucket.
safe - compare their MD5-hashes against what was built.
## 4. The image manifest is sent back ## 4. The image manifest is sent back

View file

@ -21,7 +21,6 @@ import (
"bufio" "bufio"
"bytes" "bytes"
"context" "context"
"crypto/md5"
"crypto/sha256" "crypto/sha256"
"encoding/json" "encoding/json"
"fmt" "fmt"
@ -108,7 +107,6 @@ type ImageResult struct {
SymlinkLayer struct { SymlinkLayer struct {
Size int `json:"size"` Size int `json:"size"`
SHA256 string `json:"sha256"` SHA256 string `json:"sha256"`
MD5 string `json:"md5"`
Path string `json:"path"` Path string `json:"path"`
} `json:"symlinkLayer"` } `json:"symlinkLayer"`
} }
@ -328,8 +326,7 @@ func uploadHashLayer(ctx context.Context, s *State, key string, data io.Reader)
// algorithms and uploads to the bucket // algorithms and uploads to the bucket
sw := staging.NewWriter(ctx) sw := staging.NewWriter(ctx)
shasum := sha256.New() shasum := sha256.New()
md5sum := md5.New() multi := io.MultiWriter(sw, shasum)
multi := io.MultiWriter(sw, shasum, md5sum)
size, err := io.Copy(multi, data) size, err := io.Copy(multi, data)
if err != nil { if err != nil {
@ -342,27 +339,24 @@ func uploadHashLayer(ctx context.Context, s *State, key string, data io.Reader)
return nil, err return nil, err
} }
build := Build{ sha256sum := fmt.Sprintf("%x", shasum.Sum([]byte{}))
SHA256: fmt.Sprintf("%x", shasum.Sum([]byte{})),
MD5: fmt.Sprintf("%x", md5sum.Sum([]byte{})),
}
// Hashes are now known and the object is in the bucket, what // Hashes are now known and the object is in the bucket, what
// remains is to move it to the correct location and cache it. // remains is to move it to the correct location and cache it.
err = renameObject(ctx, s, "staging/"+key, "layers/"+build.SHA256) err = renameObject(ctx, s, "staging/"+key, "layers/"+sha256sum)
if err != nil { if err != nil {
log.Printf("failed to move layer '%s' from staging: %s\n", key, err) log.Printf("failed to move layer '%s' from staging: %s\n", key, err)
return nil, err return nil, err
} }
cacheBuild(ctx, s, key, build) log.Printf("Uploaded layer sha256:%s (%v bytes written)", sha256sum, size)
log.Printf("Uploaded layer sha256:%s (%v bytes written)", build.SHA256, size) entry := manifest.Entry{
Digest: "sha256:" + sha256sum,
return &manifest.Entry{
Digest: "sha256:" + build.SHA256,
Size: size, Size: size,
}, nil }
return &entry, nil
} }
func BuildImage(ctx context.Context, s *State, image *Image) (*BuildResult, error) { func BuildImage(ctx context.Context, s *State, image *Image) (*BuildResult, error) {

View file

@ -3,7 +3,6 @@
package manifest package manifest
import ( import (
"crypto/md5"
"crypto/sha256" "crypto/sha256"
"encoding/json" "encoding/json"
"fmt" "fmt"
@ -52,12 +51,11 @@ type imageConfig struct {
} }
// ConfigLayer represents the configuration layer to be included in // ConfigLayer represents the configuration layer to be included in
// the manifest, containing its JSON-serialised content and the SHA256 // the manifest, containing its JSON-serialised content and SHA256
// & MD5 hashes of its input. // hash.
type ConfigLayer struct { type ConfigLayer struct {
Config []byte Config []byte
SHA256 string SHA256 string
MD5 string
} }
// imageConfig creates an image configuration with the values set to // imageConfig creates an image configuration with the values set to
@ -78,7 +76,6 @@ func configLayer(hashes []string) ConfigLayer {
return ConfigLayer{ return ConfigLayer{
Config: j, Config: j,
SHA256: fmt.Sprintf("%x", sha256.Sum256(j)), SHA256: fmt.Sprintf("%x", sha256.Sum256(j)),
MD5: fmt.Sprintf("%x", md5.Sum(j)),
} }
} }