feat(tvix/store/protos): add Export

Export will traverse a given PathInfo structure, and write the contents
in NAR format to the passed Writer.
It uses directoryLookupFn and blobLookupFn to resolve references.

This is being moved over from nar-bridge. We need to keep the code there
around until we can bump go.mod to storev1 with this merged, but the
tests can already be moved entirely.

Change-Id: Ie0de3077b09344cafa00ff1e2ddb8b52e9e631bc
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9602
Tested-by: BuildkiteCI
Reviewed-by: Brian McGee <brian@bmcgee.ie>
Autosubmit: flokli <flokli@flokli.de>
This commit is contained in:
Florian Klink 2023-10-10 20:04:19 +02:00 committed by clbot
parent 6fe34b7ba0
commit e6ba84ea50
10 changed files with 387 additions and 79 deletions

View file

@ -108,7 +108,7 @@ in
store-protos-go = pkgs.buildGoModule {
name = "store-golang";
src = depot.third_party.gitignoreSource ./store/protos;
vendorHash = "sha256-619ICDpXuDRHRL5XtPlbUoik8yrTDSxoQiVrhsK7UlQ=";
vendorHash = "sha256-qPtEQTd1Vol8vhE10AdwTleTLfYS7xaOir3Ti4MJ+Vc=";
};
# Build the Rust documentation for publishing on docs.tvix.dev.

View file

@ -0,0 +1,98 @@
package exporter_test
import (
"bytes"
"context"
"encoding/base64"
"fmt"
"io"
"os"
"testing"
castorev1pb "code.tvl.fyi/tvix/castore/protos"
"code.tvl.fyi/tvix/nar-bridge/pkg/exporter"
"code.tvl.fyi/tvix/nar-bridge/pkg/importer"
"github.com/stretchr/testify/require"
"lukechampine.com/blake3"
)
// TestFull round-trips a real NAR file: it is imported into in-memory blob and
// directory maps, exported again from those maps, and the exported bytes must
// match the original file.
func TestFull(t *testing.T) {
	// We pipe nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar to the importer,
	// and store all the file contents and directory objects received in two hashmaps.
	// We then feed them to the exporter, and test we come up with the same NAR file.
	// os.ReadFile opens, reads and closes the file, so no file handle is leaked.
	narContents, err := os.ReadFile("../../testdata/nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar")
	require.NoError(t, err)

	// Both maps are keyed by the base64-encoded digest.
	blobsMap := make(map[string][]byte)
	directoriesMap := make(map[string]*castorev1pb.Directory)

	pathInfo, err := importer.Import(
		context.Background(),
		bytes.NewBuffer(narContents),
		func(blobReader io.Reader) ([]byte, error) {
			// read in contents, we need to put it into blobsMap later.
			contents, err := io.ReadAll(blobReader)
			require.NoError(t, err)

			dgst := mustBlobDigest(bytes.NewReader(contents))

			// put it in blobsMap
			blobsMap[base64.StdEncoding.EncodeToString(dgst)] = contents

			return dgst, nil
		},
		func(directory *castorev1pb.Directory) ([]byte, error) {
			dgst := mustDirectoryDigest(directory)

			directoriesMap[base64.StdEncoding.EncodeToString(dgst)] = directory

			return dgst, nil
		},
	)
	require.NoError(t, err)

	// done populating everything, now actually test the export :-)
	var buf bytes.Buffer
	err = exporter.Export(
		&buf,
		pathInfo,
		func(directoryDgst []byte) (*castorev1pb.Directory, error) {
			d, found := directoriesMap[base64.StdEncoding.EncodeToString(directoryDgst)]
			if !found {
				panic(fmt.Sprintf("directory %v not found", base64.StdEncoding.EncodeToString(directoryDgst)))
			}

			return d, nil
		},
		func(blobDgst []byte) (io.ReadCloser, error) {
			blobContents, found := blobsMap[base64.StdEncoding.EncodeToString(blobDgst)]
			if !found {
				panic(fmt.Sprintf("blob %v not found", base64.StdEncoding.EncodeToString(blobDgst)))
			}

			return io.NopCloser(bytes.NewReader(blobContents)), nil
		},
	)

	require.NoError(t, err, "exporter shouldn't fail")
	require.Equal(t, narContents, buf.Bytes())
}
// mustDirectoryDigest returns the digest of the given Directory message,
// panicking if the digest cannot be calculated.
func mustDirectoryDigest(d *castorev1pb.Directory) []byte {
	digest, err := d.Digest()
	if err == nil {
		return digest
	}

	panic(err)
}
// mustBlobDigest reads r until EOF and returns the 32-byte BLAKE3 digest of
// everything read, panicking if reading fails.
func mustBlobDigest(r io.Reader) []byte {
	h := blake3.New(32, nil)

	if _, err := io.Copy(h, r); err != nil {
		panic(err)
	}

	return h.Sum(nil)
}

275
tvix/store/protos/export.go Normal file
View file

@ -0,0 +1,275 @@
package storev1
import (
"fmt"
"io"
"path"
castorev1pb "code.tvl.fyi/tvix/castore/protos"
"github.com/nix-community/go-nix/pkg/nar"
)
// DirectoryLookupFn resolves a digest to a Directory message.
// Export calls it with the digest of each directory node it descends into.
type DirectoryLookupFn func([]byte) (*castorev1pb.Directory, error)
// BlobLookupFn resolves a digest to a reader over blob contents.
// Export calls it with the digest of each file node and closes the returned reader.
type BlobLookupFn func([]byte) (io.ReadCloser, error)
// Export will traverse a given PathInfo structure, and write the contents
// in NAR format to the passed Writer.
// It uses directoryLookupFn and blobLookupFn to resolve references.
//
// Directory messages returned by directoryLookupFn are never modified: Export
// drains shallow copies, so the same message may safely be returned more than
// once (or reused for a later export). Every reader returned by blobLookupFn
// is closed exactly once, before the next lookup happens.
//
// An error is returned if the root node of pathInfo is missing or of an
// unknown type, if any lookup or write fails, or if the NAR writer cannot be
// finalized.
func Export(
	w io.Writer,
	pathInfo *PathInfo,
	directoryLookupFn DirectoryLookupFn,
	blobLookupFn BlobLookupFn,
) error {
	// initialize a NAR writer
	narWriter, err := nar.NewWriter(w)
	if err != nil {
		return fmt.Errorf("unable to initialize nar writer: %w", err)
	}
	// Safety net for the error paths only. Successful paths close explicitly
	// and check the result; the extra deferred Close afterwards is ignored,
	// as it already was for root files and symlinks.
	defer narWriter.Close()

	// populate rootHeader
	rootHeader := &nar.Header{
		Path: "/",
	}

	// populate a stack
	// we will push paths and directories to it when entering a directory,
	// and emit individual elements to the NAR writer, draining the Directory object.
	// once it's empty, we can pop it off the stack.
	var stackPaths []string
	var stackDirectories []*castorev1pb.Directory

	// peek at the pathInfo root and assemble the root node and write to writer
	// in the case of a regular file, we retrieve and write the contents, close and exit
	// in the case of a symlink, we write the symlink, close and exit
	switch v := (pathInfo.GetNode().GetNode()).(type) {
	case *castorev1pb.Node_File:
		rootHeader.Type = nar.TypeRegular
		rootHeader.Size = int64(v.File.GetSize())
		rootHeader.Executable = v.File.GetExecutable()

		if err := narWriter.WriteHeader(rootHeader); err != nil {
			return fmt.Errorf("unable to write root header: %w", err)
		}

		// if it's a regular file, retrieve and write the contents
		blobReader, err := blobLookupFn(v.File.GetDigest())
		if err != nil {
			return fmt.Errorf("unable to lookup blob: %w", err)
		}

		if _, err := io.Copy(narWriter, blobReader); err != nil {
			// The copy error is the one worth reporting; still release the reader.
			_ = blobReader.Close()
			return fmt.Errorf("unable to read from blobReader: %w", err)
		}

		if err := blobReader.Close(); err != nil {
			return fmt.Errorf("unable to close content reader: %w", err)
		}

		if err := narWriter.Close(); err != nil {
			return fmt.Errorf("unable to close nar writer: %w", err)
		}

		return nil

	case *castorev1pb.Node_Symlink:
		rootHeader.Type = nar.TypeSymlink
		rootHeader.LinkTarget = string(v.Symlink.GetTarget())

		if err := narWriter.WriteHeader(rootHeader); err != nil {
			return fmt.Errorf("unable to write root header: %w", err)
		}

		if err := narWriter.Close(); err != nil {
			return fmt.Errorf("unable to close nar writer: %w", err)
		}

		return nil

	case *castorev1pb.Node_Directory:
		// We have a directory at the root, look it up and put it on the stack.
		directory, err := directoryLookupFn(v.Directory.GetDigest())
		if err != nil {
			return fmt.Errorf("unable to lookup directory: %w", err)
		}

		stackDirectories = append(stackDirectories, shallowCopyDirectory(directory))
		stackPaths = append(stackPaths, "/")

		err = narWriter.WriteHeader(&nar.Header{
			Path: "/",
			Type: nar.TypeDirectory,
		})
		if err != nil {
			return fmt.Errorf("error writing header: %w", err)
		}

	default:
		// Without a root node there is nothing that could be written as a NAR.
		return fmt.Errorf("unsupported root node type %T", v)
	}

	// as long as the stack is not empty, we keep running.
	for len(stackDirectories) > 0 {
		// Peek at the current top of the stack.
		topOfStack := stackDirectories[len(stackDirectories)-1]
		topOfStackPath := stackPaths[len(stackPaths)-1]

		// get the next element that's lexicographically smallest, and drain it from
		// the current directory on top of the stack.
		nextNode := drainNextNode(topOfStack)

		// If nextNode returns nil, there's nothing left in the directory node, so we
		// can emit it from the stack.
		// Contrary to the import case, we don't emit the node popping from the stack, but when pushing.
		if nextNode == nil {
			// pop off stack
			stackDirectories = stackDirectories[:len(stackDirectories)-1]
			stackPaths = stackPaths[:len(stackPaths)-1]

			continue
		}

		switch n := (nextNode).(type) {
		case *castorev1pb.DirectoryNode:
			childPath := path.Join(topOfStackPath, string(n.GetName()))

			err := narWriter.WriteHeader(&nar.Header{
				Path: childPath,
				Type: nar.TypeDirectory,
			})
			if err != nil {
				return fmt.Errorf("unable to write nar header: %w", err)
			}

			d, err := directoryLookupFn(n.GetDigest())
			if err != nil {
				return fmt.Errorf("unable to lookup directory: %w", err)
			}

			// add to stack
			stackDirectories = append(stackDirectories, shallowCopyDirectory(d))
			stackPaths = append(stackPaths, childPath)

		case *castorev1pb.FileNode:
			err := narWriter.WriteHeader(&nar.Header{
				Path:       path.Join(topOfStackPath, string(n.GetName())),
				Type:       nar.TypeRegular,
				Size:       int64(n.GetSize()),
				Executable: n.GetExecutable(),
			})
			if err != nil {
				return fmt.Errorf("unable to write nar header: %w", err)
			}

			// copy file contents
			contentReader, err := blobLookupFn(n.GetDigest())
			if err != nil {
				return fmt.Errorf("unable to get blob: %w", err)
			}

			// No defer here: a defer inside the loop would keep every reader
			// pending until Export returns and then close it a second time.
			if _, err := io.Copy(narWriter, contentReader); err != nil {
				_ = contentReader.Close()
				return fmt.Errorf("unable to copy contents from contentReader: %w", err)
			}

			if err := contentReader.Close(); err != nil {
				return fmt.Errorf("unable to close content reader: %w", err)
			}

		case *castorev1pb.SymlinkNode:
			err := narWriter.WriteHeader(&nar.Header{
				Path:       path.Join(topOfStackPath, string(n.GetName())),
				Type:       nar.TypeSymlink,
				LinkTarget: string(n.GetTarget()),
			})
			if err != nil {
				return fmt.Errorf("unable to write nar header: %w", err)
			}
		}
	}

	// The whole tree has been emitted; finalize the archive and surface any
	// error from doing so instead of dropping it in the deferred Close.
	if err := narWriter.Close(); err != nil {
		return fmt.Errorf("unable to close nar writer: %w", err)
	}

	return nil
}

// shallowCopyDirectory returns a new Directory message sharing the child
// slices of d, so that draining the copy leaves d itself untouched.
func shallowCopyDirectory(d *castorev1pb.Directory) *castorev1pb.Directory {
	return &castorev1pb.Directory{
		Directories: d.GetDirectories(),
		Files:       d.GetFiles(),
		Symlinks:    d.GetSymlinks(),
	}
}
// drainNextNode removes and returns the child node of d that smallestNode
// selects, i.e. the one whose name sorts first.
// The matching child list of d is resliced in place, so d is modified.
// It returns nil once d has no children left.
func drainNextNode(d *castorev1pb.Directory) interface{} {
	next := smallestNode(d)
	if next == nil {
		return nil
	}

	// smallestNode always returns the head of one of the three child lists,
	// so dropping the first element of that list removes exactly that node.
	switch next.(type) {
	case *castorev1pb.DirectoryNode:
		d.Directories = d.Directories[1:]
	case *castorev1pb.FileNode:
		d.Files = d.Files[1:]
	case *castorev1pb.SymlinkNode:
		d.Symlinks = d.Symlinks[1:]
	default:
		panic("invalid type encountered")
	}

	return next
}
// smallestNode looks at the first directory, first file and first symlink of
// a directory message and returns whichever has the smallest name, as decided
// by smallerNode. It does not modify d.
// It returns nil if d has no children at all.
func smallestNode(d *castorev1pb.Directory) interface{} {
	// Running minimum over the heads of the three child lists, visited in the
	// order directories, files, symlinks.
	var smallest interface{ GetName() []byte }

	if dirs := d.GetDirectories(); len(dirs) > 0 {
		smallest = dirs[0]
	}

	if files := d.GetFiles(); len(files) > 0 {
		if smallest == nil {
			smallest = files[0]
		} else {
			smallest = smallerNode(smallest, files[0])
		}
	}

	if symlinks := d.GetSymlinks(); len(symlinks) > 0 {
		if smallest == nil {
			smallest = symlinks[0]
		} else {
			smallest = smallerNode(smallest, symlinks[0])
		}
	}

	if smallest == nil {
		// no directories, no files, no symlinks
		return nil
	}

	return smallest
}
// smallerNode compares two nodes by name (byte-wise string comparison)
// and returns the one with the smaller name; if both names are equal,
// b is returned.
// Neither node may be nil; smallestNode takes care of that before calling.
func smallerNode(a interface{ GetName() []byte }, b interface{ GetName() []byte }) interface{ GetName() []byte } {
	if string(b.GetName()) <= string(a.GetName()) {
		return b
	}

	return a
}

View file

@ -1,17 +1,12 @@
package exporter_test
package storev1_test
import (
"bytes"
"context"
"encoding/base64"
"fmt"
"io"
"os"
"testing"
castorev1pb "code.tvl.fyi/tvix/castore/protos"
"code.tvl.fyi/tvix/nar-bridge/pkg/exporter"
"code.tvl.fyi/tvix/nar-bridge/pkg/importer"
storev1pb "code.tvl.fyi/tvix/store/protos"
"github.com/stretchr/testify/require"
"lukechampine.com/blake3"
@ -49,14 +44,14 @@ func TestSymlink(t *testing.T) {
var buf bytes.Buffer
err := exporter.Export(&buf, pathInfo, func([]byte) (*castorev1pb.Directory, error) {
err := storev1pb.Export(&buf, pathInfo, func([]byte) (*castorev1pb.Directory, error) {
panic("no directories expected")
}, func([]byte) (io.ReadCloser, error) {
panic("no files expected")
})
require.NoError(t, err, "exporter shouldn't fail")
f, err := os.Open("../../testdata/symlink.nar")
f, err := os.Open("testdata/symlink.nar")
require.NoError(t, err)
bytesExpected, err := io.ReadAll(f)
@ -90,7 +85,7 @@ func TestRegular(t *testing.T) {
var buf bytes.Buffer
err := exporter.Export(&buf, pathInfo, func([]byte) (*castorev1pb.Directory, error) {
err := storev1pb.Export(&buf, pathInfo, func([]byte) (*castorev1pb.Directory, error) {
panic("no directories expected")
}, func(blobRef []byte) (io.ReadCloser, error) {
if !bytes.Equal(blobRef, BLAKE3_DIGEST_0X01) {
@ -100,7 +95,7 @@ func TestRegular(t *testing.T) {
})
require.NoError(t, err, "exporter shouldn't fail")
f, err := os.Open("../../testdata/onebyteregular.nar")
f, err := os.Open("testdata/onebyteregular.nar")
require.NoError(t, err)
bytesExpected, err := io.ReadAll(f)
@ -134,7 +129,7 @@ func TestEmptyDirectory(t *testing.T) {
var buf bytes.Buffer
err := exporter.Export(&buf, pathInfo, func(directoryRef []byte) (*castorev1pb.Directory, error) {
err := storev1pb.Export(&buf, pathInfo, func(directoryRef []byte) (*castorev1pb.Directory, error) {
if !bytes.Equal(directoryRef, emptyDirectoryDigest) {
panic("unexpected directoryRef")
}
@ -144,7 +139,7 @@ func TestEmptyDirectory(t *testing.T) {
})
require.NoError(t, err, "exporter shouldn't fail")
f, err := os.Open("../../testdata/emptydirectory.nar")
f, err := os.Open("testdata/emptydirectory.nar")
require.NoError(t, err)
bytesExpected, err := io.ReadAll(f)
@ -154,67 +149,3 @@ func TestEmptyDirectory(t *testing.T) {
require.Equal(t, bytesExpected, buf.Bytes(), "expected nar contents to match")
}
// TestFull round-trips a real NAR file: it is imported into in-memory blob and
// directory maps, exported again from those maps, and the exported bytes must
// match the original file.
func TestFull(t *testing.T) {
	// We pipe nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar to the importer,
	// and store all the file contents and directory objects received in two hashmaps.
	// We then feed them to the exporter, and test we come up with the same NAR file.
	// os.ReadFile opens, reads and closes the file, so no file handle is leaked.
	narContents, err := os.ReadFile("../../testdata/nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar")
	require.NoError(t, err)

	// Both maps are keyed by the base64-encoded digest.
	blobsMap := make(map[string][]byte)
	directoriesMap := make(map[string]*castorev1pb.Directory)

	pathInfo, err := importer.Import(
		context.Background(),
		bytes.NewBuffer(narContents),
		func(blobReader io.Reader) ([]byte, error) {
			// read in contents, we need to put it into blobsMap later.
			contents, err := io.ReadAll(blobReader)
			require.NoError(t, err)

			dgst := mustBlobDigest(bytes.NewReader(contents))

			// put it in blobsMap
			blobsMap[base64.StdEncoding.EncodeToString(dgst)] = contents

			return dgst, nil
		},
		func(directory *castorev1pb.Directory) ([]byte, error) {
			dgst := mustDirectoryDigest(directory)

			directoriesMap[base64.StdEncoding.EncodeToString(dgst)] = directory

			return dgst, nil
		},
	)
	require.NoError(t, err)

	// done populating everything, now actually test the export :-)
	var buf bytes.Buffer
	err = exporter.Export(
		&buf,
		pathInfo,
		func(directoryDgst []byte) (*castorev1pb.Directory, error) {
			d, found := directoriesMap[base64.StdEncoding.EncodeToString(directoryDgst)]
			if !found {
				panic(fmt.Sprintf("directory %v not found", base64.StdEncoding.EncodeToString(directoryDgst)))
			}

			return d, nil
		},
		func(blobDgst []byte) (io.ReadCloser, error) {
			blobContents, found := blobsMap[base64.StdEncoding.EncodeToString(blobDgst)]
			if !found {
				panic(fmt.Sprintf("blob %v not found", base64.StdEncoding.EncodeToString(blobDgst)))
			}

			return io.NopCloser(bytes.NewReader(blobContents)), nil
		},
	)

	require.NoError(t, err, "exporter shouldn't fail")
	require.Equal(t, narContents, buf.Bytes())
}

View file

@ -4,10 +4,12 @@ go 1.19
require (
code.tvl.fyi/tvix/castore/protos v0.0.0-20230922125121-72355662d742
github.com/google/go-cmp v0.5.6
github.com/nix-community/go-nix v0.0.0-20231009143713-ebca3299475b
github.com/stretchr/testify v1.8.1
google.golang.org/grpc v1.51.0
google.golang.org/protobuf v1.31.0
lukechampine.com/blake3 v1.1.7
)
require (
@ -18,7 +20,7 @@ require (
golang.org/x/net v0.7.0 // indirect
golang.org/x/sys v0.5.0 // indirect
golang.org/x/text v0.7.0 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
lukechampine.com/blake3 v1.1.7 // indirect
)

View file

@ -28,6 +28,7 @@ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/nix-community/go-nix v0.0.0-20231009143713-ebca3299475b h1:AWEKOdDO3JnHApQDOmONEKLXbMCQJhYJJfJpiWB9VGI=
@ -70,6 +71,7 @@ golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=

View file

@ -1,4 +1,4 @@
package exporter
package storev1
import (
"testing"

Binary file not shown.

Binary file not shown.

BIN
tvix/store/protos/testdata/symlink.nar vendored Normal file

Binary file not shown.