2023-10-03 11:57:03 +02:00
|
|
|
package importer
|
2022-11-19 21:34:49 +01:00
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"crypto/sha256"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"path"
|
|
|
|
"strings"
|
|
|
|
|
2023-10-17 22:41:33 +02:00
|
|
|
castorev1pb "code.tvl.fyi/tvix/castore-go"
|
2022-11-19 21:34:49 +01:00
|
|
|
"github.com/nix-community/go-nix/pkg/nar"
|
|
|
|
)
|
|
|
|
|
|
|
|
// An item on the directories stack
|
2023-10-03 11:42:48 +02:00
|
|
|
type stackItem struct {
|
2022-11-19 21:34:49 +01:00
|
|
|
path string
|
2023-09-22 15:38:10 +02:00
|
|
|
directory *castorev1pb.Directory
|
2022-11-19 21:34:49 +01:00
|
|
|
}
|
|
|
|
|
2023-10-11 12:28:10 +02:00
|
|
|
// Import reads a NAR from a reader, and returns a the root node,
|
|
|
|
// NAR size and NAR sha256 digest.
|
2023-10-03 11:57:03 +02:00
|
|
|
func Import(
|
|
|
|
// a context, to support cancellation
|
2022-11-19 21:34:49 +01:00
|
|
|
ctx context.Context,
|
2023-10-03 11:57:03 +02:00
|
|
|
// The reader the data is read from
|
|
|
|
r io.Reader,
|
2022-11-19 21:34:49 +01:00
|
|
|
// callback function called with each regular file content
|
2023-10-03 12:59:13 +02:00
|
|
|
blobCb func(fileReader io.Reader) ([]byte, error),
|
2022-11-19 21:34:49 +01:00
|
|
|
// callback function called with each finalized directory node
|
2023-10-03 12:59:13 +02:00
|
|
|
directoryCb func(directory *castorev1pb.Directory) ([]byte, error),
|
2023-10-11 12:28:10 +02:00
|
|
|
) (*castorev1pb.Node, uint64, []byte, error) {
|
2023-10-03 13:23:46 +02:00
|
|
|
// We need to wrap the underlying reader a bit.
|
|
|
|
// - we want to keep track of the number of bytes read in total
|
|
|
|
// - we calculate the sha256 digest over all data read
|
|
|
|
// Express these two things in a MultiWriter, and give the NAR reader a
|
|
|
|
// TeeReader that writes to it.
|
|
|
|
narCountW := &CountingWriter{}
|
|
|
|
sha256W := sha256.New()
|
|
|
|
multiW := io.MultiWriter(narCountW, sha256W)
|
|
|
|
narReader, err := nar.NewReader(io.TeeReader(r, multiW))
|
2022-11-19 21:34:49 +01:00
|
|
|
if err != nil {
|
2023-10-11 12:28:10 +02:00
|
|
|
return nil, 0, nil, fmt.Errorf("failed to instantiate nar reader: %w", err)
|
2022-11-19 21:34:49 +01:00
|
|
|
}
|
|
|
|
defer narReader.Close()
|
|
|
|
|
|
|
|
// If we store a symlink or regular file at the root, these are not nil.
|
|
|
|
// If they are nil, we instead have a stackDirectory.
|
2023-09-22 15:38:10 +02:00
|
|
|
var rootSymlink *castorev1pb.SymlinkNode
|
|
|
|
var rootFile *castorev1pb.FileNode
|
|
|
|
var stackDirectory *castorev1pb.Directory
|
2022-11-19 21:34:49 +01:00
|
|
|
|
2023-10-03 11:42:48 +02:00
|
|
|
var stack = []stackItem{}
|
2022-11-19 21:34:49 +01:00
|
|
|
|
|
|
|
// popFromStack is used when we transition to a different directory or
|
|
|
|
// drain the stack when we reach the end of the NAR.
|
|
|
|
// It adds the popped element to the element underneath if any,
|
|
|
|
// and passes it to the directoryCb callback.
|
|
|
|
// This function may only be called if the stack is not already empty.
|
|
|
|
popFromStack := func() error {
|
|
|
|
// Keep the top item, and "resize" the stack slice.
|
|
|
|
// This will only make the last element unaccessible, but chances are high
|
|
|
|
// we're re-using that space anyways.
|
|
|
|
toPop := stack[len(stack)-1]
|
|
|
|
stack = stack[:len(stack)-1]
|
|
|
|
|
2023-10-03 12:59:13 +02:00
|
|
|
// call the directoryCb
|
|
|
|
directoryDigest, err := directoryCb(toPop.directory)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed calling directoryCb: %w", err)
|
|
|
|
}
|
|
|
|
|
2022-11-19 21:34:49 +01:00
|
|
|
// if there's still a parent left on the stack, refer to it from there.
|
|
|
|
if len(stack) > 0 {
|
|
|
|
topOfStack := stack[len(stack)-1].directory
|
2023-09-22 15:38:10 +02:00
|
|
|
topOfStack.Directories = append(topOfStack.Directories, &castorev1pb.DirectoryNode{
|
2022-11-19 21:34:49 +01:00
|
|
|
Name: []byte(path.Base(toPop.path)),
|
2023-10-03 12:59:13 +02:00
|
|
|
Digest: directoryDigest,
|
2022-11-19 21:34:49 +01:00
|
|
|
Size: toPop.directory.Size(),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
// Keep track that we have encounter at least one directory
|
|
|
|
stackDirectory = toPop.directory
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
getBasename := func(p string) string {
|
|
|
|
// extract the basename. In case of "/", replace with empty string.
|
|
|
|
basename := path.Base(p)
|
|
|
|
if basename == "/" {
|
|
|
|
basename = ""
|
|
|
|
}
|
|
|
|
return basename
|
|
|
|
}
|
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
2023-10-11 12:28:10 +02:00
|
|
|
return nil, 0, nil, ctx.Err()
|
2022-11-19 21:34:49 +01:00
|
|
|
default:
|
|
|
|
// call narReader.Next() to get the next element
|
|
|
|
hdr, err := narReader.Next()
|
|
|
|
|
|
|
|
// If this returns an error, it's either EOF (when we're done reading from the NAR),
|
2023-10-03 12:59:13 +02:00
|
|
|
// or another error.
|
2022-11-19 21:34:49 +01:00
|
|
|
if err != nil {
|
|
|
|
// if this returns no EOF, bail out
|
|
|
|
if !errors.Is(err, io.EOF) {
|
2023-10-11 12:28:10 +02:00
|
|
|
return nil, 0, nil, fmt.Errorf("failed getting next nar element: %w", err)
|
2022-11-19 21:34:49 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// The NAR has been read all the way to the end…
|
|
|
|
// Make sure we close the nar reader, which might read some final trailers.
|
|
|
|
if err := narReader.Close(); err != nil {
|
2023-10-11 12:28:10 +02:00
|
|
|
return nil, 0, nil, fmt.Errorf("unable to close nar reader: %w", err)
|
2022-11-19 21:34:49 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check the stack. While it's not empty, we need to pop things off the stack.
|
|
|
|
for len(stack) > 0 {
|
|
|
|
err := popFromStack()
|
|
|
|
if err != nil {
|
2023-10-11 12:28:10 +02:00
|
|
|
return nil, 0, nil, fmt.Errorf("unable to pop from stack: %w", err)
|
2022-11-19 21:34:49 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-10-11 12:28:10 +02:00
|
|
|
// Stack is empty.
|
|
|
|
// Now either root{File,Symlink,Directory} is not nil,
|
|
|
|
// and we can return the root node.
|
|
|
|
narSize := narCountW.BytesWritten()
|
|
|
|
narSha256 := sha256W.Sum(nil)
|
2023-10-03 11:42:18 +02:00
|
|
|
|
2022-11-19 21:34:49 +01:00
|
|
|
if rootFile != nil {
|
2023-10-11 12:28:10 +02:00
|
|
|
return &castorev1pb.Node{
|
2023-09-22 15:38:10 +02:00
|
|
|
Node: &castorev1pb.Node_File{
|
2022-11-19 21:34:49 +01:00
|
|
|
File: rootFile,
|
|
|
|
},
|
2023-10-11 12:28:10 +02:00
|
|
|
}, narSize, narSha256, nil
|
|
|
|
} else if rootSymlink != nil {
|
|
|
|
return &castorev1pb.Node{
|
2023-09-22 15:38:10 +02:00
|
|
|
Node: &castorev1pb.Node_Symlink{
|
2022-11-19 21:34:49 +01:00
|
|
|
Symlink: rootSymlink,
|
|
|
|
},
|
2023-10-11 12:28:10 +02:00
|
|
|
}, narSize, narSha256, nil
|
|
|
|
} else if stackDirectory != nil {
|
2022-11-19 21:34:49 +01:00
|
|
|
// calculate directory digest (i.e. after we received all its contents)
|
|
|
|
dgst, err := stackDirectory.Digest()
|
|
|
|
if err != nil {
|
2023-10-11 12:28:10 +02:00
|
|
|
return nil, 0, nil, fmt.Errorf("unable to calculate root directory digest: %w", err)
|
2022-11-19 21:34:49 +01:00
|
|
|
}
|
|
|
|
|
2023-10-11 12:28:10 +02:00
|
|
|
return &castorev1pb.Node{
|
2023-09-22 15:38:10 +02:00
|
|
|
Node: &castorev1pb.Node_Directory{
|
|
|
|
Directory: &castorev1pb.DirectoryNode{
|
2022-11-19 21:34:49 +01:00
|
|
|
Name: []byte{},
|
|
|
|
Digest: dgst,
|
|
|
|
Size: stackDirectory.Size(),
|
|
|
|
},
|
|
|
|
},
|
2023-10-11 12:28:10 +02:00
|
|
|
}, narSize, narSha256, nil
|
|
|
|
} else {
|
|
|
|
return nil, 0, nil, fmt.Errorf("no root set")
|
2022-11-19 21:34:49 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check for valid path transitions, pop from stack if needed
|
|
|
|
// The nar reader already gives us some guarantees about ordering and illegal transitions,
|
|
|
|
// So we really only need to check if the top-of-stack path is a prefix of the path,
|
2023-09-17 13:52:50 +02:00
|
|
|
// and if it's not, pop from the stack. We do this repeatedly until the top of the stack is
|
|
|
|
// the subdirectory the new entry is in, or we hit the root directory.
|
2022-11-19 21:34:49 +01:00
|
|
|
|
|
|
|
// We don't need to worry about the root node case, because we can only finish the root "/"
|
|
|
|
// If we're at the end of the NAR reader (covered by the EOF check)
|
2023-09-17 13:52:50 +02:00
|
|
|
for len(stack) > 1 && !strings.HasPrefix(hdr.Path, stack[len(stack)-1].path+"/") {
|
|
|
|
err := popFromStack()
|
|
|
|
if err != nil {
|
2023-10-11 12:28:10 +02:00
|
|
|
return nil, 0, nil, fmt.Errorf("unable to pop from stack: %w", err)
|
2022-11-19 21:34:49 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if hdr.Type == nar.TypeSymlink {
|
2023-09-22 15:38:10 +02:00
|
|
|
symlinkNode := &castorev1pb.SymlinkNode{
|
2022-11-19 21:34:49 +01:00
|
|
|
Name: []byte(getBasename(hdr.Path)),
|
|
|
|
Target: []byte(hdr.LinkTarget),
|
|
|
|
}
|
|
|
|
if len(stack) > 0 {
|
|
|
|
topOfStack := stack[len(stack)-1].directory
|
|
|
|
topOfStack.Symlinks = append(topOfStack.Symlinks, symlinkNode)
|
|
|
|
} else {
|
|
|
|
rootSymlink = symlinkNode
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
if hdr.Type == nar.TypeRegular {
|
2023-10-03 13:23:46 +02:00
|
|
|
// wrap reader with a reader counting the number of bytes read
|
|
|
|
blobCountW := &CountingWriter{}
|
|
|
|
blobReader := io.TeeReader(narReader, blobCountW)
|
2022-11-19 21:34:49 +01:00
|
|
|
|
2023-10-03 12:59:13 +02:00
|
|
|
blobDigest, err := blobCb(blobReader)
|
2022-11-19 21:34:49 +01:00
|
|
|
if err != nil {
|
2023-10-11 12:28:10 +02:00
|
|
|
return nil, 0, nil, fmt.Errorf("failure from blobCb: %w", err)
|
2022-11-19 21:34:49 +01:00
|
|
|
}
|
|
|
|
|
2023-10-03 12:59:13 +02:00
|
|
|
// ensure blobCb did read all the way to the end.
|
|
|
|
// If it didn't, the blobCb function is wrong and we should bail out.
|
2023-10-03 13:23:46 +02:00
|
|
|
if blobCountW.BytesWritten() != uint64(hdr.Size) {
|
|
|
|
panic("blobCB did not read to end")
|
2022-11-19 21:34:49 +01:00
|
|
|
}
|
|
|
|
|
2023-09-22 15:38:10 +02:00
|
|
|
fileNode := &castorev1pb.FileNode{
|
2022-11-19 21:34:49 +01:00
|
|
|
Name: []byte(getBasename(hdr.Path)),
|
2023-10-03 12:59:13 +02:00
|
|
|
Digest: blobDigest,
|
2023-11-05 13:31:01 +01:00
|
|
|
Size: uint64(hdr.Size),
|
2022-11-19 21:34:49 +01:00
|
|
|
Executable: hdr.Executable,
|
|
|
|
}
|
|
|
|
if len(stack) > 0 {
|
|
|
|
topOfStack := stack[len(stack)-1].directory
|
|
|
|
topOfStack.Files = append(topOfStack.Files, fileNode)
|
|
|
|
} else {
|
|
|
|
rootFile = fileNode
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if hdr.Type == nar.TypeDirectory {
|
2023-09-22 15:38:10 +02:00
|
|
|
directory := &castorev1pb.Directory{
|
|
|
|
Directories: []*castorev1pb.DirectoryNode{},
|
|
|
|
Files: []*castorev1pb.FileNode{},
|
|
|
|
Symlinks: []*castorev1pb.SymlinkNode{},
|
2022-11-19 21:34:49 +01:00
|
|
|
}
|
2023-10-03 11:42:48 +02:00
|
|
|
stack = append(stack, stackItem{
|
2022-11-19 21:34:49 +01:00
|
|
|
directory: directory,
|
|
|
|
path: hdr.Path,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|