fix(tvix/nar-bridge): chunk blobs

Instead of creating one big BlobChunk containing all data, and creating
way too large proto messages, chunk blobs up to a reasonable (1MiB)
chunk size, and send them to the server like that.

Change-Id: Ia45a53956a6d7c0599cc59ac516ba37e9fb1b30e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9357
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Tested-by: BuildkiteCI
This commit is contained in:
Florian Klink 2023-09-17 21:47:11 +03:00 committed by flokli
parent 136b12ddd7
commit 0c031461c3
2 changed files with 48 additions and 25 deletions

View file

@ -41,7 +41,7 @@ func New(r io.Reader) *Reader {
func (r *Reader) Import(
ctx context.Context,
// callback function called with each regular file content
fileCb func(fileReader io.Reader) error,
blobCb func(fileReader io.Reader) error,
// callback function called with each finalized directory node
directoryCb func(directory *storev1pb.Directory) error,
) (*storev1pb.PathInfo, error) {
@ -219,9 +219,9 @@ func (r *Reader) Import(
// wrap reader with a reader calculating the blake3 hash
fileReader := NewHasher(narReader, blake3.New(32, nil))
err := fileCb(fileReader)
err := blobCb(fileReader)
if err != nil {
return nil, fmt.Errorf("failure from fileCb: %w", err)
return nil, fmt.Errorf("failure from blobCb: %w", err)
}
// drive the file reader to the end, in case the CB function doesn't read

View file

@ -1,47 +1,70 @@
package server
import (
storev1pb "code.tvl.fyi/tvix/store/protos"
"bufio"
"context"
"encoding/base64"
"errors"
"fmt"
log "github.com/sirupsen/logrus"
"io"
storev1pb "code.tvl.fyi/tvix/store/protos"
log "github.com/sirupsen/logrus"
)
// this returns a callback function that can be used as fileCb
// for the reader.Import function call
// the size of individual BlobChunk we send when uploading to BlobService.
const chunkSize = 1024 * 1024
// this produces a callback function that can be used as blobCb for the
// reader.Import function call
func genBlobServiceWriteCb(ctx context.Context, blobServiceClient storev1pb.BlobServiceClient) func(io.Reader) error {
return func(fileReader io.Reader) error {
// Read from fileReader into a buffer.
// We currently buffer all contents and send them to blobServiceClient at once,
// but that's about to change.
contents, err := io.ReadAll(fileReader)
if err != nil {
return fmt.Errorf("unable to read all contents from file reader: %w", err)
}
log := log.WithField("blob_size", len(contents))
log.Infof("about to upload blob")
return func(blobReader io.Reader) error {
// Ensure the blobReader is buffered to at least the chunk size.
blobReader = bufio.NewReaderSize(blobReader, chunkSize)
putter, err := blobServiceClient.Put(ctx)
if err != nil {
// return error to the importer
return fmt.Errorf("error from blob service: %w", err)
}
err = putter.Send(&storev1pb.BlobChunk{
Data: contents,
})
if err != nil {
return fmt.Errorf("putting blob chunk: %w", err)
blobSize := 0
chunk := make([]byte, chunkSize)
for {
n, err := blobReader.Read(chunk)
if err != nil && !errors.Is(err, io.EOF) {
return fmt.Errorf("unable to read from blobreader: %w", err)
}
if n != 0 {
log.WithField("chunk_size", n).Debug("sending chunk")
blobSize += n
// send the blob chunk to the server. The err is only valid in the inner scope
if err := putter.Send(&storev1pb.BlobChunk{
Data: chunk[:n],
}); err != nil {
return fmt.Errorf("sending blob chunk: %w", err)
}
}
// if our read from blobReader returned an EOF, we're done reading
if errors.Is(err, io.EOF) {
break
}
}
resp, err := putter.CloseAndRecv()
if err != nil {
return fmt.Errorf("close blob putter: %w", err)
}
log.WithField("digest", base64.StdEncoding.EncodeToString(resp.GetDigest())).Info("uploaded blob")
log.WithFields(log.Fields{
"blob_digest": base64.StdEncoding.EncodeToString(resp.GetDigest()),
"blob_size": blobSize,
}).Info("uploaded blob")
return nil
}