fix(tvix/nar-bridge): chunk blobs
Instead of putting all of a blob's data into one big BlobChunk, which produced far too large proto messages, split each blob into chunks of a reasonable size (at most 1 MiB) and send those chunks to the server one by one.

Change-Id: Ia45a53956a6d7c0599cc59ac516ba37e9fb1b30e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9357
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Tested-by: BuildkiteCI
This commit is contained in:
parent
136b12ddd7
commit
0c031461c3
2 changed files with 48 additions and 25 deletions
|
@ -41,7 +41,7 @@ func New(r io.Reader) *Reader {
|
||||||
func (r *Reader) Import(
|
func (r *Reader) Import(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
// callback function called with each regular file content
|
// callback function called with each regular file content
|
||||||
fileCb func(fileReader io.Reader) error,
|
blobCb func(fileReader io.Reader) error,
|
||||||
// callback function called with each finalized directory node
|
// callback function called with each finalized directory node
|
||||||
directoryCb func(directory *storev1pb.Directory) error,
|
directoryCb func(directory *storev1pb.Directory) error,
|
||||||
) (*storev1pb.PathInfo, error) {
|
) (*storev1pb.PathInfo, error) {
|
||||||
|
@ -219,9 +219,9 @@ func (r *Reader) Import(
|
||||||
// wrap reader with a reader calculating the blake3 hash
|
// wrap reader with a reader calculating the blake3 hash
|
||||||
fileReader := NewHasher(narReader, blake3.New(32, nil))
|
fileReader := NewHasher(narReader, blake3.New(32, nil))
|
||||||
|
|
||||||
err := fileCb(fileReader)
|
err := blobCb(fileReader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failure from fileCb: %w", err)
|
return nil, fmt.Errorf("failure from blobCb: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// drive the file reader to the end, in case the CB function doesn't read
|
// drive the file reader to the end, in case the CB function doesn't read
|
||||||
|
|
|
@ -1,47 +1,70 @@
|
||||||
package server
|
package server
|
||||||
|
|
||||||
import (
|
import (
|
||||||
storev1pb "code.tvl.fyi/tvix/store/protos"
|
"bufio"
|
||||||
"context"
|
"context"
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
"io"
|
"io"
|
||||||
|
|
||||||
|
storev1pb "code.tvl.fyi/tvix/store/protos"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
// this returns a callback function that can be used as fileCb
|
// the size of individual BlobChunk we send when uploading to BlobService.
|
||||||
// for the reader.Import function call
|
const chunkSize = 1024 * 1024
|
||||||
|
|
||||||
|
// this produces a callback function that can be used as blobCb for the
|
||||||
|
// reader.Import function call
|
||||||
func genBlobServiceWriteCb(ctx context.Context, blobServiceClient storev1pb.BlobServiceClient) func(io.Reader) error {
|
func genBlobServiceWriteCb(ctx context.Context, blobServiceClient storev1pb.BlobServiceClient) func(io.Reader) error {
|
||||||
return func(fileReader io.Reader) error {
|
return func(blobReader io.Reader) error {
|
||||||
// Read from fileReader into a buffer.
|
// Ensure the blobReader is buffered to at least the chunk size.
|
||||||
// We currently buffer all contents and send them to blobServiceClient at once,
|
blobReader = bufio.NewReaderSize(blobReader, chunkSize)
|
||||||
// but that's about to change.
|
|
||||||
contents, err := io.ReadAll(fileReader)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("unable to read all contents from file reader: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
log := log.WithField("blob_size", len(contents))
|
|
||||||
|
|
||||||
log.Infof("about to upload blob")
|
|
||||||
|
|
||||||
putter, err := blobServiceClient.Put(ctx)
|
putter, err := blobServiceClient.Put(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// return error to the importer
|
// return error to the importer
|
||||||
return fmt.Errorf("error from blob service: %w", err)
|
return fmt.Errorf("error from blob service: %w", err)
|
||||||
}
|
}
|
||||||
err = putter.Send(&storev1pb.BlobChunk{
|
|
||||||
Data: contents,
|
blobSize := 0
|
||||||
})
|
chunk := make([]byte, chunkSize)
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("putting blob chunk: %w", err)
|
for {
|
||||||
|
n, err := blobReader.Read(chunk)
|
||||||
|
if err != nil && !errors.Is(err, io.EOF) {
|
||||||
|
return fmt.Errorf("unable to read from blobreader: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if n != 0 {
|
||||||
|
log.WithField("chunk_size", n).Debug("sending chunk")
|
||||||
|
blobSize += n
|
||||||
|
|
||||||
|
// send the blob chunk to the server. The err is only valid in the inner scope
|
||||||
|
if err := putter.Send(&storev1pb.BlobChunk{
|
||||||
|
Data: chunk[:n],
|
||||||
|
}); err != nil {
|
||||||
|
return fmt.Errorf("sending blob chunk: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// if our read from blobReader returned an EOF, we're done reading
|
||||||
|
if errors.Is(err, io.EOF) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
resp, err := putter.CloseAndRecv()
|
resp, err := putter.CloseAndRecv()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("close blob putter: %w", err)
|
return fmt.Errorf("close blob putter: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.WithField("digest", base64.StdEncoding.EncodeToString(resp.GetDigest())).Info("uploaded blob")
|
log.WithFields(log.Fields{
|
||||||
|
"blob_digest": base64.StdEncoding.EncodeToString(resp.GetDigest()),
|
||||||
|
"blob_size": blobSize,
|
||||||
|
}).Info("uploaded blob")
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue