commit 616fa4476f
Whether chunking is involved or not is an implementation detail of each
Blobstore. Consumers of a whole blob shouldn't need to worry about that, and
it currently is not visible in the gRPC interface either. It shouldn't bleed
into everything.

Let the BlobService trait provide `open_read` and `open_write` methods, which
return handles providing io::Read or io::Write, and leave the details up to
the implementation. This means our custom BlobReader module can go away, and
all the chunking bits in there, too.

In the future, we might still want to add more chunking-aware syncing, but as
a syncing strategy some stores can expose, not as a fundamental protocol
component.

This currently needs "SyncReadIntoAsyncRead", taken and vendored in from
https://github.com/tokio-rs/tokio/pull/5669. It provides an AsyncRead for a
sync Read, which is necessary to connect our (sync) BlobReader interface to a
gRPC server implementation.

As an alternative, we could also make the BlobReader itself async, and let
consumers of the trait (EvalIO) deal with the async-ness, but this is less of
a change for now.

In terms of vendoring, I initially tried to move our tokio crate to these
commits, but ran into version incompatibilities, so let's vendor it in for
now.

Change-Id: I5969ebbc4c0e1ceece47981be3b9e7cfb3f59ad0
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8551
Tested-by: BuildkiteCI
Reviewed-by: tazjin <tazjin@tvl.su>
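
To make the proposed surface concrete, here is a minimal sketch of such a
trait; the exact signatures, the handle type aliases, and the error type are
assumptions for illustration, not the actual tvix code:

    use std::io;

    // Hypothetical handle aliases: anything providing io::Read / io::Write.
    pub type BlobReader = Box<dyn io::Read + Send>;
    pub type BlobWriter = Box<dyn io::Write + Send>;

    /// Chunking-agnostic blob access: whether (and how) a store chunks
    /// blobs internally stays hidden behind these two methods.
    pub trait BlobService {
        type Error;

        /// Open the blob with the given blake3 digest for reading.
        /// Returns Ok(None) if no such blob exists.
        fn open_read(&self, digest: &[u8; 32]) -> Result<Option<BlobReader>, Self::Error>;

        /// Open a writer for a new blob; the store derives the blob's
        /// digest from the bytes written once the handle is finalized.
        fn open_write(&self) -> Result<BlobWriter, Self::Error>;
    }

Returning plain io::Read handles is exactly what makes SyncReadIntoAsyncRead
necessary on the gRPC side: the server wraps the sync read handle to obtain
an AsyncRead it can stream from.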

Protocol Buffer · 80 lines · 2.7 KiB

// SPDX-License-Identifier: MIT
// Copyright © 2022 The Tvix Authors

syntax = "proto3";

package tvix.store.v1;

option go_package = "code.tvl.fyi/tvix/store/protos;storev1";

service BlobService {
  // Stat exposes metadata about a given blob,
  // such as more granular chunking or baos.
  // It implicitly allows checking for existence too, as asking this for a
  // non-existing Blob will return a Status::not_found grpc error.
  // If there's no more granular chunking available, the response will simply
  // contain a single chunk.
  rpc Stat(StatBlobRequest) returns (BlobMeta);

  // Read returns a stream of BlobChunk, which is just a stream of bytes with
  // the digest specified in ReadBlobRequest.
  //
  // The server may decide on whatever chunking it sees fit as a size for
  // the individual BlobChunk sent in the response stream.
  rpc Read(ReadBlobRequest) returns (stream BlobChunk);

  // Put uploads a Blob, by reading a stream of bytes.
  //
  // The way the data is chunked up in individual BlobChunk messages sent in
  // the stream has no effect on how the server ends up chunking blobs up.
  rpc Put(stream BlobChunk) returns (PutBlobResponse);
}
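
For illustration, a hedged sketch of a client consuming Stat and Read via
tonic-generated Rust bindings; the tvix_store_v1 module path, the client type
name, and the field layouts are assumptions based on tonic's usual codegen
conventions, not verified against the tvix build:

    use tonic::transport::Channel;
    use tvix_store_v1::blob_service_client::BlobServiceClient; // assumed codegen path
    use tvix_store_v1::{ReadBlobRequest, StatBlobRequest};

    async fn fetch_blob(
        mut client: BlobServiceClient<Channel>,
        digest: Vec<u8>,
    ) -> Result<Vec<u8>, tonic::Status> {
        // Stat doubles as an existence check: a not_found status means
        // the blob is absent.
        client
            .stat(StatBlobRequest { digest: digest.clone(), ..Default::default() })
            .await?;

        // Read streams BlobChunk messages; concatenating their data fields
        // yields the whole blob, whatever chunking the server picked.
        let mut stream = client.read(ReadBlobRequest { digest }).await?.into_inner();
        let mut blob = Vec::new();
        while let Some(chunk) = stream.message().await? {
            blob.extend_from_slice(&chunk.data);
        }
        Ok(blob)
    }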

message StatBlobRequest {
  // The blake3 digest of the blob requested
  bytes digest = 1;

  // Whether to include the chunks field
  bool include_chunks = 2;

  // Whether to include the inline_bao field, containing an (outboard) bao.
  // The [bao](https://github.com/oconnor663/bao/blob/master/docs/spec.md)
  // can be used to validate chunks end up hashing to the same root digest.
  // These only really matter when only downloading parts of a blob. Some
  // caution needs to be applied when validating chunks - the bao works with
  // 1K leaf nodes, which might not align with the chunk sizes - this might
  // imply a neighboring chunk might need to be (partially) fetched to
  // validate the hash.
  bool include_bao = 3;
}
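
To make the 1K-alignment caveat above concrete: bao verifies 1 KiB leaf
nodes, so a chunk whose byte range doesn't fall on those boundaries can only
be verified by reading the enclosing aligned range. A small Rust sketch (the
helper is illustrative, not part of any API):

    const BAO_LEAF_SIZE: u64 = 1024;

    /// Given a chunk's byte range within the blob, return the 1 KiB-aligned
    /// range that actually has to be fetched to bao-verify it.
    fn verifiable_range(chunk_start: u64, chunk_end: u64) -> (u64, u64) {
        let start = chunk_start - (chunk_start % BAO_LEAF_SIZE); // round down
        let end = chunk_end.div_ceil(BAO_LEAF_SIZE) * BAO_LEAF_SIZE; // round up
        (start, end)
    }

    fn main() {
        // A chunk covering bytes 1500..3000 needs bytes 1024..3072 for
        // verification, i.e. parts of both neighboring chunks.
        assert_eq!(verifiable_range(1500, 3000), (1024, 3072));
    }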

// BlobMeta provides more granular chunking information for the requested blob,
// and baos.
message BlobMeta {
  // This provides a list of chunks.
  // Concatenating their contents would produce a blob with the digest that
  // was specified in the request.
  repeated ChunkMeta chunks = 1;

  message ChunkMeta {
    bytes digest = 1;
    uint32 size = 2;
  }

  bytes inline_bao = 2;
}
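
The concatenation invariant stated above can be checked client-side. A sketch
using the blake3 crate; fetch_chunk is a hypothetical helper resolving a
chunk digest to its bytes:

    /// Verify that the chunks listed in a BlobMeta concatenate to the
    /// blob digest that was requested.
    fn verify_chunks(
        expected: &[u8; 32],
        chunks: &[(Vec<u8>, u32)], // (digest, size) pairs, as in ChunkMeta
        fetch_chunk: impl Fn(&[u8]) -> Vec<u8>, // hypothetical chunk lookup
    ) -> bool {
        let mut hasher = blake3::Hasher::new();
        for (digest, size) in chunks {
            let data = fetch_chunk(digest);
            // Each chunk must match its own digest and advertised size.
            if data.len() != *size as usize
                || blake3::hash(&data).as_bytes() != digest.as_slice()
            {
                return false;
            }
            hasher.update(&data);
        }
        hasher.finalize().as_bytes() == expected
    }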

message ReadBlobRequest {
  // The blake3 digest of the blob or chunk requested
  bytes digest = 1;
}

// This represents some bytes of a blob.
// Blobs are sent in smaller chunks to keep message sizes manageable.
message BlobChunk {
  bytes data = 1;
}

message PutBlobResponse {
  // The blake3 digest of the data that was sent.
  bytes digest = 1;
}
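
Finally, the upload direction, as the same kind of hedged tonic sketch
(generated names assumed as before). The 64 KiB framing below is an arbitrary
client choice; per the comment on Put, it has no effect on how the server
chunks the blob internally:

    use tvix_store_v1::blob_service_client::BlobServiceClient; // assumed codegen path
    use tvix_store_v1::BlobChunk;

    async fn upload_blob(
        mut client: BlobServiceClient<tonic::transport::Channel>,
        blob: Vec<u8>,
    ) -> Result<Vec<u8>, tonic::Status> {
        let chunks: Vec<BlobChunk> = blob
            .chunks(64 * 1024) // arbitrary client-side framing
            .map(|c| BlobChunk { data: c.to_vec() })
            .collect();

        // Client-streaming call: tonic accepts any Stream of request messages.
        let resp = client.put(tokio_stream::iter(chunks)).await?.into_inner();
        Ok(resp.digest) // the blake3 digest the server computed over all bytes
    }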