fix(tvix/castore/import): check small blobs first
ConcurrentBlobUploader buffers small blobs in memory and uploads them to the BlobService in the background. In these cases we already know the digest of the whole blob, so we can ask the BlobService whether it exists before uploading it. We were not doing that, which caused rate-limiting issues in GCS: it enforces an update limit of one write per second on the same key, and we ran into this especially frequently with the empty blob. This change reduces the number of writes of the same blob considerably. In the future we might be able to drop it again, as our chunked blob uploading protocol gets smarter and covers these cases.

Change-Id: Icf482df815812f80a0b65cec0426f8e686308abb
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12497
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
parent 1f5a20736a
commit 21e5fc024d

1 changed file with 13 additions and 0 deletions
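For context, here is a minimal, self-contained sketch of the check-before-upload pattern this commit introduces. The trait, digest type, and function names are simplified stand-ins invented for illustration, not the actual tvix-castore API; the real BlobService works on B3Digest values and streams rather than byte arrays.

use std::io;

// Hypothetical, trimmed-down stand-in for the part of a blob store
// this pattern relies on: an existence check plus an upload.
#[allow(async_fn_in_trait)]
pub trait BlobStore {
    /// Returns true if a blob with this digest is already present.
    async fn has(&self, digest: &[u8; 32]) -> io::Result<bool>;

    /// Uploads the blob contents, returning the digest of what was written.
    async fn upload(&self, contents: &[u8]) -> io::Result<[u8; 32]>;
}

/// Upload a small, fully-buffered blob, skipping the write entirely when
/// the store already holds a blob with the expected digest. Because the
/// whole blob sits in memory, its digest is known up front, so one cheap
/// existence check can save a rate-limited write.
pub async fn upload_small_blob<B: BlobStore>(
    blob_service: &B,
    expected_digest: [u8; 32],
    buffer: Vec<u8>,
) -> io::Result<()> {
    if blob_service.has(&expected_digest).await? {
        // Already present; nothing to write.
        return Ok(());
    }

    let digest = blob_service.upload(&buffer).await?;
    assert_eq!(digest, expected_digest, "blob digest mismatch");
    Ok(())
}

Note the check is advisory, not atomic: two concurrent uploaders can both see "missing" and both write. That is safe for a content-addressed store (the writes are identical), which is why the commit message says this reduces, rather than eliminates, duplicate writes.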
@@ -28,6 +28,9 @@ pub enum Error {
     #[error("unable to read blob contents for {0}: {1}")]
     BlobRead(PathBuf, std::io::Error),
 
+    #[error("unable to check whether blob at {0} already exists: {1}")]
+    BlobCheck(PathBuf, std::io::Error),
+
     // FUTUREWORK: proper error for blob finalize
     #[error("unable to finalize blob {0}: {1}")]
     BlobFinalize(PathBuf, std::io::Error),
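The #[error(...)] attributes strongly suggest this enum derives thiserror's Error. As a reading aid, a minimal reconstruction of the enum's shape under that assumption (the real enum in tvix-castore has more variants and may differ in detail):

use std::path::PathBuf;

// Assumed shape, derived from the attribute syntax in the hunk above.
#[derive(Debug, thiserror::Error)]
pub enum Error {
    #[error("unable to read blob contents for {0}: {1}")]
    BlobRead(PathBuf, std::io::Error),

    #[error("unable to check whether blob at {0} already exists: {1}")]
    BlobCheck(PathBuf, std::io::Error),

    #[error("unable to finalize blob {0}: {1}")]
    BlobFinalize(PathBuf, std::io::Error),
}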
@@ -118,6 +121,16 @@ where
         let path = path.to_owned();
         let r = Cursor::new(buffer);
         async move {
+            // We know the blob digest already, check it exists before sending it.
+            if blob_service
+                .has(&expected_digest)
+                .await
+                .map_err(|e| Error::BlobCheck(path.clone(), e))?
+            {
+                drop(permit);
+                return Ok(());
+            }
+
             let digest = upload_blob(&blob_service, &path, expected_size, r).await?;
 
             assert_eq!(digest, expected_digest, "Tvix bug: blob digest mismatch");
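One detail worth noting: permit is dropped explicitly before the early return, so the concurrency slot is released as soon as the existence check short-circuits, rather than being held until the end of the task. Assuming the uploader bounds background uploads with something like a tokio semaphore (an assumption; the exact mechanism isn't visible in this hunk), the pattern looks roughly like this:

use std::sync::Arc;

use tokio::sync::Semaphore;

#[tokio::main]
async fn main() {
    // Hypothetical stand-in for the uploader's concurrency limit.
    let semaphore = Arc::new(Semaphore::new(8));

    let mut tasks = tokio::task::JoinSet::new();
    for n in 0..32u32 {
        // Each background upload holds an owned permit while it runs.
        let permit = semaphore.clone().acquire_owned().await.unwrap();
        tasks.spawn(async move {
            if n % 2 == 0 {
                // Blob already exists: drop the permit right away so
                // another upload can start, then return early.
                drop(permit);
                return;
            }
            // ... actual upload work would happen here ...
            // Falling out of scope drops the permit after the upload.
            drop(permit);
        });
    }
    while tasks.join_next().await.is_some() {}
}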