From 21e5fc024d3ad275112c5bc88476ee38966d9fe1 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Thu, 19 Sep 2024 11:27:51 +0300 Subject: [PATCH] fix(tvix/castore/import): check small blobs first ConcurrentBlobUploader buffers small blobs in memory, and then uploads them to the BlobService in the background. In these cases, we know the hash of the whole blob, so we could check whether it exists before uploading it. However, we were not doing so, and this caused rate limiting issues in GCS, as it has an update limit of one write per second on the same key, which we ran into especially frequently with the empty blob. This reduces the number of writes of the same blob considerably. In the future, we might be able to drop this, as our chunked blob uploading protocol gets smarter and covers these cases. Change-Id: Icf482df815812f80a0b65cec0426f8e686308abb Reviewed-on: https://cl.tvl.fyi/c/depot/+/12497 Tested-by: BuildkiteCI Autosubmit: flokli Reviewed-by: Connor Brewster --- tvix/castore/src/import/blobs.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tvix/castore/src/import/blobs.rs b/tvix/castore/src/import/blobs.rs index 8135d871d..f71ee1e63 100644 --- a/tvix/castore/src/import/blobs.rs +++ b/tvix/castore/src/import/blobs.rs @@ -28,6 +28,9 @@ pub enum Error { #[error("unable to read blob contents for {0}: {1}")] BlobRead(PathBuf, std::io::Error), + #[error("unable to check whether blob at {0} already exists: {1}")] + BlobCheck(PathBuf, std::io::Error), + // FUTUREWORK: proper error for blob finalize #[error("unable to finalize blob {0}: {1}")] BlobFinalize(PathBuf, std::io::Error), @@ -118,6 +121,16 @@ where let path = path.to_owned(); let r = Cursor::new(buffer); async move { + // We know the blob digest already, check it exists before sending it. + if blob_service + .has(&expected_digest) + .await + .map_err(|e| Error::BlobCheck(path.clone(), e))? 
+ { + drop(permit); + return Ok(()); + } + let digest = upload_blob(&blob_service, &path, expected_size, r).await?; assert_eq!(digest, expected_digest, "Tvix bug: blob digest mismatch");