refactor(tvix/store/chunksvc): use [u8; 32] instead of Vec<u8>
Change-Id: Ie2b94aa5d69ff2c61fb77e13ae844f81f6270273 Reviewed-on: https://cl.tvl.fyi/c/depot/+/8314 Tested-by: BuildkiteCI Reviewed-by: tazjin <tazjin@tvl.su> Autosubmit: flokli <flokli@flokli.de>
This commit is contained in:
parent
ee23220564
commit
530cb920b5
6 changed files with 70 additions and 54 deletions
|
@ -97,33 +97,42 @@ impl<CS: ChunkService> std::io::Read for BlobReader<'_, CS> {
|
||||||
return Ok(bytes_read);
|
return Ok(bytes_read);
|
||||||
}
|
}
|
||||||
// There's another chunk to visit, fetch its contents
|
// There's another chunk to visit, fetch its contents
|
||||||
Some(chunk_meta) => match self.chunk_service.get(&chunk_meta.digest) {
|
Some(chunk_meta) => {
|
||||||
// Fetch successful, put it into `self.current_chunk` and restart the loop.
|
let chunk_meta_digest: [u8; 32] =
|
||||||
Ok(Some(chunk_data)) => {
|
chunk_meta.digest.clone().try_into().map_err(|_e| {
|
||||||
// make sure the size matches what chunk_meta says as well.
|
std::io::Error::new(
|
||||||
if chunk_data.len() as u32 != chunk_meta.size {
|
io::ErrorKind::InvalidData,
|
||||||
break Err(std::io::Error::new(
|
format!("chunk in chunkmeta has wrong digest size"),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
match self.chunk_service.get(&chunk_meta_digest) {
|
||||||
|
// Fetch successful, put it into `self.current_chunk` and restart the loop.
|
||||||
|
Ok(Some(chunk_data)) => {
|
||||||
|
// make sure the size matches what chunk_meta says as well.
|
||||||
|
if chunk_data.len() as u32 != chunk_meta.size {
|
||||||
|
break Err(std::io::Error::new(
|
||||||
io::ErrorKind::InvalidData,
|
io::ErrorKind::InvalidData,
|
||||||
format!(
|
format!(
|
||||||
"chunk_service returned chunk with wrong size for {}, expected {}, got {}",
|
"chunk_service returned chunk with wrong size for {}, expected {}, got {}",
|
||||||
BASE64.encode(&chunk_meta.digest), chunk_meta.size, chunk_data.len()
|
BASE64.encode(&chunk_meta.digest), chunk_meta.size, chunk_data.len()
|
||||||
)
|
)
|
||||||
));
|
));
|
||||||
|
}
|
||||||
|
self.current_chunk = Some(Cursor::new(chunk_data));
|
||||||
|
}
|
||||||
|
// Chunk requested does not exist
|
||||||
|
Ok(None) => {
|
||||||
|
break Err(std::io::Error::new(
|
||||||
|
io::ErrorKind::NotFound,
|
||||||
|
format!("chunk {} not found", BASE64.encode(&chunk_meta.digest)),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
// Error occured while fetching the next chunk, propagate the error from the chunk service
|
||||||
|
Err(e) => {
|
||||||
|
break Err(std::io::Error::new(io::ErrorKind::InvalidData, e));
|
||||||
}
|
}
|
||||||
self.current_chunk = Some(Cursor::new(chunk_data));
|
|
||||||
}
|
}
|
||||||
// Chunk requested does not exist
|
}
|
||||||
Ok(None) => {
|
|
||||||
break Err(std::io::Error::new(
|
|
||||||
io::ErrorKind::NotFound,
|
|
||||||
format!("chunk {} not found", BASE64.encode(&chunk_meta.digest)),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
// Error occured while fetching the next chunk, propagate the error from the chunk service
|
|
||||||
Err(e) => {
|
|
||||||
break Err(std::io::Error::new(io::ErrorKind::InvalidData, e));
|
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -196,7 +205,7 @@ mod tests {
|
||||||
// assemble a blobmeta
|
// assemble a blobmeta
|
||||||
let blobmeta = proto::BlobMeta {
|
let blobmeta = proto::BlobMeta {
|
||||||
chunks: vec![proto::blob_meta::ChunkMeta {
|
chunks: vec![proto::blob_meta::ChunkMeta {
|
||||||
digest: dgst,
|
digest: dgst.to_vec(),
|
||||||
size: 0,
|
size: 0,
|
||||||
}],
|
}],
|
||||||
inline_bao: vec![],
|
inline_bao: vec![],
|
||||||
|
@ -228,7 +237,7 @@ mod tests {
|
||||||
// assemble a blobmeta
|
// assemble a blobmeta
|
||||||
let blobmeta = proto::BlobMeta {
|
let blobmeta = proto::BlobMeta {
|
||||||
chunks: vec![proto::blob_meta::ChunkMeta {
|
chunks: vec![proto::blob_meta::ChunkMeta {
|
||||||
digest: dgst,
|
digest: dgst.to_vec(),
|
||||||
size: 3,
|
size: 3,
|
||||||
}],
|
}],
|
||||||
inline_bao: vec![],
|
inline_bao: vec![],
|
||||||
|
@ -260,7 +269,7 @@ mod tests {
|
||||||
// assemble a blobmeta
|
// assemble a blobmeta
|
||||||
let blobmeta = proto::BlobMeta {
|
let blobmeta = proto::BlobMeta {
|
||||||
chunks: vec![proto::blob_meta::ChunkMeta {
|
chunks: vec![proto::blob_meta::ChunkMeta {
|
||||||
digest: dgst_1,
|
digest: dgst_1.to_vec(),
|
||||||
size: 42,
|
size: 42,
|
||||||
}],
|
}],
|
||||||
inline_bao: vec![],
|
inline_bao: vec![],
|
||||||
|
@ -294,15 +303,15 @@ mod tests {
|
||||||
let blobmeta = proto::BlobMeta {
|
let blobmeta = proto::BlobMeta {
|
||||||
chunks: vec![
|
chunks: vec![
|
||||||
proto::blob_meta::ChunkMeta {
|
proto::blob_meta::ChunkMeta {
|
||||||
digest: dgst_1.clone(),
|
digest: dgst_1.to_vec(),
|
||||||
size: 3,
|
size: 3,
|
||||||
},
|
},
|
||||||
proto::blob_meta::ChunkMeta {
|
proto::blob_meta::ChunkMeta {
|
||||||
digest: dgst_2,
|
digest: dgst_2.to_vec(),
|
||||||
size: 2,
|
size: 2,
|
||||||
},
|
},
|
||||||
proto::blob_meta::ChunkMeta {
|
proto::blob_meta::ChunkMeta {
|
||||||
digest: dgst_1,
|
digest: dgst_1.to_vec(),
|
||||||
size: 3,
|
size: 3,
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
|
|
@ -11,18 +11,18 @@ use super::ChunkService;
|
||||||
|
|
||||||
#[derive(Clone, Default)]
|
#[derive(Clone, Default)]
|
||||||
pub struct MemoryChunkService {
|
pub struct MemoryChunkService {
|
||||||
db: Arc<RwLock<HashMap<Vec<u8>, Vec<u8>>>>,
|
db: Arc<RwLock<HashMap<[u8; 32], Vec<u8>>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ChunkService for MemoryChunkService {
|
impl ChunkService for MemoryChunkService {
|
||||||
#[instrument(skip(self, digest), fields(chunk.digest=BASE64.encode(digest)))]
|
#[instrument(skip(self, digest), fields(chunk.digest=BASE64.encode(digest)))]
|
||||||
fn has(&self, digest: &[u8]) -> Result<bool, Error> {
|
fn has(&self, digest: &[u8; 32]) -> Result<bool, Error> {
|
||||||
let db = self.db.read().unwrap();
|
let db = self.db.read().unwrap();
|
||||||
Ok(db.get(digest).is_some())
|
Ok(db.get(digest).is_some())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(skip(self), fields(chunk.digest=BASE64.encode(digest)))]
|
#[instrument(skip(self), fields(chunk.digest=BASE64.encode(digest)))]
|
||||||
fn get(&self, digest: &[u8]) -> Result<Option<Vec<u8>>, Error> {
|
fn get(&self, digest: &[u8; 32]) -> Result<Option<Vec<u8>>, Error> {
|
||||||
let db = self.db.read().unwrap();
|
let db = self.db.read().unwrap();
|
||||||
match db.get(digest) {
|
match db.get(digest) {
|
||||||
None => Ok(None),
|
None => Ok(None),
|
||||||
|
@ -42,12 +42,12 @@ impl ChunkService for MemoryChunkService {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(skip(self, data))]
|
#[instrument(skip(self, data))]
|
||||||
fn put(&self, data: Vec<u8>) -> Result<Vec<u8>, Error> {
|
fn put(&self, data: Vec<u8>) -> Result<[u8; 32], Error> {
|
||||||
let digest = blake3::hash(&data).as_bytes().to_vec();
|
let digest = blake3::hash(&data);
|
||||||
|
|
||||||
let mut db = self.db.write().unwrap();
|
let mut db = self.db.write().unwrap();
|
||||||
db.insert(digest.clone(), data);
|
db.insert(digest.as_bytes().clone(), data);
|
||||||
|
|
||||||
Ok(digest)
|
Ok(digest.as_bytes().clone())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,12 +17,12 @@ pub use self::util::upload_chunk;
|
||||||
/// chunking information.
|
/// chunking information.
|
||||||
pub trait ChunkService {
|
pub trait ChunkService {
|
||||||
/// check if the service has a chunk, given by its digest.
|
/// check if the service has a chunk, given by its digest.
|
||||||
fn has(&self, digest: &[u8]) -> Result<bool, Error>;
|
fn has(&self, digest: &[u8; 32]) -> Result<bool, Error>;
|
||||||
|
|
||||||
/// retrieve a chunk by its digest. Implementations MUST validate the digest
|
/// retrieve a chunk by its digest. Implementations MUST validate the digest
|
||||||
/// matches.
|
/// matches.
|
||||||
fn get(&self, digest: &[u8]) -> Result<Option<Vec<u8>>, Error>;
|
fn get(&self, digest: &[u8; 32]) -> Result<Option<Vec<u8>>, Error>;
|
||||||
|
|
||||||
/// insert a chunk. returns the digest of the chunk, or an error.
|
/// insert a chunk. returns the digest of the chunk, or an error.
|
||||||
fn put(&self, data: Vec<u8>) -> Result<Vec<u8>, Error>;
|
fn put(&self, data: Vec<u8>) -> Result<[u8; 32], Error>;
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,7 +30,7 @@ impl SledChunkService {
|
||||||
|
|
||||||
impl ChunkService for SledChunkService {
|
impl ChunkService for SledChunkService {
|
||||||
#[instrument(name = "SledChunkService::has", skip(self, digest), fields(chunk.digest=BASE64.encode(digest)))]
|
#[instrument(name = "SledChunkService::has", skip(self, digest), fields(chunk.digest=BASE64.encode(digest)))]
|
||||||
fn has(&self, digest: &[u8]) -> Result<bool, Error> {
|
fn has(&self, digest: &[u8; 32]) -> Result<bool, Error> {
|
||||||
match self.db.get(digest) {
|
match self.db.get(digest) {
|
||||||
Ok(None) => Ok(false),
|
Ok(None) => Ok(false),
|
||||||
Ok(Some(_)) => Ok(true),
|
Ok(Some(_)) => Ok(true),
|
||||||
|
@ -39,7 +39,7 @@ impl ChunkService for SledChunkService {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(name = "SledChunkService::get", skip(self), fields(chunk.digest=BASE64.encode(digest)))]
|
#[instrument(name = "SledChunkService::get", skip(self), fields(chunk.digest=BASE64.encode(digest)))]
|
||||||
fn get(&self, digest: &[u8]) -> Result<Option<Vec<u8>>, Error> {
|
fn get(&self, digest: &[u8; 32]) -> Result<Option<Vec<u8>>, Error> {
|
||||||
match self.db.get(digest) {
|
match self.db.get(digest) {
|
||||||
Ok(None) => Ok(None),
|
Ok(None) => Ok(None),
|
||||||
Ok(Some(data)) => {
|
Ok(Some(data)) => {
|
||||||
|
@ -59,12 +59,12 @@ impl ChunkService for SledChunkService {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[instrument(name = "SledChunkService::put", skip(self, data))]
|
#[instrument(name = "SledChunkService::put", skip(self, data))]
|
||||||
fn put(&self, data: Vec<u8>) -> Result<Vec<u8>, Error> {
|
fn put(&self, data: Vec<u8>) -> Result<[u8; 32], Error> {
|
||||||
let digest = blake3::hash(&data).as_bytes().to_vec();
|
let digest = blake3::hash(&data);
|
||||||
let result = self.db.insert(&digest, data);
|
let result = self.db.insert(&digest.as_bytes(), data);
|
||||||
if let Err(e) = result {
|
if let Err(e) = result {
|
||||||
return Err(Error::StorageError(e.to_string()));
|
return Err(Error::StorageError(e.to_string()));
|
||||||
}
|
}
|
||||||
Ok(digest)
|
Ok(digest.as_bytes().clone())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,7 +9,7 @@ use super::ChunkService;
|
||||||
pub fn upload_chunk<CS: ChunkService>(
|
pub fn upload_chunk<CS: ChunkService>(
|
||||||
chunk_service: &CS,
|
chunk_service: &CS,
|
||||||
chunk_data: Vec<u8>,
|
chunk_data: Vec<u8>,
|
||||||
) -> Result<Vec<u8>, Error> {
|
) -> Result<[u8; 32], Error> {
|
||||||
let mut hasher = blake3::Hasher::new();
|
let mut hasher = blake3::Hasher::new();
|
||||||
update_hasher(&mut hasher, &chunk_data);
|
update_hasher(&mut hasher, &chunk_data);
|
||||||
let digest = hasher.finalize();
|
let digest = hasher.finalize();
|
||||||
|
@ -19,9 +19,9 @@ pub fn upload_chunk<CS: ChunkService>(
|
||||||
}
|
}
|
||||||
let digest_resp = chunk_service.put(chunk_data)?;
|
let digest_resp = chunk_service.put(chunk_data)?;
|
||||||
|
|
||||||
assert_eq!(digest_resp, digest.as_bytes());
|
assert_eq!(&digest_resp, digest.as_bytes());
|
||||||
|
|
||||||
Ok(digest.as_bytes().to_vec())
|
Ok(digest.as_bytes().clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// reads through a reader, writes chunks to a [ChunkService] and returns a
|
/// reads through a reader, writes chunks to a [ChunkService] and returns a
|
||||||
|
@ -56,7 +56,7 @@ pub fn read_all_and_chunk<CS: ChunkService, R: Read>(
|
||||||
let chunk_digest = upload_chunk(chunk_service, chunk.data)?;
|
let chunk_digest = upload_chunk(chunk_service, chunk.data)?;
|
||||||
|
|
||||||
blob_meta.chunks.push(proto::blob_meta::ChunkMeta {
|
blob_meta.chunks.push(proto::blob_meta::ChunkMeta {
|
||||||
digest: chunk_digest,
|
digest: chunk_digest.to_vec(),
|
||||||
size: chunk_len,
|
size: chunk_len,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -36,7 +36,7 @@ impl<BS: BlobService, CS: ChunkService> GRPCBlobServiceWrapper<BS, CS> {
|
||||||
}
|
}
|
||||||
let digest_resp = chunk_service.put(chunk_data)?;
|
let digest_resp = chunk_service.put(chunk_data)?;
|
||||||
|
|
||||||
assert_eq!(digest_resp, digest.as_bytes());
|
assert_eq!(&digest_resp, digest.as_bytes());
|
||||||
|
|
||||||
Ok(digest.as_bytes().to_vec())
|
Ok(digest.as_bytes().to_vec())
|
||||||
}
|
}
|
||||||
|
@ -74,9 +74,14 @@ impl<
|
||||||
let req = request.into_inner();
|
let req = request.into_inner();
|
||||||
let (tx, rx) = channel(5);
|
let (tx, rx) = channel(5);
|
||||||
|
|
||||||
|
let req_digest: [u8; 32] = req
|
||||||
|
.digest
|
||||||
|
.try_into()
|
||||||
|
.map_err(|_e| Status::invalid_argument("invalid digest length"))?;
|
||||||
|
|
||||||
// query the chunk service for more detailed blob info
|
// query the chunk service for more detailed blob info
|
||||||
let stat_resp = self.blob_service.stat(&super::StatBlobRequest {
|
let stat_resp = self.blob_service.stat(&super::StatBlobRequest {
|
||||||
digest: req.digest.to_vec(),
|
digest: req_digest.to_vec(),
|
||||||
include_chunks: true,
|
include_chunks: true,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
})?;
|
})?;
|
||||||
|
@ -86,7 +91,7 @@ impl<
|
||||||
// If the stat didn't return any blobmeta, the client might
|
// If the stat didn't return any blobmeta, the client might
|
||||||
// still have asked for a single chunk to be read.
|
// still have asked for a single chunk to be read.
|
||||||
// Check the chunkstore.
|
// Check the chunkstore.
|
||||||
if let Some(data) = self.chunk_service.get(&req.digest)? {
|
if let Some(data) = self.chunk_service.get(&req_digest)? {
|
||||||
// We already know the hash matches, and contrary to
|
// We already know the hash matches, and contrary to
|
||||||
// iterating over a blobmeta, we can't know the size,
|
// iterating over a blobmeta, we can't know the size,
|
||||||
// so send the contents of that chunk over,
|
// so send the contents of that chunk over,
|
||||||
|
@ -101,7 +106,7 @@ impl<
|
||||||
} else {
|
} else {
|
||||||
return Err(Status::not_found(format!(
|
return Err(Status::not_found(format!(
|
||||||
"blob {} not found",
|
"blob {} not found",
|
||||||
BASE64.encode(&req.digest),
|
BASE64.encode(&req_digest),
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -118,13 +123,15 @@ impl<
|
||||||
// request chunk.
|
// request chunk.
|
||||||
// We don't need to validate the digest again, as
|
// We don't need to validate the digest again, as
|
||||||
// that's required for all implementations of ChunkService.
|
// that's required for all implementations of ChunkService.
|
||||||
let res = match chunk_client.get(&chunkmeta.digest) {
|
// TODO: handle error
|
||||||
|
let chunkmeta_digest = &chunkmeta.digest.try_into().unwrap();
|
||||||
|
let res = match chunk_client.get(&chunkmeta_digest) {
|
||||||
Err(e) => Err(e.into()),
|
Err(e) => Err(e.into()),
|
||||||
// TODO: make this a separate error type
|
// TODO: make this a separate error type
|
||||||
Ok(None) => Err(Error::StorageError(format!(
|
Ok(None) => Err(Error::StorageError(format!(
|
||||||
"consistency error: chunk {} for blob {} not found",
|
"consistency error: chunk {} for blob {} not found",
|
||||||
BASE64.encode(&chunkmeta.digest),
|
BASE64.encode(chunkmeta_digest),
|
||||||
BASE64.encode(&req.digest),
|
BASE64.encode(&req_digest),
|
||||||
))
|
))
|
||||||
.into()),
|
.into()),
|
||||||
Ok(Some(data)) => {
|
Ok(Some(data)) => {
|
||||||
|
@ -133,8 +140,8 @@ impl<
|
||||||
if data.len() as u32 != chunkmeta.size {
|
if data.len() as u32 != chunkmeta.size {
|
||||||
Err(Error::StorageError(format!(
|
Err(Error::StorageError(format!(
|
||||||
"consistency error: chunk {} for blob {} has wrong size, expected {}, got {}",
|
"consistency error: chunk {} for blob {} has wrong size, expected {}, got {}",
|
||||||
BASE64.encode(&chunkmeta.digest),
|
BASE64.encode(chunkmeta_digest),
|
||||||
BASE64.encode(&req.digest),
|
BASE64.encode(&req_digest),
|
||||||
chunkmeta.size,
|
chunkmeta.size,
|
||||||
data.len(),
|
data.len(),
|
||||||
)).into())
|
)).into())
|
||||||
|
|
Loading…
Reference in a new issue