feat(tvix/store): bump fastcdc, use v2020 version

This switches away from the less canonical "ronomon" version to the
implementation as described in the
[paper](https://ieeexplore.ieee.org/document/9055082) by Wen Xia, et
al., in 2020.

That version uses 64-bit hash values and tends to be faster than both
the ronomon and v2016 versions, while producing the same chunking as the
v2016 version.

As per https://docs.rs/fastcdc/latest/fastcdc/#implementations-1, it's
the recommended choice.

The crate also gained support for streaming versions of the chunkers
(https://docs.rs/fastcdc/latest/fastcdc/#large-data), which might be
useful.

Change-Id: Ieabec3da54eb2b73c045cb54e51f7a216f63641e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8134
Reviewed-by: raitobezarius <tvl@lahfa.xyz>
Tested-by: BuildkiteCI
This commit is contained in:
Florian Klink 2023-02-17 18:15:21 +01:00 committed by flokli
parent fbabcf0420
commit a40d2dcdcd
4 changed files with 6 additions and 6 deletions

4
tvix/Cargo.lock generated
View file

@ -675,9 +675,9 @@ checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed"
[[package]]
name = "fastcdc"
version = "2.0.0"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8093ae083a5098c6ac2e898350c704b2c70d092f9a5c0ad6f43703ec89a872b1"
checksum = "c47726595a8a071d7d8045a837d1179b1964633e256300675aa50c31284a23e2"
[[package]]
name = "fastrand"

View file

@ -1915,9 +1915,9 @@ rec {
};
"fastcdc" = rec {
crateName = "fastcdc";
version = "2.0.0";
version = "3.0.0";
edition = "2018";
sha256 = "1cbjm24yq0rpykb0lp4s5w4hvixj0k3m10w95sncd62h784ax4w0";
sha256 = "1qi398l32355b9kh0qr57rin86cv2z8kga25h1yis1wab9cjcxy4";
authors = [
"Nathan Fiedler <nathanfiedler@fastmail.fm>"
];

View file

@ -12,7 +12,7 @@ blake3 = { version = "1.3.1", features = ["rayon", "std"] }
clap = { version = "4.0", features = ["derive", "env"] }
count-write = "0.1.0"
data-encoding = "2.3.3"
fastcdc = "2.0.0"
fastcdc = "3.0.0"
lazy_static = "1.4.0"
prost = "0.11.2"
sha2 = "0.10.6"

View file

@ -157,7 +157,7 @@ impl<
// initialize a new chunker
// TODO: play with chunking sizes
let chunker = fastcdc::FastCDC::new(
let chunker = fastcdc::v2020::FastCDC::new(
&blob_contents,
64 * 1024 / 4, // min
64 * 1024, // avg