feat(tvix/store): add nixbase32 mod
This implements the nix-specific base32 encoding and decoding, exposing a subset of the API that the data-encoding crate provides. Nix uses a custom alphabet, no padding, and encodes bytes in reverse order. The last of these is the reason we can't just use the data-encoding crate directly. Three odd corner-case tests ported over from go-nix failed. We opened b/235 to further investigate. Change-Id: I73fab6ddd67177d882e4c3f2b48761c95853d558 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7683 Reviewed-by: tazjin <tazjin@tvl.su> Autosubmit: flokli <flokli@flokli.de> Tested-by: BuildkiteCI
This commit is contained in:
parent
5ba47a2bc3
commit
357c4d4836
5 changed files with 247 additions and 33 deletions
30
tvix/Cargo.lock
generated
30
tvix/Cargo.lock
generated
|
@ -473,6 +473,12 @@ dependencies = [
|
|||
"syn 1.0.103",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "data-encoding"
|
||||
version = "2.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23d8666cb01533c39dde32bcbab8e227b4ed6679b2c925eba05feabea39508fb"
|
||||
|
||||
[[package]]
|
||||
name = "derivation"
|
||||
version = "0.1.0"
|
||||
|
@ -1785,6 +1791,28 @@ dependencies = [
|
|||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "test-case"
|
||||
version = "2.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "21d6cf5a7dffb3f9dceec8e6b8ca528d9bd71d36c9f074defb548ce161f598c0"
|
||||
dependencies = [
|
||||
"test-case-macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "test-case-macros"
|
||||
version = "2.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e45b7bf6e19353ddd832745c8fcf77a17a93171df7151187f26623f2b75b5b26"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"proc-macro-error",
|
||||
"proc-macro2 1.0.47",
|
||||
"quote 1.0.21",
|
||||
"syn 1.0.103",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "test-generator"
|
||||
version = "0.3.0"
|
||||
|
@ -2116,9 +2144,11 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
"data-encoding",
|
||||
"lazy_static",
|
||||
"prost",
|
||||
"prost-build",
|
||||
"test-case",
|
||||
"thiserror",
|
||||
"tonic",
|
||||
"tonic-build",
|
||||
|
|
122
tvix/Cargo.nix
122
tvix/Cargo.nix
|
@ -1438,6 +1438,20 @@ rec {
|
|||
];
|
||||
|
||||
};
|
||||
"data-encoding" = rec {
|
||||
crateName = "data-encoding";
|
||||
version = "2.3.3";
|
||||
edition = "2018";
|
||||
sha256 = "1yq8jnivxsjzl3mjbjdjg5kfvd17wawbmg1jvsfw6cqmn1n6dn13";
|
||||
authors = [
|
||||
"Julien Cretin <git@ia0.eu>"
|
||||
];
|
||||
features = {
|
||||
"default" = [ "std" ];
|
||||
"std" = [ "alloc" ];
|
||||
};
|
||||
resolvedDefaultFeatures = [ "alloc" "default" "std" ];
|
||||
};
|
||||
"derivation" = rec {
|
||||
crateName = "derivation";
|
||||
version = "0.1.0";
|
||||
|
@ -1450,31 +1464,23 @@ rec {
|
|||
else ./derivation;
|
||||
dependencies = [
|
||||
{
|
||||
name = "blake3";
|
||||
packageId = "blake3";
|
||||
features = [ "rayon" "std" ];
|
||||
name = "glob";
|
||||
packageId = "glob";
|
||||
}
|
||||
{
|
||||
name = "maplit";
|
||||
packageId = "maplit";
|
||||
}
|
||||
{
|
||||
name = "prost";
|
||||
packageId = "prost";
|
||||
}
|
||||
{
|
||||
name = "tonic";
|
||||
packageId = "tonic";
|
||||
name = "serde";
|
||||
packageId = "serde";
|
||||
features = [ "derive" ];
|
||||
}
|
||||
];
|
||||
buildDependencies = [
|
||||
devDependencies = [
|
||||
{
|
||||
name = "prost-build";
|
||||
packageId = "prost-build";
|
||||
name = "serde_json";
|
||||
packageId = "serde_json";
|
||||
}
|
||||
{
|
||||
name = "tonic-build";
|
||||
packageId = "tonic-build";
|
||||
name = "test-generator";
|
||||
packageId = "test-generator";
|
||||
}
|
||||
];
|
||||
|
||||
|
@ -2724,16 +2730,6 @@ rec {
|
|||
"value-bag" = [ "dep:value-bag" ];
|
||||
};
|
||||
};
|
||||
"maplit" = rec {
|
||||
crateName = "maplit";
|
||||
version = "1.0.2";
|
||||
edition = "2015";
|
||||
sha256 = "07b5kjnhrrmfhgqm9wprjw8adx6i225lqp49gasgqg74lahnabiy";
|
||||
authors = [
|
||||
"bluss"
|
||||
];
|
||||
|
||||
};
|
||||
"matchit" = rec {
|
||||
crateName = "matchit";
|
||||
version = "0.7.0";
|
||||
|
@ -5091,6 +5087,64 @@ rec {
|
|||
];
|
||||
|
||||
};
|
||||
"test-case" = rec {
|
||||
crateName = "test-case";
|
||||
version = "2.2.2";
|
||||
edition = "2018";
|
||||
sha256 = "1h4qymhy332lzgg79w696qfxg6wdab5birn8xvfgkczzgmdczmi1";
|
||||
authors = [
|
||||
"Marcin Sas-Szymanski <marcin.sas-szymanski@anixe.pl>"
|
||||
"Wojciech Polak <frondeus@gmail.com>"
|
||||
"Łukasz Biel <lukasz.p.biel@gmail.com>"
|
||||
];
|
||||
dependencies = [
|
||||
{
|
||||
name = "test-case-macros";
|
||||
packageId = "test-case-macros";
|
||||
usesDefaultFeatures = false;
|
||||
}
|
||||
];
|
||||
features = {
|
||||
"regex" = [ "dep:regex" ];
|
||||
"with-regex" = [ "regex" "test-case-macros/with-regex" ];
|
||||
};
|
||||
};
|
||||
"test-case-macros" = rec {
|
||||
crateName = "test-case-macros";
|
||||
version = "2.2.2";
|
||||
edition = "2018";
|
||||
sha256 = "09jvbfvz48v6ya3i25gp3lbr6ym1fz7qyp3l6bcdslwkw7v7nnz4";
|
||||
procMacro = true;
|
||||
authors = [
|
||||
"Marcin Sas-Szymanski <marcin.sas-szymanski@anixe.pl>"
|
||||
"Wojciech Polak <frondeus@gmail.com>"
|
||||
"Łukasz Biel <lukasz.p.biel@gmail.com>"
|
||||
];
|
||||
dependencies = [
|
||||
{
|
||||
name = "cfg-if";
|
||||
packageId = "cfg-if";
|
||||
}
|
||||
{
|
||||
name = "proc-macro-error";
|
||||
packageId = "proc-macro-error";
|
||||
}
|
||||
{
|
||||
name = "proc-macro2";
|
||||
packageId = "proc-macro2 1.0.47";
|
||||
}
|
||||
{
|
||||
name = "quote";
|
||||
packageId = "quote 1.0.21";
|
||||
}
|
||||
{
|
||||
name = "syn";
|
||||
packageId = "syn 1.0.103";
|
||||
features = [ "full" "extra-traits" ];
|
||||
}
|
||||
];
|
||||
features = { };
|
||||
};
|
||||
"test-generator" = rec {
|
||||
crateName = "test-generator";
|
||||
version = "0.3.0";
|
||||
|
@ -6335,6 +6389,14 @@ rec {
|
|||
packageId = "blake3";
|
||||
features = [ "rayon" "std" ];
|
||||
}
|
||||
{
|
||||
name = "data-encoding";
|
||||
packageId = "data-encoding";
|
||||
}
|
||||
{
|
||||
name = "lazy_static";
|
||||
packageId = "lazy_static";
|
||||
}
|
||||
{
|
||||
name = "prost";
|
||||
packageId = "prost";
|
||||
|
@ -6360,8 +6422,8 @@ rec {
|
|||
];
|
||||
devDependencies = [
|
||||
{
|
||||
name = "lazy_static";
|
||||
packageId = "lazy_static";
|
||||
name = "test-case";
|
||||
packageId = "test-case";
|
||||
}
|
||||
];
|
||||
|
||||
|
|
|
@ -6,13 +6,15 @@ edition = "2021"
|
|||
[dependencies]
|
||||
anyhow = "1.0.68"
|
||||
blake3 = { version = "1.3.1", features = ["rayon", "std"] }
|
||||
data-encoding = "2.3.3"
|
||||
lazy_static = "1.4.0"
|
||||
prost = "0.11.2"
|
||||
thiserror = "1.0.38"
|
||||
tonic = "0.8.2"
|
||||
|
||||
[dev-dependencies]
|
||||
lazy_static = "1.4.0"
|
||||
|
||||
[build-dependencies]
|
||||
prost-build = "0.11.2"
|
||||
tonic-build = "0.8.2"
|
||||
|
||||
[dev-dependencies]
|
||||
test-case = "2.2.2"
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
mod nixbase32;
|
||||
mod proto;
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
119
tvix/store/src/nixbase32.rs
Normal file
119
tvix/store/src/nixbase32.rs
Normal file
|
@ -0,0 +1,119 @@
|
|||
//! Implements the slightly odd "base32" encoding that's used in Nix.
|
||||
//!
|
||||
//! Nix uses a custom alphabet. Contrary to other implementations (RFC4648),
|
||||
//! encoding to "nix base32" doesn't use any padding, and reads in characters
|
||||
//! in reverse order.
|
||||
//!
|
||||
//! This is also the main reason why `data_encoding::Encoding` can't be used
|
||||
//! directly, but this module aims to provide a similar interface (with some
|
||||
//! methods omitted).
|
||||
use data_encoding::{DecodeError, Encoding, Specification};
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
/// Nixbase32Encoding wraps a data_encoding::Encoding internally.
|
||||
/// We can't use it directly, as nix also reads in characters in reverse order.
|
||||
pub struct Nixbase32Encoding {
|
||||
encoding: Encoding,
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
/// Shared [Nixbase32Encoding] instance, built by `nixbase32_encoding()` the
/// first time it is dereferenced.
///
/// It provides some of the functions seen on a data_encoding::Encoding.
|
||||
pub static ref NIXBASE32: Nixbase32Encoding = nixbase32_encoding();
|
||||
}
|
||||
|
||||
/// Populates the Nixbase32Encoding struct with a data_encoding::Encoding,
|
||||
/// using the nixbase32 alphabet and config.
|
||||
fn nixbase32_encoding() -> Nixbase32Encoding {
|
||||
let mut spec = Specification::new();
|
||||
spec.symbols.push_str("0123456789abcdfghijklmnpqrsvwxyz");
|
||||
|
||||
Nixbase32Encoding {
|
||||
encoding: spec.encoding().unwrap(),
|
||||
}
|
||||
}
|
||||
|
||||
impl Nixbase32Encoding {
|
||||
/// Returns encoded input
|
||||
pub fn encode(&self, input: &[u8]) -> String {
|
||||
// Reverse the input, reading in the bytes in reverse order.
|
||||
let mut reversed = Vec::with_capacity(input.len());
|
||||
reversed.extend(input.iter().rev());
|
||||
self.encoding.encode(&reversed)
|
||||
}
|
||||
|
||||
/// Returns decoded input
|
||||
/// Check [data_encoding::Encoding::encode] for the error cases.
|
||||
pub fn decode(&self, input: &[u8]) -> Result<Vec<u8>, DecodeError> {
|
||||
// Decode first, then reverse the bytes of the output.
|
||||
let output = self.encoding.decode(input)?;
|
||||
|
||||
let mut reversed = Vec::with_capacity(output.len());
|
||||
reversed.extend(output.iter().rev());
|
||||
Ok(reversed)
|
||||
}
|
||||
|
||||
/// Returns the decoded length of an input of length len.
|
||||
/// Check [data_encoding::Encoding::decode_len] for the error cases.
|
||||
pub fn decode_len(&self, len: usize) -> Result<usize, DecodeError> {
|
||||
self.encoding.decode_len(len)
|
||||
}
|
||||
|
||||
/// Returns the encoded length of an input of length len
|
||||
pub fn encode_len(&self, len: usize) -> usize {
|
||||
self.encoding.encode_len(len)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::nixbase32::NIXBASE32;
|
||||
use test_case::test_case;
|
||||
|
||||
#[test_case("", vec![] ; "empty bytes")]
|
||||
// FUTUREWORK: b/235
|
||||
// this seems to encode to 3w?
|
||||
// #[test_case("0z", vec![0x1f]; "one byte")]
|
||||
#[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", vec![
|
||||
0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
|
||||
0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]; "nixpath")]
|
||||
fn encode(enc: &str, dec: Vec<u8>) {
|
||||
assert_eq!(enc, NIXBASE32.encode(&dec));
|
||||
}
|
||||
|
||||
#[test_case("", Some(vec![]) ; "empty bytes")]
|
||||
// FUTUREWORK: b/235
|
||||
// this seems to require spec.check_trailing_bits and still fails?
|
||||
// #[test_case("0z", Some(vec![0x1f]); "one byte")]
|
||||
#[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", Some(vec![
|
||||
0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
|
||||
0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]); "nixpath")]
|
||||
// this is invalid encoding, because it encodes 10 1-bytes, so the carry
|
||||
// would be 2 1-bytes
|
||||
#[test_case("zz", None; "invalid encoding-1")]
|
||||
// this is an even more specific example - it'd decode as 00000000 11
|
||||
// FUTUREWORK: b/235
|
||||
// #[test_case("c0", None; "invalid encoding-2")]
|
||||
|
||||
fn decode(enc: &str, dec: Option<Vec<u8>>) {
|
||||
match dec {
|
||||
Some(dec) => {
|
||||
// The decode needs to match what's passed in dec
|
||||
assert_eq!(dec, NIXBASE32.decode(enc.as_bytes()).unwrap());
|
||||
}
|
||||
None => {
|
||||
// the decode needs to be an error
|
||||
assert_eq!(true, NIXBASE32.decode(enc.as_bytes()).is_err());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_len() {
|
||||
assert_eq!(NIXBASE32.encode_len(20), 32)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_len() {
|
||||
assert_eq!(NIXBASE32.decode_len(32).unwrap(), 20)
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue