refactor(tvix): introduce nix-compat crate

Move nixbase32 and store_path into this.

This allows //tvix/cli to not pull in //tvix/store for now.

Change-Id: Id3a32867205d95794bc0d33b21d4cb3d9bafd02a
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7964
Tested-by: BuildkiteCI
Reviewed-by: tazjin <tazjin@tvl.su>
This commit is contained in:
Florian Klink 2023-01-31 12:18:03 +01:00 committed by flokli
parent 8ea93bb646
commit c27bacd905
20 changed files with 109 additions and 27 deletions

View file

@ -1,6 +1,4 @@
pub mod nixbase32;
pub mod proto;
pub mod store_path;
pub mod dummy_blob_service;
pub mod sled_directory_service;

View file

@ -1,167 +0,0 @@
//! Implements the slightly odd "base32" encoding that's used in Nix.
//!
//! Nix uses a custom alphabet. Contrary to other implementations (RFC4648),
//! encoding to "nix base32" doesn't use any padding, and reads in characters
//! in reverse order.
//!
//! This is also the main reason why we can't use `data_encoding::Encoding` -
//! it gets things wrong if there normally would be a need for padding.
use std::fmt::Write;
use thiserror::Error;
/// The 32 symbols of the nixbase32 alphabet; a symbol's index is its 5-bit value.
///
/// Compared to the digits and lowercase letters, `e`, `o`, `t` and `u` are absent.
const ALPHABET: &[u8; 32] = b"0123456789abcdfghijklmnpqrsvwxyz";
/// Errors that can occur while decoding nixbase32-encoded data.
#[derive(Debug, Eq, PartialEq, Error)]
pub enum Nixbase32DecodeError {
    /// The input contained a byte that is not part of the nixbase32 alphabet.
    /// The offending byte is carried along and rendered in hexadecimal in the
    /// error message.
    #[error("character {0:x} not in alphabet")]
    CharacterNotInAlphabet(u8),
    /// Decoding left over non-zero bits that do not fit into the output.
    #[error("nonzero carry")]
    NonzeroCarry(),
}
/// Encodes `input` as nixbase32 and returns the resulting string.
///
/// The input is consumed in 5-bit groups. Groups are emitted starting with the
/// one at the highest bit offset, so the first output character covers the
/// topmost bits of the last input byte. No padding is produced.
pub fn encode(input: &[u8]) -> String {
    let char_count = encode_len(input.len());
    let mut encoded = String::with_capacity(char_count);

    // The range is empty when `char_count` is 0, so empty input yields "".
    for group in (0..char_count).rev() {
        let bit_offset = group * 5; // bit offset within the entire input
        let byte_idx = bit_offset / 8; // input byte holding the group's lowest bit
        let shift = bit_offset % 8; // position of that bit within the byte

        let mut bits = input[byte_idx] >> shift;

        if let Some(&next) = input.get(byte_idx + 1) {
            // The group may continue in the following byte. Its bits need to
            // be moved left by up to 8 positions, which would panic for a u8,
            // so the shift is done in u16 and the result truncated back to u8
            // (deliberately discarding the shifted-out bits).
            bits |= (u16::from(next) << (8 - shift)) as u8;
        }

        let symbol = ALPHABET[usize::from(bits & 0x1f)];
        encoded.write_char(char::from(symbol)).unwrap();
    }

    encoded
}
/// Translates a single nixbase32 character into its 5-bit value.
///
/// The value equals the position of the character in the alphabet. `None` is
/// returned for any byte that is not part of the alphabet.
fn decode_char(encoded_char: &u8) -> Option<u8> {
    // The alphabet consists of five contiguous ASCII runs; each run maps onto
    // a consecutive range of values.
    let value = match *encoded_char {
        digit @ b'0'..=b'9' => digit - b'0',
        letter @ b'a'..=b'd' => letter - b'a' + 10,
        letter @ b'f'..=b'n' => letter - b'f' + 14,
        letter @ b'p'..=b's' => letter - b'p' + 23,
        letter @ b'v'..=b'z' => letter - b'v' + 27,
        _ => return None,
    };

    Some(value)
}
/// Decodes nixbase32-encoded `input` into bytes.
///
/// The characters are read in reverse order: the last character supplies the
/// lowest 5 bits of the first output byte.
///
/// # Errors
///
/// * [Nixbase32DecodeError::CharacterNotInAlphabet] if `input` contains a byte
///   that is not part of the alphabet.
/// * [Nixbase32DecodeError::NonzeroCarry] if non-zero bits are left over that
///   do not fit into the `decode_len(input.len())` output bytes.
///
/// Characters whose bits lie entirely beyond the output (this happens for
/// input lengths that [encode] never produces, such as 1, 3 or 6) are treated
/// like any other carry: they are ignored when zero and rejected with
/// `NonzeroCarry` otherwise. This function never panics on malformed input.
pub fn decode(input: &[u8]) -> Result<Vec<u8>, Nixbase32DecodeError> {
    let output_len = decode_len(input.len());
    let mut output: Vec<u8> = vec![0x00; output_len];

    // loop over all characters in reverse, and keep the iteration count in n.
    for (n, c) in input.iter().rev().enumerate() {
        let c_decoded =
            decode_char(c).ok_or(Nixbase32DecodeError::CharacterNotInAlphabet(*c))?;

        let b = n * 5; // bit offset of this character within the output
        let i = b / 8; // output byte receiving the lowest bit
        let j = b % 8; // bit position within that byte

        // The character lies completely outside of the output buffer, so all
        // of its bits are carry. Indexing `output[i]` here would be out of
        // bounds.
        if i >= output_len {
            if c_decoded != 0 {
                return Err(Nixbase32DecodeError::NonzeroCarry());
            }
            continue;
        }

        // c_decoded < 32 and j < 8, so the shifted value always fits in u16.
        let val = u16::from(c_decoded) << j;
        output[i] |= (val & 0x00ff) as u8;

        // Bits that spilled over into the next byte.
        let carry = (val >> 8) as u8;
        if i + 1 < output_len {
            output[i + 1] |= carry;
        } else if carry != 0 {
            // we're at the end of the output, but have a nonzero carry:
            // the encoding is invalid.
            return Err(Nixbase32DecodeError::NonzeroCarry());
        }
    }

    Ok(output)
}
/// Returns the number of bytes obtained by decoding `len` nixbase32 characters.
///
/// Each character carries 5 bits; bits that do not make up a whole byte are
/// not counted (integer division).
pub fn decode_len(len: usize) -> usize {
    len * 5 / 8
}
/// Returns the number of nixbase32 characters needed to encode `len` bytes.
///
/// This is the number of 5-bit groups required to hold `len * 8` bits,
/// rounded up. Zero bytes encode to zero characters.
pub fn encode_len(len: usize) -> usize {
    match len {
        0 => 0,
        bytes => (bytes * 8 - 1) / 5 + 1,
    }
}
#[cfg(test)]
mod tests {
    use test_case::test_case;

    /// Encoding `dec` must produce exactly the string `enc`.
    #[test_case("", vec![] ; "empty bytes")]
    #[test_case("0z", vec![0x1f]; "one byte")]
    #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", vec![
        0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
        0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]; "store path")]
    #[test_case("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", vec![
        0xb3, 0xa2, 0x4d, 0xe9, 0x7a, 0x8f, 0xdb, 0xc8, 0x35, 0xb9, 0x83, 0x31, 0x69, 0x50, 0x10, 0x30,
        0xb8, 0x97, 0x70, 0x31, 0xbc, 0xb5, 0x4b, 0x3b, 0x3a, 0xc1, 0x37, 0x40, 0xf8, 0x46, 0xab, 0x30,
    ]; "sha256")]
    fn encode(enc: &str, dec: Vec<u8>) {
        let actual = super::encode(&dec);
        assert_eq!(enc, actual);
    }

    /// Decoding `enc` must yield `dec`, or fail if `dec` is `None`.
    #[test_case("", Some(vec![]) ; "empty bytes")]
    #[test_case("0z", Some(vec![0x1f]); "one byte")]
    #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", Some(vec![
        0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
        0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]); "store path")]
    #[test_case("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", Some(vec![
        0xb3, 0xa2, 0x4d, 0xe9, 0x7a, 0x8f, 0xdb, 0xc8, 0x35, 0xb9, 0x83, 0x31, 0x69, 0x50, 0x10, 0x30,
        0xb8, 0x97, 0x70, 0x31, 0xbc, 0xb5, 0x4b, 0x3b, 0x3a, 0xc1, 0x37, 0x40, 0xf8, 0x46, 0xab, 0x30,
    ]); "sha256")]
    // this is invalid encoding, because it encodes 10 1-bytes, so the carry
    // would be 2 1-bytes
    #[test_case("zz", None; "invalid encoding-1")]
    // this is an even more specific example - it'd decode as 00000000 11
    #[test_case("c0", None; "invalid encoding-2")]
    fn decode(enc: &str, dec: Option<Vec<u8>>) {
        let result = super::decode(enc.as_bytes());

        match dec {
            // The decode needs to match what's passed in dec
            Some(expected) => assert_eq!(expected, result.unwrap()),
            // the decode needs to be an error
            None => assert!(result.is_err()),
        }
    }

    /// A 20 byte digest encodes to 32 characters.
    #[test]
    fn encode_len() {
        assert_eq!(super::encode_len(20), 32)
    }

    /// 32 characters decode to a 20 byte digest.
    #[test]
    fn decode_len() {
        assert_eq!(super::decode_len(32), 20)
    }
}

View file

@ -5,7 +5,7 @@ use thiserror::Error;
use prost::Message;
use crate::store_path::{ParseStorePathError, StorePath};
use nix_compat::store_path::{ParseStorePathError, StorePath};
tonic::include_proto!("tvix.store.v1");

View file

@ -7,7 +7,7 @@ use crate::proto::CalculateNarResponse;
use crate::proto::GetPathInfoRequest;
use crate::proto::Node;
use crate::proto::PathInfo;
use crate::store_path::DIGEST_SIZE;
use nix_compat::store_path::DIGEST_SIZE;
use tonic::{Request, Response, Result, Status};
use tracing::{instrument, warn};

View file

@ -1,198 +0,0 @@
use crate::nixbase32::{self, Nixbase32DecodeError};
use std::fmt;
use thiserror::Error;
/// Size in bytes of the digest stored in a [StorePath].
pub const DIGEST_SIZE: usize = 20;
// Number of characters the nixbase32 encoding of a digest occupies.
// `nixbase32::encode_len` is not a `const fn`, so it can't be evaluated here;
// the value is spelled out instead, and a unit test asserts that it matches.
pub const ENCODED_DIGEST_SIZE: usize = 32;
// The store dir prefix, without trailing slash.
// That's usually where the Nix store is mounted at.
pub const STORE_DIR: &str = "/nix/store";
// The store dir prefix, with trailing slash.
pub const STORE_DIR_WITH_SLASH: &str = "/nix/store/";
/// Errors that can occur while parsing a string into a [StorePath].
#[derive(Debug, PartialEq, Eq, Error)]
pub enum ParseStorePathError {
    /// The byte following the encoded digest is not a `-`.
    #[error("Dash is missing between hash and name")]
    MissingDash(),
    /// The digest part could not be nixbase32-decoded; wraps the decoder error.
    #[error("Hash encoding is invalid: {0}")]
    InvalidHashEncoding(Nixbase32DecodeError),
    /// The name contains a character that is not allowed. Carries the string
    /// that failed validation. Also returned (with an empty string) when the
    /// input is too short to contain a digest, a dash and a name.
    #[error("Invalid name: {0}")]
    InvalidName(String),
    /// An absolute path did not start with the store dir prefix.
    #[error("Tried to parse an absolute path which was missing the store dir prefix.")]
    MissingStoreDir(),
}
/// Represents a path in the Nix store (a direct child of [STORE_DIR]).
///
/// It starts with a digest (20 bytes), [nixbase32]-encoded, followed by
/// a `-`, and ends with a `name`, which is a string, consisting only of ASCII
/// alphanumeric characters, or one of the following characters: `-`, `_`, `.`,
/// `+`, `?`, `=`.
///
/// The name is usually used to describe the pname and version of a package.
/// Derivation paths can also be represented as store paths; they end
/// with .drv.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct StorePath {
    /// The raw (decoded) digest.
    pub digest: [u8; DIGEST_SIZE],
    /// The part following the dash.
    pub name: String,
}
impl StorePath {
    /// Parses a store path given without the store dir prefix, i.e. a string
    /// of the form `<nixbase32 digest>-<name>`.
    ///
    /// # Errors
    ///
    /// * [ParseStorePathError::InvalidName] with an empty string if `s` is too
    ///   short to hold a digest, a dash and at least one name character, or
    ///   with the name if the name contains a disallowed character.
    /// * [ParseStorePathError::InvalidHashEncoding] if the first
    ///   [ENCODED_DIGEST_SIZE] bytes are not valid nixbase32.
    /// * [ParseStorePathError::MissingDash] if the digest is not followed by
    ///   a `-`.
    pub fn from_string(s: &str) -> Result<StorePath, ParseStorePathError> {
        // the whole string needs to be at least:
        //
        // - 32 characters (encoded hash)
        // - 1 dash
        // - 1 character for the name
        if s.len() < ENCODED_DIGEST_SIZE + 2 {
            return Err(ParseStorePathError::InvalidName("".to_string()));
        }

        // Split on the byte level: slicing the `&str` at a fixed offset would
        // panic if a multi-byte character straddles that offset.
        let (encoded_digest, rest) = s.as_bytes().split_at(ENCODED_DIGEST_SIZE);

        let digest =
            nixbase32::decode(encoded_digest).map_err(ParseStorePathError::InvalidHashEncoding)?;

        if rest[0] != b'-' {
            return Err(ParseStorePathError::MissingDash());
        }

        // The digest decoded successfully, so the first 32 bytes are ASCII,
        // and the byte after them is the (ASCII) dash. The offset behind the
        // dash therefore is a character boundary and safe to slice at.
        let name = &s[ENCODED_DIGEST_SIZE + 1..];

        // Validate the entire name, including its first character.
        StorePath::validate_name(name)?;

        Ok(StorePath {
            name: name.to_string(),
            digest: digest.try_into().expect("size is known"),
        })
    }

    /// Construct a [StorePath] from an absolute store path string.
    /// That is a string starting with the store prefix (/nix/store)
    ///
    /// Returns [ParseStorePathError::MissingStoreDir] if the prefix is
    /// missing, and otherwise whatever [StorePath::from_string] returns for
    /// the remainder.
    pub fn from_absolute_path(s: &str) -> Result<StorePath, ParseStorePathError> {
        s.strip_prefix(STORE_DIR_WITH_SLASH)
            .ok_or(ParseStorePathError::MissingStoreDir())
            .and_then(Self::from_string)
    }

    /// Converts the [StorePath] to an absolute store path string.
    /// That is a string starting with the store prefix (/nix/store)
    pub fn to_absolute_path(&self) -> String {
        format!("{}/{}", STORE_DIR, self)
    }

    /// Checks a given &str to match the restrictions for store path names.
    ///
    /// Only ASCII alphanumeric characters and `-`, `_`, `.`, `+`, `?`, `=`
    /// are accepted. On failure, [ParseStorePathError::InvalidName] carrying
    /// a copy of `s` is returned.
    pub fn validate_name(s: &str) -> Result<(), ParseStorePathError> {
        let is_allowed = |c: char| {
            c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.' | '+' | '?' | '=')
        };

        if s.chars().all(is_allowed) {
            Ok(())
        } else {
            Err(ParseStorePathError::InvalidName(s.to_string()))
        }
    }
}
impl fmt::Display for StorePath {
    /// Renders the store path as `<nixbase32 digest>-<name>`, without the
    /// store dir prefix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let encoded_digest = nixbase32::encode(&self.digest);
        write!(f, "{}-{}", encoded_digest, self.name)
    }
}
#[cfg(test)]
mod tests {
    use crate::nixbase32;
    use crate::store_path::{DIGEST_SIZE, ENCODED_DIGEST_SIZE};

    use super::{ParseStorePathError, StorePath};

    /// [ENCODED_DIGEST_SIZE] is hard-coded; make sure it stays in sync with
    /// what nixbase32 produces for a digest of [DIGEST_SIZE] bytes.
    #[test]
    fn encoded_digest_size() {
        assert_eq!(ENCODED_DIGEST_SIZE, nixbase32::encode_len(DIGEST_SIZE));
    }

    #[test]
    fn happy_path() {
        let example_nix_path_str =
            "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432";
        let nixpath =
            StorePath::from_string(example_nix_path_str).expect("Error parsing example string");

        let expected_digest: [u8; DIGEST_SIZE] = [
            0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
            0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00,
        ];

        assert_eq!("net-tools-1.60_p20170221182432", nixpath.name);
        assert_eq!(nixpath.digest, expected_digest);

        assert_eq!(example_nix_path_str, nixpath.to_string())
    }

    /// The digest part is too short, so the dash and parts of the name end up
    /// where the digest is expected.
    #[test]
    fn invalid_hash_length() {
        let err = StorePath::from_string("00bgd045z0d4icpbc2yy-net-tools-1.60_p20170221182432")
            .expect_err("No error raised.");
        assert!(matches!(err, ParseStorePathError::InvalidHashEncoding(_)));
    }

    /// The digest contains a `u`, which is not part of the nixbase32 alphabet.
    #[test]
    fn invalid_encoding_hash() {
        let err = StorePath::from_string(
            "00bgd045z0d4icpbc2yyz4gx48aku4la-net-tools-1.60_p20170221182432",
        )
        .expect_err("No error raised.");
        assert!(matches!(err, ParseStorePathError::InvalidHashEncoding(_)));
    }

    /// A path pointing *into* a store path must be rejected. The digest used
    /// here is valid on purpose, so the failure can only come from the `/`
    /// characters in the name.
    #[test]
    fn more_than_just_the_bare_nix_store_path() {
        let err = StorePath::from_string(
            "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432/bin/arp",
        )
        .expect_err("No error raised.");
        assert!(matches!(err, ParseStorePathError::InvalidName(_)));
    }

    #[test]
    fn no_dash_between_hash_and_name() {
        assert_eq!(
            ParseStorePathError::MissingDash(),
            StorePath::from_string(
                "00bgd045z0d4icpbc2yyz4gx48ak44lanet-tools-1.60_p20170221182432"
            )
            .expect_err("No error raised.")
        );
    }

    #[test]
    fn absolute_path() {
        let example_nix_path_str =
            "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432";
        let nixpath_expected = StorePath::from_string(example_nix_path_str).expect("must parse");

        let nixpath_actual = StorePath::from_absolute_path(
            "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432",
        )
        .expect("must parse");

        assert_eq!(nixpath_expected, nixpath_actual);

        assert_eq!(
            "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432",
            nixpath_actual.to_absolute_path(),
        );
    }

    #[test]
    fn absolute_path_missing_prefix() {
        assert_eq!(
            ParseStorePathError::MissingStoreDir(),
            StorePath::from_absolute_path("foobar-123").expect_err("must fail")
        );
    }
}

View file

@ -1,8 +1,6 @@
use crate::{
proto::{self, Node, PathInfo, ValidatePathInfoError},
store_path::{ParseStorePathError, StorePath},
};
use crate::proto::{self, Node, PathInfo, ValidatePathInfoError};
use lazy_static::lazy_static;
use nix_compat::store_path::{ParseStorePathError, StorePath};
use test_case::test_case;
lazy_static! {