feat(tvix/store): implement Directory::validate()
Change-Id: I4c6ae79d705b8e19a3e2ed54812366e88935d7a6 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7650 Reviewed-by: tazjin <tazjin@tvl.su> Tested-by: BuildkiteCI
This commit is contained in:
parent
0c7e545fd0
commit
7163d3ad37
4 changed files with 351 additions and 13 deletions
14
tvix/Cargo.lock
generated
14
tvix/Cargo.lock
generated
|
@ -34,9 +34,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
|
|||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.66"
|
||||
version = "1.0.68"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "216261ddc8289130e551ddcd5ce8a064710c0d064a4d2895c67151c92b5443f6"
|
||||
checksum = "2cb2f989d18dd141ab8ae82f64d1a8cdd37e0840f73a406896cf5e99502fab61"
|
||||
|
||||
[[package]]
|
||||
name = "arrayref"
|
||||
|
@ -1775,18 +1775,18 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
|
|||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.37"
|
||||
version = "1.0.38"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e"
|
||||
checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.37"
|
||||
version = "1.0.38"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb"
|
||||
checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.47",
|
||||
"quote 1.0.21",
|
||||
|
@ -2066,10 +2066,12 @@ version = "0.0.0"
|
|||
name = "tvix-store"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
"lazy_static",
|
||||
"prost",
|
||||
"prost-build",
|
||||
"thiserror",
|
||||
"tonic",
|
||||
"tonic-build",
|
||||
]
|
||||
|
|
|
@ -200,9 +200,9 @@ rec {
|
|||
};
|
||||
"anyhow" = rec {
|
||||
crateName = "anyhow";
|
||||
version = "1.0.66";
|
||||
version = "1.0.68";
|
||||
edition = "2018";
|
||||
sha256 = "1xj3ahmwjlbiqsajhkaa0q6hqwb4l3l5rkfxa7jk1498r3fn2qi1";
|
||||
sha256 = "0qdb5x89jpngjrl40fpp8047xlydm38n8bz8iaml3lcds64zkcic";
|
||||
authors = [
|
||||
"David Tolnay <dtolnay@gmail.com>"
|
||||
];
|
||||
|
@ -5028,9 +5028,9 @@ rec {
|
|||
};
|
||||
"thiserror" = rec {
|
||||
crateName = "thiserror";
|
||||
version = "1.0.37";
|
||||
version = "1.0.38";
|
||||
edition = "2018";
|
||||
sha256 = "0gky83x4i87gd87w3fknnp920wvk9yycp7dgkf5h3jg364vb7phh";
|
||||
sha256 = "1l7yh18iqcr2jnl6qjx3ywvhny98cvda3biwc334ap3xm65d373a";
|
||||
authors = [
|
||||
"David Tolnay <dtolnay@gmail.com>"
|
||||
];
|
||||
|
@ -5044,9 +5044,9 @@ rec {
|
|||
};
|
||||
"thiserror-impl" = rec {
|
||||
crateName = "thiserror-impl";
|
||||
version = "1.0.37";
|
||||
version = "1.0.38";
|
||||
edition = "2018";
|
||||
sha256 = "1fydmpksd14x1mkc24zas01qjssz8q43sbn2ywl6n527dda1fbcq";
|
||||
sha256 = "0vzkcjqkzzgrwwby92xvnbp11a8d70b1gkybm0zx1r458spjgcqz";
|
||||
procMacro = true;
|
||||
authors = [
|
||||
"David Tolnay <dtolnay@gmail.com>"
|
||||
|
@ -6162,6 +6162,10 @@ rec {
|
|||
then lib.cleanSourceWith { filter = sourceFilter; src = ./store; }
|
||||
else ./store;
|
||||
dependencies = [
|
||||
{
|
||||
name = "anyhow";
|
||||
packageId = "anyhow";
|
||||
}
|
||||
{
|
||||
name = "blake3";
|
||||
packageId = "blake3";
|
||||
|
@ -6175,6 +6179,10 @@ rec {
|
|||
name = "prost";
|
||||
packageId = "prost";
|
||||
}
|
||||
{
|
||||
name = "thiserror";
|
||||
packageId = "thiserror";
|
||||
}
|
||||
{
|
||||
name = "tonic";
|
||||
packageId = "tonic";
|
||||
|
|
|
@ -4,9 +4,11 @@ version = "0.1.0"
|
|||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.68"
|
||||
blake3 = { version = "1.3.1", features = ["rayon", "std"] }
|
||||
lazy_static = "1.4.0"
|
||||
prost = "0.11.2"
|
||||
thiserror = "1.0.38"
|
||||
tonic = "0.8.2"
|
||||
|
||||
[build-dependencies]
|
||||
|
|
|
@ -1,7 +1,82 @@
|
|||
use anyhow::Result;
|
||||
use std::collections::HashSet;
|
||||
use thiserror::Error;
|
||||
|
||||
use prost::Message;
|
||||
|
||||
tonic::include_proto!("tvix.store.v1");
|
||||
|
||||
/// Errors that can occur during the validation of Directory messages.
|
||||
#[derive(Debug, Error, PartialEq)]
|
||||
pub enum ValidateDirectoryError {
|
||||
/// Elements are not in sorted order
|
||||
#[error("{0} is not sorted")]
|
||||
WrongSorting(String),
|
||||
/// Multiple elements with the same name encountered
|
||||
#[error("{0} is a duplicate name")]
|
||||
DuplicateName(String),
|
||||
/// Invalid name encountered
|
||||
#[error("Invalid name in {0}")]
|
||||
InvalidName(String),
|
||||
/// Invalid digest length encountered
|
||||
#[error("Ivalid Digest length: {0}")]
|
||||
InvalidDigestLen(usize),
|
||||
}
|
||||
|
||||
/// Checks a name for validity.
|
||||
/// We disallow slashes, null bytes, '.', '..' and the empty string.
|
||||
/// Depending on the context, a [DirectoryNode], [FileNode] or [SymlinkNode]
|
||||
/// message with an empty string as name is allowed, but they don't occur
|
||||
/// inside a Directory message.
|
||||
fn validate_node_name(name: &str) -> Result<(), ValidateDirectoryError> {
|
||||
if name == "" || name == ".." || name == "." || name.contains("\x00") || name.contains("/") {
|
||||
return Err(ValidateDirectoryError::InvalidName(
|
||||
name.to_string().clone(),
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Checks a digest for validity.
|
||||
/// Digests are 32 bytes long, as we store blake3 digests.
|
||||
fn validate_digest(digest: &Vec<u8>) -> Result<(), ValidateDirectoryError> {
|
||||
if digest.len() != 32 {
|
||||
return Err(ValidateDirectoryError::InvalidDigestLen(digest.len()));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Accepts a name, and a mutable reference to the previous name.
|
||||
/// If the passed name is larger than the previous one, the reference is updated.
|
||||
/// If it's not, an error is returned.
|
||||
fn update_if_lt_prev<'set, 'n>(
|
||||
prev_name: &'set mut &'n str,
|
||||
name: &'n str,
|
||||
) -> Result<(), ValidateDirectoryError> {
|
||||
if *name < **prev_name {
|
||||
return Err(ValidateDirectoryError::WrongSorting(
|
||||
name.to_string().clone(),
|
||||
));
|
||||
}
|
||||
*prev_name = name;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Inserts the given name into a HashSet if it's not already in there.
|
||||
/// If it is, an error is returned.
|
||||
fn insert_once<'n>(
|
||||
seen_names: &mut HashSet<&'n str>,
|
||||
name: &'n str,
|
||||
) -> Result<(), ValidateDirectoryError> {
|
||||
if seen_names.get(name).is_some() {
|
||||
return Err(ValidateDirectoryError::DuplicateName(
|
||||
name.to_string().clone(),
|
||||
));
|
||||
}
|
||||
seen_names.insert(name);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
impl Directory {
|
||||
// The size of a directory is the number of all regular and symlink elements,
|
||||
// the number of directory elements, and their size fields.
|
||||
|
@ -19,11 +94,52 @@ impl Directory {
|
|||
|
||||
hasher.update(&self.encode_to_vec()).finalize().as_bytes()[..].to_vec()
|
||||
}
|
||||
|
||||
/// validate checks the directory for invalid data, such as:
|
||||
/// - violations of name restrictions
|
||||
/// - invalid digest lengths
|
||||
/// - not properly sorted lists
|
||||
/// - duplicate names in the three lists
|
||||
pub fn validate(&self) -> Result<(), ValidateDirectoryError> {
|
||||
let mut seen_names: HashSet<&str> = HashSet::new();
|
||||
|
||||
let mut last_directory_name: &str = "";
|
||||
let mut last_file_name: &str = "";
|
||||
let mut last_symlink_name: &str = "";
|
||||
|
||||
// check directories
|
||||
for directory_node in &self.directories {
|
||||
validate_node_name(&directory_node.name)?;
|
||||
validate_digest(&directory_node.digest)?;
|
||||
|
||||
update_if_lt_prev(&mut last_directory_name, &mut directory_node.name.as_str())?;
|
||||
insert_once(&mut seen_names, &directory_node.name.as_str())?;
|
||||
}
|
||||
|
||||
// check files
|
||||
for file_node in &self.files {
|
||||
validate_node_name(&file_node.name)?;
|
||||
validate_digest(&file_node.digest)?;
|
||||
|
||||
update_if_lt_prev(&mut last_file_name, &mut file_node.name.as_str())?;
|
||||
insert_once(&mut seen_names, &file_node.name.as_str())?;
|
||||
}
|
||||
|
||||
// check symlinks
|
||||
for symlink_node in &self.symlinks {
|
||||
validate_node_name(&symlink_node.name)?;
|
||||
|
||||
update_if_lt_prev(&mut last_symlink_name, &mut symlink_node.name.as_str())?;
|
||||
insert_once(&mut seen_names, &symlink_node.name.as_str())?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{Directory, DirectoryNode, FileNode, SymlinkNode};
|
||||
use super::{Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError};
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
lazy_static! {
|
||||
|
@ -98,4 +214,214 @@ mod tests {
|
|||
]
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_directory_validate_empty() {
|
||||
let d = Directory::default();
|
||||
assert_eq!(d.validate(), Ok(()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_directory_validate_invalid_names() {
|
||||
{
|
||||
let d = Directory {
|
||||
directories: vec![DirectoryNode {
|
||||
name: "".to_string(),
|
||||
digest: DUMMY_DIGEST.to_vec(),
|
||||
size: 42,
|
||||
}],
|
||||
..Default::default()
|
||||
};
|
||||
match d.validate().expect_err("must fail") {
|
||||
ValidateDirectoryError::InvalidName(n) => {
|
||||
assert_eq!(n, "")
|
||||
}
|
||||
_ => panic!("unexpected error"),
|
||||
};
|
||||
}
|
||||
|
||||
{
|
||||
let d = Directory {
|
||||
directories: vec![DirectoryNode {
|
||||
name: ".".to_string(),
|
||||
digest: DUMMY_DIGEST.to_vec(),
|
||||
size: 42,
|
||||
}],
|
||||
..Default::default()
|
||||
};
|
||||
match d.validate().expect_err("must fail") {
|
||||
ValidateDirectoryError::InvalidName(n) => {
|
||||
assert_eq!(n, ".")
|
||||
}
|
||||
_ => panic!("unexpected error"),
|
||||
};
|
||||
}
|
||||
|
||||
{
|
||||
let d = Directory {
|
||||
files: vec![FileNode {
|
||||
name: "..".to_string(),
|
||||
digest: DUMMY_DIGEST.to_vec(),
|
||||
size: 42,
|
||||
executable: false,
|
||||
}],
|
||||
..Default::default()
|
||||
};
|
||||
match d.validate().expect_err("must fail") {
|
||||
ValidateDirectoryError::InvalidName(n) => {
|
||||
assert_eq!(n, "..")
|
||||
}
|
||||
_ => panic!("unexpected error"),
|
||||
};
|
||||
}
|
||||
|
||||
{
|
||||
let d = Directory {
|
||||
symlinks: vec![SymlinkNode {
|
||||
name: "\x00".to_string(),
|
||||
target: "foo".to_string(),
|
||||
}],
|
||||
..Default::default()
|
||||
};
|
||||
match d.validate().expect_err("must fail") {
|
||||
ValidateDirectoryError::InvalidName(n) => {
|
||||
assert_eq!(n, "\x00")
|
||||
}
|
||||
_ => panic!("unexpected error"),
|
||||
};
|
||||
}
|
||||
|
||||
{
|
||||
let d = Directory {
|
||||
symlinks: vec![SymlinkNode {
|
||||
name: "foo/bar".to_string(),
|
||||
target: "foo".to_string(),
|
||||
}],
|
||||
..Default::default()
|
||||
};
|
||||
match d.validate().expect_err("must fail") {
|
||||
ValidateDirectoryError::InvalidName(n) => {
|
||||
assert_eq!(n, "foo/bar")
|
||||
}
|
||||
_ => panic!("unexpected error"),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_directory_validate_invalid_digest() {
|
||||
let d = Directory {
|
||||
directories: vec![DirectoryNode {
|
||||
name: "foo".to_string(),
|
||||
digest: vec![0x00, 0x42], // invalid length
|
||||
size: 42,
|
||||
}],
|
||||
..Default::default()
|
||||
};
|
||||
match d.validate().expect_err("must fail") {
|
||||
ValidateDirectoryError::InvalidDigestLen(n) => {
|
||||
assert_eq!(n, 2)
|
||||
}
|
||||
_ => panic!("unexpected error"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_directory_validate_sorting() {
|
||||
// "b" comes before "a", bad.
|
||||
{
|
||||
let d = Directory {
|
||||
directories: vec![
|
||||
DirectoryNode {
|
||||
name: "b".to_string(),
|
||||
digest: DUMMY_DIGEST.to_vec(),
|
||||
size: 42,
|
||||
},
|
||||
DirectoryNode {
|
||||
name: "a".to_string(),
|
||||
digest: DUMMY_DIGEST.to_vec(),
|
||||
size: 42,
|
||||
},
|
||||
],
|
||||
..Default::default()
|
||||
};
|
||||
match d.validate().expect_err("must fail") {
|
||||
ValidateDirectoryError::WrongSorting(s) => {
|
||||
assert_eq!(s, "a".to_string());
|
||||
}
|
||||
_ => panic!("unexpected error"),
|
||||
}
|
||||
}
|
||||
|
||||
// "a" exists twice, bad.
|
||||
{
|
||||
let d = Directory {
|
||||
directories: vec![
|
||||
DirectoryNode {
|
||||
name: "a".to_string(),
|
||||
digest: DUMMY_DIGEST.to_vec(),
|
||||
size: 42,
|
||||
},
|
||||
DirectoryNode {
|
||||
name: "a".to_string(),
|
||||
digest: DUMMY_DIGEST.to_vec(),
|
||||
size: 42,
|
||||
},
|
||||
],
|
||||
..Default::default()
|
||||
};
|
||||
match d.validate().expect_err("must fail") {
|
||||
ValidateDirectoryError::DuplicateName(s) => {
|
||||
assert_eq!(s, "a".to_string());
|
||||
}
|
||||
_ => panic!("unexpected error"),
|
||||
}
|
||||
}
|
||||
|
||||
// "a" comes before "b", all good.
|
||||
{
|
||||
let d = Directory {
|
||||
directories: vec![
|
||||
DirectoryNode {
|
||||
name: "a".to_string(),
|
||||
digest: DUMMY_DIGEST.to_vec(),
|
||||
size: 42,
|
||||
},
|
||||
DirectoryNode {
|
||||
name: "b".to_string(),
|
||||
digest: DUMMY_DIGEST.to_vec(),
|
||||
size: 42,
|
||||
},
|
||||
],
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
d.validate().expect("validate shouldn't error");
|
||||
}
|
||||
|
||||
// [b, c] and [a] are both properly sorted.
|
||||
{
|
||||
let d = Directory {
|
||||
directories: vec![
|
||||
DirectoryNode {
|
||||
name: "b".to_string(),
|
||||
digest: DUMMY_DIGEST.to_vec(),
|
||||
size: 42,
|
||||
},
|
||||
DirectoryNode {
|
||||
name: "c".to_string(),
|
||||
digest: DUMMY_DIGEST.to_vec(),
|
||||
size: 42,
|
||||
},
|
||||
],
|
||||
symlinks: vec![SymlinkNode {
|
||||
name: "a".to_string(),
|
||||
target: "foo".to_string(),
|
||||
}],
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
d.validate().expect("validate shouldn't error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue