feat(tvix/store): implement Directory::validate()

Change-Id: I4c6ae79d705b8e19a3e2ed54812366e88935d7a6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7650
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
This commit is contained in:
Florian Klink 2022-12-27 18:10:46 +01:00 committed by flokli
parent 0c7e545fd0
commit 7163d3ad37
4 changed files with 351 additions and 13 deletions

View file

@ -1,7 +1,82 @@
use anyhow::Result;
use std::collections::HashSet;
use thiserror::Error;
use prost::Message;
tonic::include_proto!("tvix.store.v1");
/// Errors that can occur during the validation of Directory messages.
#[derive(Debug, Error, PartialEq)]
pub enum ValidateDirectoryError {
/// Elements are not in sorted order
#[error("{0} is not sorted")]
WrongSorting(String),
/// Multiple elements with the same name encountered
#[error("{0} is a duplicate name")]
DuplicateName(String),
/// Invalid name encountered
#[error("Invalid name in {0}")]
InvalidName(String),
/// Invalid digest length encountered
#[error("Ivalid Digest length: {0}")]
InvalidDigestLen(usize),
}
/// Checks a name for validity.
/// We disallow slashes, null bytes, '.', '..' and the empty string.
/// Depending on the context, a [DirectoryNode], [FileNode] or [SymlinkNode]
/// message with an empty string as name is allowed, but they don't occur
/// inside a Directory message.
fn validate_node_name(name: &str) -> Result<(), ValidateDirectoryError> {
if name == "" || name == ".." || name == "." || name.contains("\x00") || name.contains("/") {
return Err(ValidateDirectoryError::InvalidName(
name.to_string().clone(),
));
}
Ok(())
}
/// Checks a digest for validity.
/// Digests are 32 bytes long, as we store blake3 digests.
fn validate_digest(digest: &Vec<u8>) -> Result<(), ValidateDirectoryError> {
if digest.len() != 32 {
return Err(ValidateDirectoryError::InvalidDigestLen(digest.len()));
}
Ok(())
}
/// Accepts a name, and a mutable reference to the previous name.
/// If the passed name is larger than the previous one, the reference is updated.
/// If it's not, an error is returned.
fn update_if_lt_prev<'set, 'n>(
prev_name: &'set mut &'n str,
name: &'n str,
) -> Result<(), ValidateDirectoryError> {
if *name < **prev_name {
return Err(ValidateDirectoryError::WrongSorting(
name.to_string().clone(),
));
}
*prev_name = name;
Ok(())
}
/// Inserts the given name into a HashSet if it's not already in there.
/// If it is, an error is returned.
fn insert_once<'n>(
seen_names: &mut HashSet<&'n str>,
name: &'n str,
) -> Result<(), ValidateDirectoryError> {
if seen_names.get(name).is_some() {
return Err(ValidateDirectoryError::DuplicateName(
name.to_string().clone(),
));
}
seen_names.insert(name);
Ok(())
}
impl Directory {
// The size of a directory is the number of all regular and symlink elements,
// the number of directory elements, and their size fields.
@ -19,11 +94,52 @@ impl Directory {
hasher.update(&self.encode_to_vec()).finalize().as_bytes()[..].to_vec()
}
/// validate checks the directory for invalid data, such as:
/// - violations of name restrictions
/// - invalid digest lengths
/// - not properly sorted lists
/// - duplicate names in the three lists
pub fn validate(&self) -> Result<(), ValidateDirectoryError> {
let mut seen_names: HashSet<&str> = HashSet::new();
let mut last_directory_name: &str = "";
let mut last_file_name: &str = "";
let mut last_symlink_name: &str = "";
// check directories
for directory_node in &self.directories {
validate_node_name(&directory_node.name)?;
validate_digest(&directory_node.digest)?;
update_if_lt_prev(&mut last_directory_name, &mut directory_node.name.as_str())?;
insert_once(&mut seen_names, &directory_node.name.as_str())?;
}
// check files
for file_node in &self.files {
validate_node_name(&file_node.name)?;
validate_digest(&file_node.digest)?;
update_if_lt_prev(&mut last_file_name, &mut file_node.name.as_str())?;
insert_once(&mut seen_names, &file_node.name.as_str())?;
}
// check symlinks
for symlink_node in &self.symlinks {
validate_node_name(&symlink_node.name)?;
update_if_lt_prev(&mut last_symlink_name, &mut symlink_node.name.as_str())?;
insert_once(&mut seen_names, &symlink_node.name.as_str())?;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::{Directory, DirectoryNode, FileNode, SymlinkNode};
use super::{Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError};
use lazy_static::lazy_static;
lazy_static! {
@ -98,4 +214,214 @@ mod tests {
]
)
}
#[test]
fn test_directory_validate_empty() {
let d = Directory::default();
assert_eq!(d.validate(), Ok(()));
}
#[test]
fn test_directory_validate_invalid_names() {
{
let d = Directory {
directories: vec![DirectoryNode {
name: "".to_string(),
digest: DUMMY_DIGEST.to_vec(),
size: 42,
}],
..Default::default()
};
match d.validate().expect_err("must fail") {
ValidateDirectoryError::InvalidName(n) => {
assert_eq!(n, "")
}
_ => panic!("unexpected error"),
};
}
{
let d = Directory {
directories: vec![DirectoryNode {
name: ".".to_string(),
digest: DUMMY_DIGEST.to_vec(),
size: 42,
}],
..Default::default()
};
match d.validate().expect_err("must fail") {
ValidateDirectoryError::InvalidName(n) => {
assert_eq!(n, ".")
}
_ => panic!("unexpected error"),
};
}
{
let d = Directory {
files: vec![FileNode {
name: "..".to_string(),
digest: DUMMY_DIGEST.to_vec(),
size: 42,
executable: false,
}],
..Default::default()
};
match d.validate().expect_err("must fail") {
ValidateDirectoryError::InvalidName(n) => {
assert_eq!(n, "..")
}
_ => panic!("unexpected error"),
};
}
{
let d = Directory {
symlinks: vec![SymlinkNode {
name: "\x00".to_string(),
target: "foo".to_string(),
}],
..Default::default()
};
match d.validate().expect_err("must fail") {
ValidateDirectoryError::InvalidName(n) => {
assert_eq!(n, "\x00")
}
_ => panic!("unexpected error"),
};
}
{
let d = Directory {
symlinks: vec![SymlinkNode {
name: "foo/bar".to_string(),
target: "foo".to_string(),
}],
..Default::default()
};
match d.validate().expect_err("must fail") {
ValidateDirectoryError::InvalidName(n) => {
assert_eq!(n, "foo/bar")
}
_ => panic!("unexpected error"),
};
}
}
#[test]
fn test_directory_validate_invalid_digest() {
let d = Directory {
directories: vec![DirectoryNode {
name: "foo".to_string(),
digest: vec![0x00, 0x42], // invalid length
size: 42,
}],
..Default::default()
};
match d.validate().expect_err("must fail") {
ValidateDirectoryError::InvalidDigestLen(n) => {
assert_eq!(n, 2)
}
_ => panic!("unexpected error"),
}
}
#[test]
fn test_directory_validate_sorting() {
// "b" comes before "a", bad.
{
let d = Directory {
directories: vec![
DirectoryNode {
name: "b".to_string(),
digest: DUMMY_DIGEST.to_vec(),
size: 42,
},
DirectoryNode {
name: "a".to_string(),
digest: DUMMY_DIGEST.to_vec(),
size: 42,
},
],
..Default::default()
};
match d.validate().expect_err("must fail") {
ValidateDirectoryError::WrongSorting(s) => {
assert_eq!(s, "a".to_string());
}
_ => panic!("unexpected error"),
}
}
// "a" exists twice, bad.
{
let d = Directory {
directories: vec![
DirectoryNode {
name: "a".to_string(),
digest: DUMMY_DIGEST.to_vec(),
size: 42,
},
DirectoryNode {
name: "a".to_string(),
digest: DUMMY_DIGEST.to_vec(),
size: 42,
},
],
..Default::default()
};
match d.validate().expect_err("must fail") {
ValidateDirectoryError::DuplicateName(s) => {
assert_eq!(s, "a".to_string());
}
_ => panic!("unexpected error"),
}
}
// "a" comes before "b", all good.
{
let d = Directory {
directories: vec![
DirectoryNode {
name: "a".to_string(),
digest: DUMMY_DIGEST.to_vec(),
size: 42,
},
DirectoryNode {
name: "b".to_string(),
digest: DUMMY_DIGEST.to_vec(),
size: 42,
},
],
..Default::default()
};
d.validate().expect("validate shouldn't error");
}
// [b, c] and [a] are both properly sorted.
{
let d = Directory {
directories: vec![
DirectoryNode {
name: "b".to_string(),
digest: DUMMY_DIGEST.to_vec(),
size: 42,
},
DirectoryNode {
name: "c".to_string(),
digest: DUMMY_DIGEST.to_vec(),
size: 42,
},
],
symlinks: vec![SymlinkNode {
name: "a".to_string(),
target: "foo".to_string(),
}],
..Default::default()
};
d.validate().expect("validate shouldn't error");
}
}
}