refactor(tvix/castore): factor out node checks

Implement `validate()` on `node::Node`, and call it from PathInfo's
validate() too. Node-related errors are moved to a ValidateNodeError
error type.

This additionally adds some more validations for symlink targets (they
must not be empty, and not contain null bytes).

Change-Id: Ib9b89f1c9c795e868a1533281239bc8a36d97c5d
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9715
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Reviewed-by: edef <edef@edef.eu>
This commit is contained in:
Florian Klink 2023-10-12 22:26:53 +02:00 committed by flokli
parent c9e90b4dd7
commit 9b7629826f
4 changed files with 109 additions and 68 deletions

View file

@ -2,7 +2,6 @@
// https://github.com/hyperium/tonic/issues/1056
use bstr::ByteSlice;
use std::{collections::HashSet, iter::Peekable};
use thiserror::Error;
use prost::Message;
@ -26,7 +25,7 @@ pub const FILE_DESCRIPTOR_SET: &[u8] = tonic::include_file_descriptor_set!("tvix
mod tests;
/// Errors that can occur during the validation of Directory messages.
#[derive(Debug, PartialEq, Eq, Error)]
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
pub enum ValidateDirectoryError {
/// Elements are not in sorted order
#[error("{:?} is not sorted", .0.as_bstr())]
@ -34,28 +33,37 @@ pub enum ValidateDirectoryError {
/// Multiple elements with the same name encountered
#[error("{:?} is a duplicate name", .0.as_bstr())]
DuplicateName(Vec<u8>),
/// Invalid name encountered
#[error("Invalid name in {:?}", .0.as_bstr())]
InvalidName(Vec<u8>),
/// Invalid digest length encountered
#[error("Invalid Digest length: {0}")]
InvalidDigestLen(usize),
/// Invalid node
#[error("invalid node with name {:?}: {:?}", .0.as_bstr(), .1.to_string())]
InvalidNode(Vec<u8>, ValidateNodeError),
#[error("Total size exceeds u32::MAX")]
SizeOverflow,
}
/// Checks a Node name for validity as an intermediate node, and returns an
/// error that's generated from the supplied constructor.
///
/// Errors that occur during Node validation
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
pub enum ValidateNodeError {
/// Invalid digest length encountered
#[error("Invalid Digest length: {0}")]
InvalidDigestLen(usize),
/// Invalid name encountered
#[error("Invalid name")]
InvalidName(),
/// Invalid symlink target
#[error("Invalid symlink target: {}", .0.as_bstr())]
InvalidSymlinkTarget(Vec<u8>),
}
/// Checks a Node name for validity as an intermediate node.
/// We disallow slashes, null bytes, '.', '..' and the empty string.
fn validate_node_name<E>(name: &[u8], err: fn(Vec<u8>) -> E) -> Result<(), E> {
fn validate_node_name(name: &[u8]) -> Result<(), ValidateNodeError> {
if name.is_empty()
|| name == b".."
|| name == b"."
|| name.contains(&0x00)
|| name.contains(&b'/')
{
return Err(err(name.to_vec()));
Err(ValidateNodeError::InvalidName())?;
}
Ok(())
}
@ -103,6 +111,39 @@ impl node::Node {
node::Node::Symlink(n) => node::Node::Symlink(SymlinkNode { name, ..n }),
}
}
pub fn validate(&self) -> Result<(), ValidateNodeError> {
match self {
// for a directory root node, ensure the digest has the appropriate size.
node::Node::Directory(directory_node) => {
if TryInto::<B3Digest>::try_into(directory_node.digest.clone()).is_err() {
Err(ValidateNodeError::InvalidDigestLen(
directory_node.digest.len(),
))?;
};
validate_node_name(&directory_node.name)?;
}
// for a file root node, ensure the digest has the appropriate size.
node::Node::File(file_node) => {
if TryInto::<B3Digest>::try_into(file_node.digest.clone()).is_err() {
Err(ValidateNodeError::InvalidDigestLen(file_node.digest.len()))?;
};
validate_node_name(&file_node.name)?;
}
// ensure the symlink target is not empty and doesn't contain null bytes.
node::Node::Symlink(symlink_node) => {
if symlink_node.target.len() == 0 {
Err(ValidateNodeError::InvalidSymlinkTarget(vec![]))?;
}
if symlink_node.target.contains(&b'\0') {
Err(ValidateNodeError::InvalidSymlinkTarget(
symlink_node.target.to_vec(),
))?;
}
validate_node_name(&symlink_node.name)?;
}
}
Ok(())
}
}
/// Accepts a name, and a mutable reference to the previous name.
@ -183,13 +224,11 @@ impl Directory {
// check directories
for directory_node in &self.directories {
validate_node_name(&directory_node.name, ValidateDirectoryError::InvalidName)?;
// ensure the digest has the appropriate size.
if TryInto::<B3Digest>::try_into(directory_node.digest.clone()).is_err() {
return Err(ValidateDirectoryError::InvalidDigestLen(
directory_node.digest.len(),
));
}
node::Node::Directory(directory_node.clone())
.validate()
.map_err(|e| {
ValidateDirectoryError::InvalidNode(directory_node.name.to_vec(), e)
})?;
update_if_lt_prev(&mut last_directory_name, &directory_node.name)?;
insert_once(&mut seen_names, &directory_node.name)?;
@ -197,12 +236,9 @@ impl Directory {
// check files
for file_node in &self.files {
validate_node_name(&file_node.name, ValidateDirectoryError::InvalidName)?;
if TryInto::<B3Digest>::try_into(file_node.digest.clone()).is_err() {
return Err(ValidateDirectoryError::InvalidDigestLen(
file_node.digest.len(),
));
}
node::Node::File(file_node.clone())
.validate()
.map_err(|e| ValidateDirectoryError::InvalidNode(file_node.name.to_vec(), e))?;
update_if_lt_prev(&mut last_file_name, &file_node.name)?;
insert_once(&mut seen_names, &file_node.name)?;
@ -210,7 +246,9 @@ impl Directory {
// check symlinks
for symlink_node in &self.symlinks {
validate_node_name(&symlink_node.name, ValidateDirectoryError::InvalidName)?;
node::Node::Symlink(symlink_node.clone())
.validate()
.map_err(|e| ValidateDirectoryError::InvalidNode(symlink_node.name.to_vec(), e))?;
update_if_lt_prev(&mut last_symlink_name, &symlink_node.name)?;
insert_once(&mut seen_names, &symlink_node.name)?;

View file

@ -1,4 +1,6 @@
use crate::proto::{Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError};
use crate::proto::{
Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError, ValidateNodeError,
};
use lazy_static::lazy_static;
lazy_static! {
@ -171,7 +173,7 @@ fn validate_invalid_names() {
..Default::default()
};
match d.validate().expect_err("must fail") {
ValidateDirectoryError::InvalidName(n) => {
ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName()) => {
assert_eq!(n, b"")
}
_ => panic!("unexpected error"),
@ -188,7 +190,7 @@ fn validate_invalid_names() {
..Default::default()
};
match d.validate().expect_err("must fail") {
ValidateDirectoryError::InvalidName(n) => {
ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName()) => {
assert_eq!(n, b".")
}
_ => panic!("unexpected error"),
@ -206,7 +208,7 @@ fn validate_invalid_names() {
..Default::default()
};
match d.validate().expect_err("must fail") {
ValidateDirectoryError::InvalidName(n) => {
ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName()) => {
assert_eq!(n, b"..")
}
_ => panic!("unexpected error"),
@ -222,7 +224,7 @@ fn validate_invalid_names() {
..Default::default()
};
match d.validate().expect_err("must fail") {
ValidateDirectoryError::InvalidName(n) => {
ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName()) => {
assert_eq!(n, b"\x00")
}
_ => panic!("unexpected error"),
@ -238,7 +240,7 @@ fn validate_invalid_names() {
..Default::default()
};
match d.validate().expect_err("must fail") {
ValidateDirectoryError::InvalidName(n) => {
ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName()) => {
assert_eq!(n, b"foo/bar")
}
_ => panic!("unexpected error"),
@ -257,7 +259,7 @@ fn validate_invalid_digest() {
..Default::default()
};
match d.validate().expect_err("must fail") {
ValidateDirectoryError::InvalidDigestLen(n) => {
ValidateDirectoryError::InvalidNode(_, ValidateNodeError::InvalidDigestLen(n)) => {
assert_eq!(n, 2)
}
_ => panic!("unexpected error"),

View file

@ -3,10 +3,7 @@ use data_encoding::BASE64;
// https://github.com/hyperium/tonic/issues/1056
use nix_compat::store_path::{self, StorePath};
use thiserror::Error;
use tvix_castore::{
proto::{self as castorepb, NamedNode},
B3Digest, B3_LEN,
};
use tvix_castore::proto::{self as castorepb, NamedNode, ValidateNodeError};
mod grpc_pathinfoservice_wrapper;
@ -34,14 +31,14 @@ pub enum ValidatePathInfoError {
#[error("No node present")]
NoNodePresent(),
/// Invalid node name encountered.
/// Node fails validation
#[error("Invalid root node: {:?}", .0.to_string())]
InvalidRootNode(ValidateNodeError),
/// Invalid node name encountered. Root nodes in PathInfos have more strict name requirements
#[error("Failed to parse {0:?} as StorePath: {1}")]
InvalidNodeName(Vec<u8>, store_path::Error),
/// The digest the (root) node refers to has invalid length.
#[error("Invalid Digest length: expected {}, got {}", B3_LEN, .0)]
InvalidNodeDigestLen(usize),
/// The digest in narinfo.nar_sha256 has an invalid len.
#[error("Invalid narinfo.nar_sha256 length: expected {}, got {}", 32, .0)]
InvalidNarSha256DigestLen(usize),
@ -146,27 +143,8 @@ impl PathInfo {
Err(ValidatePathInfoError::NoNodePresent())?
}
Some(castorepb::Node { node: Some(node) }) => {
match node {
// for a directory root node, ensure the digest has the appropriate size.
castorepb::node::Node::Directory(directory_node) => {
if TryInto::<B3Digest>::try_into(directory_node.digest.clone()).is_err() {
return Err(ValidatePathInfoError::InvalidNodeDigestLen(
directory_node.digest.len(),
));
}
}
// for a file root node, ensure the digest has the appropriate size.
castorepb::node::Node::File(file_node) => {
// ensure the digest has the appropriate size.
if TryInto::<B3Digest>::try_into(file_node.digest.clone()).is_err() {
return Err(ValidatePathInfoError::InvalidNodeDigestLen(
file_node.digest.len(),
));
}
}
// nothing to do specifically for symlinks
castorepb::node::Node::Symlink(_) => {}
}
node.validate()
.map_err(|e| ValidatePathInfoError::InvalidRootNode(e))?;
// parse the name of the node itself and return
parse_node_name_root(node.get_name(), ValidatePathInfoError::InvalidNodeName)?
}

View file

@ -43,7 +43,7 @@ fn validate_no_node(
digest: Bytes::new(),
size: 0,
},
Err(ValidatePathInfoError::InvalidNodeDigestLen(0));
Err(ValidatePathInfoError::InvalidRootNode(castorepb::ValidateNodeError::InvalidDigestLen(0)));
"invalid digest length"
)]
#[test_case(
@ -88,7 +88,7 @@ fn validate_directory(
digest: Bytes::new(),
..Default::default()
},
Err(ValidatePathInfoError::InvalidNodeDigestLen(0));
Err(ValidatePathInfoError::InvalidRootNode(castorepb::ValidateNodeError::InvalidDigestLen(0)));
"invalid digest length"
)]
#[test_case(
@ -120,7 +120,7 @@ fn validate_file(
#[test_case(
castorepb::SymlinkNode {
name: DUMMY_NAME.into(),
..Default::default()
target: "foo".into(),
},
Ok(StorePath::from_str(DUMMY_NAME).expect("must succeed"));
"ok"
@ -128,7 +128,7 @@ fn validate_file(
#[test_case(
castorepb::SymlinkNode {
name: "invalid".into(),
..Default::default()
target: "foo".into(),
},
Err(ValidatePathInfoError::InvalidNodeName(
"invalid".into(),
@ -239,3 +239,26 @@ fn validate_inconsistent_narinfo_reference_name_digest() {
e => panic!("unexpected error: {:?}", e),
}
}
/// Create a node with an empty symlink target, and ensure it fails validation.
#[test]
fn validate_symlink_empty_target_invalid() {
let node = castorepb::node::Node::Symlink(castorepb::SymlinkNode {
name: "foo".into(),
target: "".into(),
});
node.validate().expect_err("must fail validation");
}
/// Create a node with a symlink target including null bytes, and ensure it
/// fails validation.
#[test]
fn validate_symlink_target_null_byte_invalid() {
let node = castorepb::node::Node::Symlink(castorepb::SymlinkNode {
name: "foo".into(),
target: "foo\0".into(),
});
node.validate().expect_err("must fail validation");
}