refactor(tvix/castore): have PathComponent-specific errors

Don't use DirectoryError, but PathComponentError.

Also add checks for too long path components.

Change-Id: Ia9deb9dd0351138baadb2e9c9454c3e019d5a45e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12229
Tested-by: BuildkiteCI
Reviewed-by: Ilan Joselevich <personal@ilanjoselevich.com>
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: edef <edef@edef.eu>
This commit is contained in:
Florian Klink 2024-08-17 22:00:06 +03:00 committed by clbot
parent 0cfe2aaf6a
commit 56fa533e43
6 changed files with 246 additions and 76 deletions

View file

@ -3,7 +3,7 @@ use thiserror::Error;
use tokio::task::JoinError; use tokio::task::JoinError;
use tonic::Status; use tonic::Status;
use crate::path::PathComponent; use crate::path::{PathComponent, PathComponentError};
/// Errors related to communication with the store. /// Errors related to communication with the store.
#[derive(Debug, Error, PartialEq)] #[derive(Debug, Error, PartialEq)]
@ -47,8 +47,8 @@ pub enum DirectoryError {
#[error("Total size exceeds u32::MAX")] #[error("Total size exceeds u32::MAX")]
SizeOverflow, SizeOverflow,
/// Invalid name encountered /// Invalid name encountered
#[error("Invalid name: {}", .0.as_bstr())] #[error("Invalid name: {0}")]
InvalidName(bytes::Bytes), InvalidName(PathComponentError),
/// Elements are not in sorted order. Can only happen on protos /// Elements are not in sorted order. Can only happen on protos
#[error("{:?} is not sorted", .0.as_bstr())] #[error("{:?} is not sorted", .0.as_bstr())]
WrongSorting(bytes::Bytes), WrongSorting(bytes::Bytes),

View file

@ -14,7 +14,7 @@ mod nodes;
pub use nodes::*; pub use nodes::*;
mod path; mod path;
pub use path::{Path, PathBuf, PathComponent}; pub use path::{Path, PathBuf, PathComponent, PathComponentError};
pub mod import; pub mod import;
pub mod proto; pub mod proto;

View file

@ -1,6 +1,3 @@
// TODO: split out this error
use crate::DirectoryError;
use bstr::ByteSlice; use bstr::ByteSlice;
use std::fmt::{self, Debug, Display}; use std::fmt::{self, Debug, Display};
@ -8,12 +5,17 @@ use std::fmt::{self, Debug, Display};
/// Internally uses a [bytes::Bytes], but disallows /// Internally uses a [bytes::Bytes], but disallows
/// slashes, and null bytes to be present, as well as /// slashes, and null bytes to be present, as well as
/// '.', '..' and the empty string. /// '.', '..' and the empty string.
/// It also rejects components that are too long (> 255 bytes).
#[repr(transparent)] #[repr(transparent)]
#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] #[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct PathComponent { pub struct PathComponent {
pub(super) inner: bytes::Bytes, pub(super) inner: bytes::Bytes,
} }
/// The maximum length an individual path component can have.
/// Linux allows 255 bytes of actual name, so we pick that.
pub const MAX_NAME_LEN: usize = 255;
impl AsRef<[u8]> for PathComponent { impl AsRef<[u8]> for PathComponent {
fn as_ref(&self) -> &[u8] { fn as_ref(&self) -> &[u8] {
self.inner.as_ref() self.inner.as_ref()
@ -26,18 +28,24 @@ impl From<PathComponent> for bytes::Bytes {
} }
} }
pub(super) fn is_valid_name<B: AsRef<[u8]>>(name: B) -> bool { pub(super) fn validate_name<B: AsRef<[u8]>>(name: B) -> Result<(), PathComponentError> {
let v = name.as_ref(); match name.as_ref() {
b"" => Err(PathComponentError::Empty),
!v.is_empty() && v != *b".." && v != *b"." && !v.contains(&0x00) && !v.contains(&b'/') b".." => Err(PathComponentError::Parent),
b"." => Err(PathComponentError::CurDir),
v if v.len() > MAX_NAME_LEN => Err(PathComponentError::TooLong),
v if v.contains(&0x00) => Err(PathComponentError::Null),
v if v.contains(&b'/') => Err(PathComponentError::Slashes),
_ => Ok(()),
}
} }
impl TryFrom<bytes::Bytes> for PathComponent { impl TryFrom<bytes::Bytes> for PathComponent {
type Error = DirectoryError; type Error = PathComponentError;
fn try_from(value: bytes::Bytes) -> Result<Self, Self::Error> { fn try_from(value: bytes::Bytes) -> Result<Self, Self::Error> {
if !is_valid_name(&value) { if let Err(e) = validate_name(&value) {
return Err(DirectoryError::InvalidName(value)); return Err(PathComponentError::Convert(value, Box::new(e)));
} }
Ok(Self { inner: value }) Ok(Self { inner: value })
@ -45,14 +53,13 @@ impl TryFrom<bytes::Bytes> for PathComponent {
} }
impl TryFrom<&'static [u8]> for PathComponent { impl TryFrom<&'static [u8]> for PathComponent {
type Error = DirectoryError; type Error = PathComponentError;
fn try_from(value: &'static [u8]) -> Result<Self, Self::Error> { fn try_from(value: &'static [u8]) -> Result<Self, Self::Error> {
if !is_valid_name(value) { if let Err(e) = validate_name(value) {
return Err(DirectoryError::InvalidName(bytes::Bytes::from_static( return Err(PathComponentError::Convert(value.into(), Box::new(e)));
value,
)));
} }
Ok(Self { Ok(Self {
inner: bytes::Bytes::from_static(value), inner: bytes::Bytes::from_static(value),
}) })
@ -60,14 +67,16 @@ impl TryFrom<&'static [u8]> for PathComponent {
} }
impl TryFrom<&str> for PathComponent { impl TryFrom<&str> for PathComponent {
type Error = DirectoryError; type Error = PathComponentError;
fn try_from(value: &str) -> Result<Self, Self::Error> { fn try_from(value: &str) -> Result<Self, Self::Error> {
if !is_valid_name(value) { if let Err(e) = validate_name(value) {
return Err(DirectoryError::InvalidName(bytes::Bytes::copy_from_slice( return Err(PathComponentError::Convert(
value.as_bytes(), value.to_owned().into(),
))); Box::new(e),
));
} }
Ok(Self { Ok(Self {
inner: bytes::Bytes::copy_from_slice(value.as_bytes()), inner: bytes::Bytes::copy_from_slice(value.as_bytes()),
}) })
@ -75,16 +84,19 @@ impl TryFrom<&str> for PathComponent {
} }
impl TryFrom<&std::ffi::CStr> for PathComponent { impl TryFrom<&std::ffi::CStr> for PathComponent {
type Error = DirectoryError; type Error = PathComponentError;
fn try_from(value: &std::ffi::CStr) -> Result<Self, Self::Error> { fn try_from(value: &std::ffi::CStr) -> Result<Self, Self::Error> {
if !is_valid_name(value.to_bytes()) { let value = value.to_bytes();
return Err(DirectoryError::InvalidName(bytes::Bytes::copy_from_slice( if let Err(e) = validate_name(value) {
value.to_bytes(), return Err(PathComponentError::Convert(
))); value.to_owned().into(),
Box::new(e),
));
} }
Ok(Self { Ok(Self {
inner: bytes::Bytes::copy_from_slice(value.to_bytes()), inner: bytes::Bytes::copy_from_slice(value),
}) })
} }
} }
@ -100,3 +112,157 @@ impl Display for PathComponent {
Display::fmt(self.inner.as_bstr(), f) Display::fmt(self.inner.as_bstr(), f)
} }
} }
/// Errors created when parsing / validating [PathComponent].
#[derive(Debug, PartialEq, thiserror::Error)]
#[cfg_attr(test, derive(Clone))]
pub enum PathComponentError {
#[error("cannot be empty")]
Empty,
#[error("cannot contain null bytes")]
Null,
#[error("cannot be '.'")]
CurDir,
#[error("cannot be '..'")]
Parent,
#[error("cannot contain slashes")]
Slashes,
#[error("cannot be over {} bytes long", MAX_NAME_LEN)]
TooLong,
#[error("unable to convert '{:?}'", .0.as_bstr())]
Convert(bytes::Bytes, #[source] Box<Self>),
}
#[cfg(test)]
mod tests {
use std::ffi::CString;
use bytes::Bytes;
use rstest::rstest;
use super::{validate_name, PathComponent, PathComponentError};
#[rstest]
#[case::empty(b"", PathComponentError::Empty)]
#[case::null(b"foo\0", PathComponentError::Null)]
#[case::curdir(b".", PathComponentError::CurDir)]
#[case::parent(b"..", PathComponentError::Parent)]
#[case::slashes1(b"a/b", PathComponentError::Slashes)]
#[case::slashes2(b"/", PathComponentError::Slashes)]
fn errors(#[case] v: &'static [u8], #[case] err: PathComponentError) {
{
assert_eq!(
Err(err.clone()),
validate_name(v),
"validate_name must fail as expected"
);
}
let exp_err_v = Bytes::from_static(v);
// Bytes
{
let v = Bytes::from_static(v);
assert_eq!(
Err(PathComponentError::Convert(
exp_err_v.clone(),
Box::new(err.clone())
)),
PathComponent::try_from(v),
"conversion must fail as expected"
);
}
// &[u8]
{
assert_eq!(
Err(PathComponentError::Convert(
exp_err_v.clone(),
Box::new(err.clone())
)),
PathComponent::try_from(v),
"conversion must fail as expected"
);
}
// &str, if it is valid UTF-8
{
if let Ok(v) = std::str::from_utf8(v) {
assert_eq!(
Err(PathComponentError::Convert(
exp_err_v.clone(),
Box::new(err.clone())
)),
PathComponent::try_from(v),
"conversion must fail as expected"
);
}
}
// &CStr, if it can be constructed (fails if the payload contains null bytes)
{
if let Ok(v) = CString::new(v) {
let v = v.as_ref();
assert_eq!(
Err(PathComponentError::Convert(
exp_err_v.clone(),
Box::new(err.clone())
)),
PathComponent::try_from(v),
"conversion must fail as expected"
);
}
}
}
#[test]
fn error_toolong() {
assert_eq!(
Err(PathComponentError::TooLong),
validate_name("X".repeat(500).into_bytes().as_slice())
)
}
#[test]
fn success() {
let exp = PathComponent { inner: "aa".into() };
// Bytes
{
let v: Bytes = "aa".into();
assert_eq!(
Ok(exp.clone()),
PathComponent::try_from(v),
"conversion must succeed"
);
}
// &[u8]
{
let v: &[u8] = b"aa";
assert_eq!(
Ok(exp.clone()),
PathComponent::try_from(v),
"conversion must succeed"
);
}
// &str
{
let v: &str = "aa";
assert_eq!(
Ok(exp.clone()),
PathComponent::try_from(v),
"conversion must succeed"
);
}
// &CStr
{
let v = CString::new("aa").expect("CString must construct");
let v = v.as_c_str();
assert_eq!(
Ok(exp.clone()),
PathComponent::try_from(v),
"conversion must succeed"
);
}
}
}

View file

@ -9,7 +9,7 @@ use std::{
}; };
mod component; mod component;
pub use component::PathComponent; pub use component::{PathComponent, PathComponentError};
/// Represents a Path in the castore model. /// Represents a Path in the castore model.
/// These are always relative, and platform-independent, which distinguishes /// These are always relative, and platform-independent, which distinguishes
@ -37,7 +37,7 @@ impl Path {
if !bytes.is_empty() { if !bytes.is_empty() {
// Ensure all components are valid castore node names. // Ensure all components are valid castore node names.
for component in bytes.split_str(b"/") { for component in bytes.split_str(b"/") {
if !component::is_valid_name(component) { if component::validate_name(component).is_err() {
return None; return None;
} }
} }
@ -233,7 +233,7 @@ impl PathBuf {
/// Adjoins `name` to self. /// Adjoins `name` to self.
pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> { pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> {
if !component::is_valid_name(name) { if component::validate_name(name).is_err() {
return Err(std::io::ErrorKind::InvalidData.into()); return Err(std::io::ErrorKind::InvalidData.into());
} }

View file

@ -1,6 +1,5 @@
use std::{cmp::Ordering, str};
use prost::Message; use prost::Message;
use std::cmp::Ordering;
mod grpc_blobservice_wrapper; mod grpc_blobservice_wrapper;
mod grpc_directoryservice_wrapper; mod grpc_directoryservice_wrapper;
@ -80,31 +79,42 @@ impl TryFrom<Directory> for crate::Directory {
.try_fold(&b""[..], |prev_name, e| { .try_fold(&b""[..], |prev_name, e| {
match e.name.as_ref().cmp(prev_name) { match e.name.as_ref().cmp(prev_name) {
Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())), Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())),
Ordering::Equal => { Ordering::Equal => Err(DirectoryError::DuplicateName(
Err(DirectoryError::DuplicateName(e.name.to_owned().try_into()?)) e.name
} .to_owned()
.try_into()
.map_err(DirectoryError::InvalidName)?,
)),
Ordering::Greater => Ok(e.name.as_ref()), Ordering::Greater => Ok(e.name.as_ref()),
} }
})?; })?;
value.files.iter().try_fold(&b""[..], |prev_name, e| { value.files.iter().try_fold(&b""[..], |prev_name, e| {
match e.name.as_ref().cmp(prev_name) { match e.name.as_ref().cmp(prev_name) {
Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())), Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())),
Ordering::Equal => { Ordering::Equal => Err(DirectoryError::DuplicateName(
Err(DirectoryError::DuplicateName(e.name.to_owned().try_into()?)) e.name
} .to_owned()
.try_into()
.map_err(DirectoryError::InvalidName)?,
)),
Ordering::Greater => Ok(e.name.as_ref()), Ordering::Greater => Ok(e.name.as_ref()),
} }
})?; })?;
value.symlinks.iter().try_fold(&b""[..], |prev_name, e| { value.symlinks.iter().try_fold(&b""[..], |prev_name, e| {
match e.name.as_ref().cmp(prev_name) { match e.name.as_ref().cmp(prev_name) {
Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())), Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())),
Ordering::Equal => { Ordering::Equal => Err(DirectoryError::DuplicateName(
Err(DirectoryError::DuplicateName(e.name.to_owned().try_into()?)) e.name
} .to_owned()
.try_into()
.map_err(DirectoryError::InvalidName)?,
)),
Ordering::Greater => Ok(e.name.as_ref()), Ordering::Greater => Ok(e.name.as_ref()),
} }
})?; })?;
// FUTUREWORK: use is_sorted() once stable, and/or implement the producer for
// [crate::Directory::try_from_iter] iterating over all three and doing all checks inline.
let mut elems: Vec<(PathComponent, crate::Node)> = let mut elems: Vec<(PathComponent, crate::Node)> =
Vec::with_capacity(value.directories.len() + value.files.len() + value.symlinks.len()); Vec::with_capacity(value.directories.len() + value.files.len() + value.symlinks.len());
@ -184,7 +194,7 @@ impl Node {
pub fn into_name_and_node(self) -> Result<(PathComponent, crate::Node), DirectoryError> { pub fn into_name_and_node(self) -> Result<(PathComponent, crate::Node), DirectoryError> {
match self.node.ok_or_else(|| DirectoryError::NoNodeSet)? { match self.node.ok_or_else(|| DirectoryError::NoNodeSet)? {
node::Node::Directory(n) => { node::Node::Directory(n) => {
let name: PathComponent = n.name.try_into()?; let name: PathComponent = n.name.try_into().map_err(DirectoryError::InvalidName)?;
let digest = B3Digest::try_from(n.digest) let digest = B3Digest::try_from(n.digest)
.map_err(|e| DirectoryError::InvalidNode(name.clone(), e.into()))?; .map_err(|e| DirectoryError::InvalidNode(name.clone(), e.into()))?;
@ -196,7 +206,7 @@ impl Node {
Ok((name, node)) Ok((name, node))
} }
node::Node::File(n) => { node::Node::File(n) => {
let name: PathComponent = n.name.try_into()?; let name: PathComponent = n.name.try_into().map_err(DirectoryError::InvalidName)?;
let digest = B3Digest::try_from(n.digest) let digest = B3Digest::try_from(n.digest)
.map_err(|e| DirectoryError::InvalidNode(name.clone(), e.into()))?; .map_err(|e| DirectoryError::InvalidNode(name.clone(), e.into()))?;
@ -210,7 +220,8 @@ impl Node {
} }
node::Node::Symlink(n) => { node::Node::Symlink(n) => {
let name: PathComponent = n.name.try_into()?; let name: PathComponent = n.name.try_into().map_err(DirectoryError::InvalidName)?;
let node = crate::Node::Symlink { let node = crate::Node::Symlink {
target: n target: n
.target .target

View file

@ -161,12 +161,9 @@ fn validate_invalid_names() {
}], }],
..Default::default() ..Default::default()
}; };
match crate::Directory::try_from(d).expect_err("must fail") {
DirectoryError::InvalidName(n) => { let e = crate::Directory::try_from(d).expect_err("must fail");
assert_eq!(n.as_ref(), b"\0") assert!(matches!(e, DirectoryError::InvalidName(_)));
}
_ => panic!("unexpected error"),
};
} }
{ {
@ -178,12 +175,8 @@ fn validate_invalid_names() {
}], }],
..Default::default() ..Default::default()
}; };
match crate::Directory::try_from(d).expect_err("must fail") { let e = crate::Directory::try_from(d).expect_err("must fail");
DirectoryError::InvalidName(n) => { assert!(matches!(e, DirectoryError::InvalidName(_)));
assert_eq!(n.as_ref(), b".")
}
_ => panic!("unexpected error"),
};
} }
{ {
@ -196,12 +189,8 @@ fn validate_invalid_names() {
}], }],
..Default::default() ..Default::default()
}; };
match crate::Directory::try_from(d).expect_err("must fail") { let e = crate::Directory::try_from(d).expect_err("must fail");
DirectoryError::InvalidName(n) => { assert!(matches!(e, DirectoryError::InvalidName(_)));
assert_eq!(n.as_ref(), b"..")
}
_ => panic!("unexpected error"),
};
} }
{ {
@ -212,12 +201,8 @@ fn validate_invalid_names() {
}], }],
..Default::default() ..Default::default()
}; };
match crate::Directory::try_from(d).expect_err("must fail") { let e = crate::Directory::try_from(d).expect_err("must fail");
DirectoryError::InvalidName(n) => { assert!(matches!(e, DirectoryError::InvalidName(_)));
assert_eq!(n.as_ref(), b"\x00")
}
_ => panic!("unexpected error"),
};
} }
{ {
@ -228,12 +213,20 @@ fn validate_invalid_names() {
}], }],
..Default::default() ..Default::default()
}; };
match crate::Directory::try_from(d).expect_err("must fail") { let e = crate::Directory::try_from(d).expect_err("must fail");
DirectoryError::InvalidName(n) => { assert!(matches!(e, DirectoryError::InvalidName(_)));
assert_eq!(n.as_ref(), b"foo/bar")
} }
_ => panic!("unexpected error"),
{
let d = Directory {
symlinks: vec![SymlinkNode {
name: bytes::Bytes::copy_from_slice("X".repeat(500).into_bytes().as_slice()),
target: "foo".into(),
}],
..Default::default()
}; };
let e = crate::Directory::try_from(d).expect_err("must fail");
assert!(matches!(e, DirectoryError::InvalidName(_)));
} }
} }