refactor(tvix/castore): add PathComponent type for checked components

This encodes a verified component on the type level. Internally, it
contains a bytes::Bytes.

The castore Path/PathBuf component() and file_name() methods now
return this type, the old ones returning bytes were renamed to
component_bytes() and component_file_name() respectively.

We can drop the directory_reject_invalid_name test - it's not possible
anymore to pass an invalid name to Directories::add.
Invalid names in the Directory proto are still being tested to be
rejected in the validate_invalid_names tests.

Change-Id: Ide4d16415dfd50b7e2d7e0c36d42a3bbeeb9b6c5
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12217
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Tested-by: BuildkiteCI
This commit is contained in:
Florian Klink 2024-08-16 17:32:20 +03:00 committed by clbot
parent 8ea7d2b60e
commit 5ec93b57e6
25 changed files with 282 additions and 165 deletions

View file

@ -125,7 +125,7 @@ impl BuildRequest {
pub fn validate(&self) -> Result<(), ValidateBuildRequestError> {
// validate names. Make sure they're sorted
let mut last_name = bytes::Bytes::new();
let mut last_name: bytes::Bytes = "".into();
for (i, node) in self.inputs.iter().enumerate() {
// TODO(flokli): store result somewhere
let (name, _node) = node
@ -133,10 +133,10 @@ impl BuildRequest {
.into_name_and_node()
.map_err(|e| ValidateBuildRequestError::InvalidInputNode(i, e))?;
if name <= last_name {
if name.as_ref() <= last_name.as_ref() {
return Err(ValidateBuildRequestError::InputNodesNotSorted);
} else {
last_name = name
last_name = name.into()
}
}

View file

@ -1,7 +1,5 @@
use std::collections::HashMap;
use bstr::ByteSlice;
use petgraph::{
graph::{DiGraph, NodeIndex},
visit::{Bfs, DfsPostOrder, EdgeRef, IntoNodeIdentifiers, Walker},
@ -10,7 +8,7 @@ use petgraph::{
use tracing::instrument;
use super::order_validator::{LeavesToRootValidator, OrderValidator, RootToLeavesValidator};
use crate::{B3Digest, Directory, Node};
use crate::{path::PathComponent, B3Digest, Directory, Node};
#[derive(thiserror::Error, Debug)]
pub enum Error {
@ -71,12 +69,12 @@ pub struct ValidatedDirectoryGraph {
root: Option<NodeIndex>,
}
fn check_edge(dir: &Edge, dir_name: &[u8], child: &Directory) -> Result<(), Error> {
fn check_edge(dir: &Edge, dir_name: &PathComponent, child: &Directory) -> Result<(), Error> {
// Ensure the size specified in the child node matches our records.
if dir.1 != child.size() {
return Err(Error::ValidationError(format!(
"'{}' has wrong size, specified {}, recorded {}",
dir_name.as_bstr(),
dir_name,
dir.1,
child.size(),
)));
@ -179,7 +177,7 @@ impl<O: OrderValidator> DirectoryGraph<O> {
.expect("edge is already validated");
// TODO: where's the name here?
check_edge(&edge_weight, b"??", &directory)?;
check_edge(&edge_weight, &"??".try_into().unwrap(), &directory)?;
}
// finally, store the directory information in the node weight
@ -284,7 +282,7 @@ mod tests {
pub static ref BROKEN_PARENT_DIRECTORY: Directory = {
let mut dir = Directory::new();
dir.add(
"foo".into(),
"foo".try_into().unwrap(),
Node::Directory{
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size() + 42, // wrong!

View file

@ -219,7 +219,7 @@ async fn upload_reject_wrong_size(directory_service: impl DirectoryService) {
let wrong_parent_directory = {
let mut dir = Directory::new();
dir.add(
"foo".into(),
"foo".try_into().unwrap(),
Node::Directory {
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size() + 42, // wrong!

View file

@ -13,7 +13,7 @@ where
DS: AsRef<dyn DirectoryService>,
{
let mut parent_node = root_node;
for component in path.as_ref().components() {
for component in path.as_ref().components_bytes() {
match parent_node {
Node::File { .. } | Node::Symlink { .. } => {
// There's still some path left, but the parent node is no directory.

View file

@ -3,6 +3,8 @@ use thiserror::Error;
use tokio::task::JoinError;
use tonic::Status;
use crate::path::PathComponent;
/// Errors related to communication with the store.
#[derive(Debug, Error, PartialEq)]
pub enum Error {
@ -37,11 +39,11 @@ impl From<crate::digests::Error> for ValidateNodeError {
#[derive(Debug, thiserror::Error, PartialEq)]
pub enum DirectoryError {
/// Multiple elements with the same name encountered
#[error("{:?} is a duplicate name", .0.as_bstr())]
DuplicateName(Vec<u8>),
#[error("{:?} is a duplicate name", .0)]
DuplicateName(PathComponent),
/// Node failed validation
#[error("invalid node with name {:?}: {:?}", .0.as_bstr(), .1.to_string())]
InvalidNode(bytes::Bytes, ValidateNodeError),
#[error("invalid node with name {}: {:?}", .0, .1.to_string())]
InvalidNode(PathComponent, ValidateNodeError),
#[error("Total size exceeds u32::MAX")]
SizeOverflow,
/// Invalid name encountered

View file

@ -34,7 +34,7 @@ lazy_static! {
pub static ref DIRECTORY_WITH_KEEP: Directory = {
let mut dir = Directory::new();
dir.add(
".keep".into(),
".keep".try_into().unwrap(),
Node::File{
digest: EMPTY_BLOB_DIGEST.clone(),
size: 0,
@ -46,20 +46,20 @@ lazy_static! {
pub static ref DIRECTORY_COMPLICATED: Directory = {
let mut dir = Directory::new();
dir.add(
"keep".into(),
"keep".try_into().unwrap(),
Node::Directory{
digest: DIRECTORY_WITH_KEEP.digest(),
size: DIRECTORY_WITH_KEEP.size()
}).unwrap();
dir.add(
".keep".into(),
".keep".try_into().unwrap(),
Node::File{
digest: EMPTY_BLOB_DIGEST.clone(),
size: 0,
executable: false
}).unwrap();
dir.add(
"aa".into(),
"aa".try_into().unwrap(),
Node::Symlink{
target: "/nix/store/somewhereelse".try_into().unwrap()
}).unwrap();
@ -70,7 +70,7 @@ lazy_static! {
pub static ref DIRECTORY_B: Directory = {
let mut dir = Directory::new();
dir.add(
"a".into(),
"a".try_into().unwrap(),
Node::Directory{
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size(),
@ -81,13 +81,13 @@ lazy_static! {
pub static ref DIRECTORY_C: Directory = {
let mut dir = Directory::new();
dir.add(
"a".into(),
"a".try_into().unwrap(),
Node::Directory{
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size(),
}).unwrap();
dir.add(
"a'".into(),
"a'".try_into().unwrap(),
Node::Directory{
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size(),
@ -98,13 +98,13 @@ lazy_static! {
pub static ref DIRECTORY_D: Directory = {
let mut dir = Directory::new();
dir.add(
"a".into(),
"a".try_into().unwrap(),
Node::Directory{
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size(),
}).unwrap();
dir.add(
"b".into(),
"b".try_into().unwrap(),
Node::Directory{
digest: DIRECTORY_B.digest(),
size: DIRECTORY_B.size(),

View file

@ -1,5 +1,4 @@
use bstr::ByteSlice;
use bytes::Bytes;
use std::{
collections::BTreeMap,
ffi::{OsStr, OsString},
@ -12,12 +11,15 @@ use tempfile::TempDir;
use tokio_stream::{wrappers::ReadDirStream, StreamExt};
use super::FuseDaemon;
use crate::fs::{TvixStoreFs, XATTR_NAME_BLOB_DIGEST, XATTR_NAME_DIRECTORY_DIGEST};
use crate::{
blobservice::{BlobService, MemoryBlobService},
directoryservice::{DirectoryService, MemoryDirectoryService},
fixtures, Node,
};
use crate::{
fs::{TvixStoreFs, XATTR_NAME_BLOB_DIGEST, XATTR_NAME_DIRECTORY_DIGEST},
PathComponent,
};
const BLOB_A_NAME: &str = "00000000000000000000000000000000-test";
const BLOB_B_NAME: &str = "55555555555555555555555555555555-test";
@ -37,7 +39,7 @@ fn gen_svcs() -> (Arc<dyn BlobService>, Arc<dyn DirectoryService>) {
fn do_mount<P: AsRef<Path>, BS, DS>(
blob_service: BS,
directory_service: DS,
root_nodes: BTreeMap<bytes::Bytes, Node>,
root_nodes: BTreeMap<PathComponent, Node>,
mountpoint: P,
list_root: bool,
show_xattr: bool,
@ -58,7 +60,7 @@ where
async fn populate_blob_a(
blob_service: &Arc<dyn BlobService>,
root_nodes: &mut BTreeMap<Bytes, Node>,
root_nodes: &mut BTreeMap<PathComponent, Node>,
) {
let mut bw = blob_service.open_write().await;
tokio::io::copy(&mut Cursor::new(fixtures::BLOB_A.to_vec()), &mut bw)
@ -67,7 +69,7 @@ async fn populate_blob_a(
bw.close().await.expect("must succeed closing");
root_nodes.insert(
BLOB_A_NAME.into(),
BLOB_A_NAME.try_into().unwrap(),
Node::File {
digest: fixtures::BLOB_A_DIGEST.clone(),
size: fixtures::BLOB_A.len() as u64,
@ -78,7 +80,7 @@ async fn populate_blob_a(
async fn populate_blob_b(
blob_service: &Arc<dyn BlobService>,
root_nodes: &mut BTreeMap<Bytes, Node>,
root_nodes: &mut BTreeMap<PathComponent, Node>,
) {
let mut bw = blob_service.open_write().await;
tokio::io::copy(&mut Cursor::new(fixtures::BLOB_B.to_vec()), &mut bw)
@ -87,7 +89,7 @@ async fn populate_blob_b(
bw.close().await.expect("must succeed closing");
root_nodes.insert(
BLOB_B_NAME.into(),
BLOB_B_NAME.try_into().unwrap(),
Node::File {
digest: fixtures::BLOB_B_DIGEST.clone(),
size: fixtures::BLOB_B.len() as u64,
@ -99,7 +101,7 @@ async fn populate_blob_b(
/// adds a blob containing helloworld and marks it as executable
async fn populate_blob_helloworld(
blob_service: &Arc<dyn BlobService>,
root_nodes: &mut BTreeMap<Bytes, Node>,
root_nodes: &mut BTreeMap<PathComponent, Node>,
) {
let mut bw = blob_service.open_write().await;
tokio::io::copy(
@ -111,7 +113,7 @@ async fn populate_blob_helloworld(
bw.close().await.expect("must succeed closing");
root_nodes.insert(
HELLOWORLD_BLOB_NAME.into(),
HELLOWORLD_BLOB_NAME.try_into().unwrap(),
Node::File {
digest: fixtures::HELLOWORLD_BLOB_DIGEST.clone(),
size: fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u64,
@ -120,9 +122,9 @@ async fn populate_blob_helloworld(
);
}
async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) {
async fn populate_symlink(root_nodes: &mut BTreeMap<PathComponent, Node>) {
root_nodes.insert(
SYMLINK_NAME.into(),
SYMLINK_NAME.try_into().unwrap(),
Node::Symlink {
target: BLOB_A_NAME.try_into().unwrap(),
},
@ -131,9 +133,9 @@ async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) {
/// This writes a symlink pointing to /nix/store/somewhereelse,
/// which is the same symlink target as "aa" inside DIRECTORY_COMPLICATED.
async fn populate_symlink2(root_nodes: &mut BTreeMap<Bytes, Node>) {
async fn populate_symlink2(root_nodes: &mut BTreeMap<PathComponent, Node>) {
root_nodes.insert(
SYMLINK_NAME2.into(),
SYMLINK_NAME2.try_into().unwrap(),
Node::Symlink {
target: "/nix/store/somewhereelse".try_into().unwrap(),
},
@ -143,7 +145,7 @@ async fn populate_symlink2(root_nodes: &mut BTreeMap<Bytes, Node>) {
async fn populate_directory_with_keep(
blob_service: &Arc<dyn BlobService>,
directory_service: &Arc<dyn DirectoryService>,
root_nodes: &mut BTreeMap<Bytes, Node>,
root_nodes: &mut BTreeMap<PathComponent, Node>,
) {
// upload empty blob
let mut bw = blob_service.open_write().await;
@ -159,7 +161,7 @@ async fn populate_directory_with_keep(
.expect("must succeed uploading");
root_nodes.insert(
DIRECTORY_WITH_KEEP_NAME.into(),
DIRECTORY_WITH_KEEP_NAME.try_into().unwrap(),
Node::Directory {
digest: fixtures::DIRECTORY_WITH_KEEP.digest(),
size: fixtures::DIRECTORY_WITH_KEEP.size(),
@ -169,9 +171,9 @@ async fn populate_directory_with_keep(
/// Create a root node for DIRECTORY_WITH_KEEP, but don't upload the Directory
/// itself.
async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Bytes, Node>) {
async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<PathComponent, Node>) {
root_nodes.insert(
DIRECTORY_WITH_KEEP_NAME.into(),
DIRECTORY_WITH_KEEP_NAME.try_into().unwrap(),
Node::Directory {
digest: fixtures::DIRECTORY_WITH_KEEP.digest(),
size: fixtures::DIRECTORY_WITH_KEEP.size(),
@ -180,9 +182,9 @@ async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Byte
}
/// Insert BLOB_A, but don't provide the blob .keep is pointing to.
async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap<Bytes, Node>) {
async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap<PathComponent, Node>) {
root_nodes.insert(
BLOB_A_NAME.into(),
BLOB_A_NAME.try_into().unwrap(),
Node::File {
digest: fixtures::BLOB_A_DIGEST.clone(),
size: fixtures::BLOB_A.len() as u64,
@ -194,7 +196,7 @@ async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap<Bytes, Node>)
async fn populate_directory_complicated(
blob_service: &Arc<dyn BlobService>,
directory_service: &Arc<dyn DirectoryService>,
root_nodes: &mut BTreeMap<Bytes, Node>,
root_nodes: &mut BTreeMap<PathComponent, Node>,
) {
// upload empty blob
let mut bw = blob_service.open_write().await;
@ -216,7 +218,7 @@ async fn populate_directory_complicated(
.expect("must succeed uploading");
root_nodes.insert(
DIRECTORY_COMPLICATED_NAME.into(),
DIRECTORY_COMPLICATED_NAME.try_into().unwrap(),
Node::Directory {
digest: fixtures::DIRECTORY_COMPLICATED.digest(),
size: fixtures::DIRECTORY_COMPLICATED.size(),

View file

@ -2,7 +2,7 @@
//! about inodes, which present tvix-castore nodes in a filesystem.
use std::time::Duration;
use crate::{B3Digest, Node};
use crate::{path::PathComponent, B3Digest, Node};
#[derive(Clone, Debug)]
pub enum InodeData {
@ -18,7 +18,7 @@ pub enum InodeData {
#[derive(Clone, Debug)]
pub enum DirectoryInodeData {
Sparse(B3Digest, u64), // digest, size
Populated(B3Digest, Vec<(u64, bytes::Bytes, Node)>), // [(child_inode, name, node)]
Populated(B3Digest, Vec<(u64, PathComponent, Node)>), // [(child_inode, name, node)]
}
impl InodeData {

View file

@ -18,10 +18,10 @@ use self::{
use crate::{
blobservice::{BlobReader, BlobService},
directoryservice::DirectoryService,
{B3Digest, Node},
path::PathComponent,
B3Digest, Node,
};
use bstr::ByteVec;
use bytes::Bytes;
use fuse_backend_rs::abi::fuse_abi::{stat64, OpenOptions};
use fuse_backend_rs::api::filesystem::{
Context, FileSystem, FsOptions, GetxattrReply, ListxattrReply, ROOT_ID,
@ -87,7 +87,7 @@ pub struct TvixStoreFs<BS, DS, RN> {
show_xattr: bool,
/// This maps a given basename in the root to the inode we allocated for the node.
root_nodes: RwLock<HashMap<Bytes, u64>>,
root_nodes: RwLock<HashMap<PathComponent, u64>>,
/// This keeps track of inodes and data alongside them.
inode_tracker: RwLock<InodeTracker>,
@ -103,7 +103,7 @@ pub struct TvixStoreFs<BS, DS, RN> {
u64,
(
Span,
Arc<Mutex<mpsc::Receiver<(usize, Result<(Bytes, Node), crate::Error>)>>>,
Arc<Mutex<mpsc::Receiver<(usize, Result<(PathComponent, Node), crate::Error>)>>>,
),
>,
>,
@ -154,7 +154,7 @@ where
/// Retrieves the inode for a given root node basename, if present.
/// This obtains a read lock on self.root_nodes.
fn get_inode_for_root_name(&self, name: &[u8]) -> Option<u64> {
fn get_inode_for_root_name(&self, name: &PathComponent) -> Option<u64> {
self.root_nodes.read().get(name).cloned()
}
@ -170,7 +170,7 @@ where
fn get_directory_children(
&self,
ino: u64,
) -> io::Result<(B3Digest, Vec<(u64, bytes::Bytes, Node)>)> {
) -> io::Result<(B3Digest, Vec<(u64, PathComponent, Node)>)> {
let data = self.inode_tracker.read().get(ino).unwrap();
match *data {
// if it's populated already, return children.
@ -200,7 +200,7 @@ where
let children = {
let mut inode_tracker = self.inode_tracker.write();
let children: Vec<(u64, Bytes, Node)> = directory
let children: Vec<(u64, PathComponent, Node)> = directory
.nodes()
.map(|(child_name, child_node)| {
let inode_data = InodeData::from_node(child_node);
@ -240,12 +240,12 @@ where
/// In the case the name can't be found, a libc::ENOENT is returned.
fn name_in_root_to_ino_and_data(
&self,
name: &std::ffi::CStr,
name: &PathComponent,
) -> io::Result<(u64, Arc<InodeData>)> {
// Look up the inode for that root node.
// If there's one, [self.inode_tracker] MUST also contain the data,
// which we can then return.
if let Some(inode) = self.get_inode_for_root_name(name.to_bytes()) {
if let Some(inode) = self.get_inode_for_root_name(name) {
return Ok((
inode,
self.inode_tracker
@ -259,7 +259,8 @@ where
// We don't have it yet, look it up in [self.root_nodes].
match self.tokio_handle.block_on({
let root_nodes_provider = self.root_nodes_provider.clone();
async move { root_nodes_provider.get_by_basename(name.to_bytes()).await }
let name = name.clone();
async move { root_nodes_provider.get_by_basename(&name).await }
}) {
// if there was an error looking up the root node, propagate up an IO error.
Err(_e) => Err(io::Error::from_raw_os_error(libc::EIO)),
@ -269,7 +270,7 @@ where
Ok(Some(root_node)) => {
// Let's check if someone else beat us to updating the inode tracker and
// root_nodes map. This avoids locking inode_tracker for writing.
if let Some(ino) = self.root_nodes.read().get(name.to_bytes()) {
if let Some(ino) = self.root_nodes.read().get(name) {
return Ok((
*ino,
self.inode_tracker.read().get(*ino).expect("must exist"),
@ -285,7 +286,7 @@ where
// self.root_nodes.
let inode_data = InodeData::from_node(&root_node);
let ino = inode_tracker.put(inode_data.clone());
root_nodes.insert(bytes::Bytes::copy_from_slice(name.to_bytes()), ino);
root_nodes.insert(name.to_owned(), ino);
Ok((ino, Arc::new(inode_data)))
}
@ -341,13 +342,17 @@ where
) -> io::Result<fuse_backend_rs::api::filesystem::Entry> {
debug!("lookup");
// convert the CStr to a PathComponent
// If it can't be converted, we definitely don't have anything here.
let name: PathComponent = name.try_into().map_err(|_| std::io::ErrorKind::NotFound)?;
// This goes from a parent inode to a node.
// - If the parent is [ROOT_ID], we need to check
// [self.root_nodes] (fetching from a [RootNode] provider if needed)
// - Otherwise, lookup the parent in [self.inode_tracker] (which must be
// a [InodeData::Directory]), and find the child with that name.
if parent == ROOT_ID {
let (ino, inode_data) = self.name_in_root_to_ino_and_data(name)?;
let (ino, inode_data) = self.name_in_root_to_ino_and_data(&name)?;
debug!(inode_data=?&inode_data, ino=ino, "Some");
return Ok(inode_data.as_fuse_entry(ino));
@ -360,7 +365,7 @@ where
// Search for that name in the list of children and return the FileAttrs.
// in the children, find the one with the desired name.
if let Some((child_ino, _, _)) = children.iter().find(|(_, n, _)| n == name.to_bytes()) {
if let Some((child_ino, _, _)) = children.iter().find(|(_, n, _)| n == &name) {
// lookup the child [InodeData] in [self.inode_tracker].
// We know the inodes for children have already been allocated.
let child_inode_data = self.inode_tracker.read().get(*child_ino).unwrap();
@ -479,7 +484,7 @@ where
ino,
offset: offset + (i as u64) + 1,
type_: inode_data.as_fuse_type(),
name: &name,
name: name.as_ref(),
})?;
// If the buffer is full, add_entry will return `Ok(0)`.
if written == 0 {
@ -503,7 +508,7 @@ where
ino,
offset: offset + (i as u64) + 1,
type_: inode_data.as_fuse_type(),
name: &child_name,
name: child_name.as_ref(),
})?;
// If the buffer is full, add_entry will return `Ok(0)`.
if written == 0 {
@ -569,7 +574,7 @@ where
ino,
offset: offset + (i as u64) + 1,
type_: inode_data.as_fuse_type(),
name: &name,
name: name.as_ref(),
},
inode_data.as_fuse_entry(ino),
)?;
@ -594,7 +599,7 @@ where
ino,
offset: offset + (i as u64) + 1,
type_: inode_data.as_fuse_type(),
name: &name,
name: name.as_ref(),
},
inode_data.as_fuse_entry(ino),
)?;

View file

@ -1,6 +1,6 @@
use std::collections::BTreeMap;
use crate::{Error, Node};
use crate::{path::PathComponent, Error, Node};
use futures::stream::BoxStream;
use tonic::async_trait;
@ -10,12 +10,12 @@ use tonic::async_trait;
pub trait RootNodes: Send + Sync {
/// Looks up a root CA node based on the basename of the node in the root
/// directory of the filesystem.
async fn get_by_basename(&self, name: &[u8]) -> Result<Option<Node>, Error>;
async fn get_by_basename(&self, name: &PathComponent) -> Result<Option<Node>, Error>;
/// Lists all root CA nodes in the filesystem, as a tuple of (base)name
/// and Node.
/// An error can be returned in case listing is not allowed.
fn list(&self) -> BoxStream<Result<(bytes::Bytes, Node), Error>>;
fn list(&self) -> BoxStream<Result<(PathComponent, Node), Error>>;
}
#[async_trait]
@ -23,13 +23,13 @@ pub trait RootNodes: Send + Sync {
/// the key is the node name.
impl<T> RootNodes for T
where
T: AsRef<BTreeMap<bytes::Bytes, Node>> + Send + Sync,
T: AsRef<BTreeMap<PathComponent, Node>> + Send + Sync,
{
async fn get_by_basename(&self, name: &[u8]) -> Result<Option<Node>, Error> {
async fn get_by_basename(&self, name: &PathComponent) -> Result<Option<Node>, Error> {
Ok(self.as_ref().get(name).cloned())
}
fn list(&self) -> BoxStream<Result<(bytes::Bytes, Node), Error>> {
fn list(&self) -> BoxStream<Result<(PathComponent, Node), Error>> {
Box::pin(tokio_stream::iter(
self.as_ref()
.iter()

View file

@ -123,9 +123,8 @@ where
.path()
.file_name()
// If this is the root node, it will have an empty name.
.unwrap_or_default()
.to_owned()
.into();
.unwrap_or_else(|| "".try_into().unwrap())
.to_owned();
// record node in parent directory, creating a new [Directory] if not there yet.
directories

View file

@ -14,7 +14,7 @@ mod nodes;
pub use nodes::*;
mod path;
pub use path::{Path, PathBuf};
pub use path::{Path, PathBuf, PathComponent};
pub mod import;
pub mod proto;

View file

@ -1,6 +1,6 @@
use std::collections::BTreeMap;
use crate::{errors::DirectoryError, proto, B3Digest, Node};
use crate::{errors::DirectoryError, path::PathComponent, proto, B3Digest, Node};
/// A Directory contains nodes, which can be Directory, File or Symlink nodes.
/// It attached names to these nodes, which is the basename in that directory.
@ -10,7 +10,7 @@ use crate::{errors::DirectoryError, proto, B3Digest, Node};
/// - MUST be unique across all three lists
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct Directory {
nodes: BTreeMap<bytes::Bytes, Node>,
nodes: BTreeMap<PathComponent, Node>,
}
impl Directory {
@ -46,31 +46,17 @@ impl Directory {
/// Allows iterating over all nodes (directories, files and symlinks)
/// For each, it returns a tuple of its name and node.
/// The elements are sorted by their names.
pub fn nodes(&self) -> impl Iterator<Item = (&bytes::Bytes, &Node)> + Send + Sync + '_ {
pub fn nodes(&self) -> impl Iterator<Item = (&PathComponent, &Node)> + Send + Sync + '_ {
self.nodes.iter()
}
/// Checks a Node name for validity as a directory entry
/// We disallow slashes, null bytes, '.', '..' and the empty string.
pub(crate) fn is_valid_name(name: &[u8]) -> bool {
!(name.is_empty()
|| name == b".."
|| name == b"."
|| name.contains(&0x00)
|| name.contains(&b'/'))
}
/// Adds the specified [Node] to the [Directory] with a given name.
///
/// Inserting an element that already exists with the same name in the directory will yield an
/// error.
/// Inserting an element will validate that its name fulfills the
/// requirements for directory entries and yield an error if it is not.
pub fn add(&mut self, name: bytes::Bytes, node: Node) -> Result<(), DirectoryError> {
if !Self::is_valid_name(&name) {
return Err(DirectoryError::InvalidName(name));
}
pub fn add(&mut self, name: PathComponent, node: Node) -> Result<(), DirectoryError> {
// Check that the even after adding this new directory entry, the size calculation will not
// overflow
// FUTUREWORK: add some sort of batch add interface which only does this check once with
@ -91,7 +77,7 @@ impl Directory {
Ok(())
}
std::collections::btree_map::Entry::Occupied(occupied) => {
Err(DirectoryError::DuplicateName(occupied.key().to_vec()))
Err(DirectoryError::DuplicateName(occupied.key().to_owned()))
}
}
}
@ -112,7 +98,7 @@ mod test {
let mut d = Directory::new();
d.add(
"b".into(),
"b".try_into().unwrap(),
Node::Directory {
digest: DUMMY_DIGEST.clone(),
size: 1,
@ -120,7 +106,7 @@ mod test {
)
.unwrap();
d.add(
"a".into(),
"a".try_into().unwrap(),
Node::Directory {
digest: DUMMY_DIGEST.clone(),
size: 1,
@ -128,7 +114,7 @@ mod test {
)
.unwrap();
d.add(
"z".into(),
"z".try_into().unwrap(),
Node::Directory {
digest: DUMMY_DIGEST.clone(),
size: 1,
@ -137,7 +123,7 @@ mod test {
.unwrap();
d.add(
"f".into(),
"f".try_into().unwrap(),
Node::File {
digest: DUMMY_DIGEST.clone(),
size: 1,
@ -146,7 +132,7 @@ mod test {
)
.unwrap();
d.add(
"c".into(),
"c".try_into().unwrap(),
Node::File {
digest: DUMMY_DIGEST.clone(),
size: 1,
@ -155,7 +141,7 @@ mod test {
)
.unwrap();
d.add(
"g".into(),
"g".try_into().unwrap(),
Node::File {
digest: DUMMY_DIGEST.clone(),
size: 1,
@ -165,21 +151,21 @@ mod test {
.unwrap();
d.add(
"t".into(),
"t".try_into().unwrap(),
Node::Symlink {
target: "a".try_into().unwrap(),
},
)
.unwrap();
d.add(
"o".into(),
"o".try_into().unwrap(),
Node::Symlink {
target: "a".try_into().unwrap(),
},
)
.unwrap();
d.add(
"e".into(),
"e".try_into().unwrap(),
Node::Symlink {
target: "a".try_into().unwrap(),
},
@ -197,7 +183,7 @@ mod test {
assert_eq!(
d.add(
"foo".into(),
"foo".try_into().unwrap(),
Node::Directory {
digest: DUMMY_DIGEST.clone(),
size: u64::MAX
@ -212,7 +198,7 @@ mod test {
let mut d = Directory::new();
d.add(
"a".into(),
"a".try_into().unwrap(),
Node::Directory {
digest: DUMMY_DIGEST.clone(),
size: 1,
@ -223,7 +209,7 @@ mod test {
format!(
"{}",
d.add(
"a".into(),
"a".try_into().unwrap(),
Node::File {
digest: DUMMY_DIGEST.clone(),
size: 1,
@ -235,20 +221,4 @@ mod test {
"\"a\" is a duplicate name"
);
}
/// Attempt to add a directory entry with a name which should be rejected.
#[tokio::test]
async fn directory_reject_invalid_name() {
let mut dir = Directory::new();
assert!(
dir.add(
"".into(), // wrong! can not be added to directory
Node::Symlink {
target: "doesntmatter".try_into().unwrap(),
},
)
.is_err(),
"invalid symlink entry be rejected"
);
}
}

View file

@ -4,7 +4,7 @@ mod symlink_target;
use crate::B3Digest;
pub use directory::Directory;
use symlink_target::SymlinkTarget;
pub use symlink_target::SymlinkTarget;
/// A Node is either a [DirectoryNode], [FileNode] or [SymlinkNode].
/// Nodes themselves don't have names, what gives them names is either them

View file

@ -0,0 +1,102 @@
// TODO: split out this error
use crate::DirectoryError;
use bstr::ByteSlice;
use std::fmt::{self, Debug, Display};
/// A wrapper type for validated path components in the castore model.
/// Internally uses a [bytes::Bytes], but disallows
/// slashes, and null bytes to be present, as well as
/// '.', '..' and the empty string.
#[repr(transparent)]
#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct PathComponent {
pub(super) inner: bytes::Bytes,
}
impl AsRef<[u8]> for PathComponent {
fn as_ref(&self) -> &[u8] {
self.inner.as_ref()
}
}
impl From<PathComponent> for bytes::Bytes {
fn from(value: PathComponent) -> Self {
value.inner
}
}
pub(super) fn is_valid_name<B: AsRef<[u8]>>(name: B) -> bool {
let v = name.as_ref();
!v.is_empty() && v != *b".." && v != *b"." && !v.contains(&0x00) && !v.contains(&b'/')
}
impl TryFrom<bytes::Bytes> for PathComponent {
type Error = DirectoryError;
fn try_from(value: bytes::Bytes) -> Result<Self, Self::Error> {
if !is_valid_name(&value) {
return Err(DirectoryError::InvalidName(value));
}
Ok(Self { inner: value })
}
}
impl TryFrom<&'static [u8]> for PathComponent {
type Error = DirectoryError;
fn try_from(value: &'static [u8]) -> Result<Self, Self::Error> {
if !is_valid_name(value) {
return Err(DirectoryError::InvalidName(bytes::Bytes::from_static(
value,
)));
}
Ok(Self {
inner: bytes::Bytes::from_static(value),
})
}
}
impl TryFrom<&str> for PathComponent {
type Error = DirectoryError;
fn try_from(value: &str) -> Result<Self, Self::Error> {
if !is_valid_name(value) {
return Err(DirectoryError::InvalidName(bytes::Bytes::copy_from_slice(
value.as_bytes(),
)));
}
Ok(Self {
inner: bytes::Bytes::copy_from_slice(value.as_bytes()),
})
}
}
impl TryFrom<&std::ffi::CStr> for PathComponent {
type Error = DirectoryError;
fn try_from(value: &std::ffi::CStr) -> Result<Self, Self::Error> {
if !is_valid_name(value.to_bytes()) {
return Err(DirectoryError::InvalidName(bytes::Bytes::copy_from_slice(
value.to_bytes(),
)));
}
Ok(Self {
inner: bytes::Bytes::copy_from_slice(value.to_bytes()),
})
}
}
impl Debug for PathComponent {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Debug::fmt(self.inner.as_bstr(), f)
}
}
impl Display for PathComponent {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Display::fmt(self.inner.as_bstr(), f)
}
}

View file

@ -1,5 +1,5 @@
//! Contains data structures to deal with Paths in the tvix-castore model.
use bstr::ByteSlice;
use std::{
borrow::Borrow,
fmt::{self, Debug, Display},
@ -8,9 +8,8 @@ use std::{
str::FromStr,
};
use bstr::ByteSlice;
use crate::Directory;
mod component;
pub use component::PathComponent;
/// Represents a Path in the castore model.
/// These are always relative, and platform-independent, which distinguishes
@ -38,7 +37,7 @@ impl Path {
if !bytes.is_empty() {
// Ensure all components are valid castore node names.
for component in bytes.split_str(b"/") {
if !Directory::is_valid_name(component) {
if !component::is_valid_name(component) {
return None;
}
}
@ -83,10 +82,26 @@ impl Path {
Ok(v)
}
/// Provides an iterator over the components of the path,
/// which are invividual [PathComponent].
/// In case the path is empty, an empty iterator is returned.
pub fn components(&self) -> impl Iterator<Item = PathComponent> + '_ {
let mut iter = self.inner.split_str(&b"/");
// We don't want to return an empty element, consume it if it's the only one.
if self.inner.is_empty() {
let _ = iter.next();
}
iter.map(|b| PathComponent {
inner: bytes::Bytes::copy_from_slice(b),
})
}
/// Produces an iterator over the components of the path, which are
/// individual byte slices.
/// In case the path is empty, an empty iterator is returned.
pub fn components(&self) -> impl Iterator<Item = &[u8]> {
pub fn components_bytes(&self) -> impl Iterator<Item = &[u8]> {
let mut iter = self.inner.split_str(&b"/");
// We don't want to return an empty element, consume it if it's the only one.
@ -97,11 +112,16 @@ impl Path {
iter
}
/// Returns the final component of the Path, if there is one.
pub fn file_name(&self) -> Option<&[u8]> {
/// Returns the final component of the Path, if there is one, in bytes.
pub fn file_name(&self) -> Option<PathComponent> {
self.components().last()
}
/// Returns the final component of the Path, if there is one, in bytes.
pub fn file_name_bytes(&self) -> Option<&[u8]> {
self.components_bytes().last()
}
pub fn as_bytes(&self) -> &[u8] {
&self.inner
}
@ -213,7 +233,7 @@ impl PathBuf {
/// Adjoins `name` to self.
pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> {
if !Directory::is_valid_name(name) {
if !component::is_valid_name(name) {
return Err(std::io::ErrorKind::InvalidData.into());
}
@ -333,7 +353,7 @@ mod test {
assert_eq!(s.as_bytes(), p.as_bytes(), "inner bytes mismatch");
assert_eq!(
num_components,
p.components().count(),
p.components_bytes().count(),
"number of components mismatch"
);
}
@ -400,10 +420,10 @@ mod test {
#[case("a", vec!["a"])]
#[case("a/b", vec!["a", "b"])]
#[case("a/b/c", vec!["a","b", "c"])]
pub fn components(#[case] p: PathBuf, #[case] exp_components: Vec<&str>) {
pub fn components_bytes(#[case] p: PathBuf, #[case] exp_components: Vec<&str>) {
assert_eq!(
exp_components,
p.components()
p.components_bytes()
.map(|x| x.to_str().unwrap())
.collect::<Vec<_>>()
);

View file

@ -5,7 +5,7 @@ use prost::Message;
mod grpc_blobservice_wrapper;
mod grpc_directoryservice_wrapper;
use crate::{B3Digest, DirectoryError};
use crate::{path::PathComponent, B3Digest, DirectoryError};
pub use grpc_blobservice_wrapper::GRPCBlobServiceWrapper;
pub use grpc_directoryservice_wrapper::GRPCDirectoryServiceWrapper;
@ -162,19 +162,19 @@ impl From<&crate::Directory> for Directory {
size,
executable,
} => files.push(FileNode {
name: name.clone(),
name: name.to_owned().into(),
digest: digest.to_owned().into(),
size: *size,
executable: *executable,
}),
crate::Node::Directory { digest, size } => directories.push(DirectoryNode {
name: name.clone(),
name: name.to_owned().into(),
digest: digest.to_owned().into(),
size: *size,
}),
crate::Node::Symlink { target } => {
symlinks.push(SymlinkNode {
name: name.clone(),
name: name.to_owned().into(),
target: target.to_owned().into(),
});
}
@ -190,22 +190,24 @@ impl From<&crate::Directory> for Directory {
impl Node {
/// Converts a proto [Node] to a [crate::Node], and splits off the name.
pub fn into_name_and_node(self) -> Result<(bytes::Bytes, crate::Node), DirectoryError> {
pub fn into_name_and_node(self) -> Result<(PathComponent, crate::Node), DirectoryError> {
match self.node.ok_or_else(|| DirectoryError::NoNodeSet)? {
node::Node::Directory(n) => {
let name: PathComponent = n.name.try_into()?;
let digest = B3Digest::try_from(n.digest)
.map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e.into()))?;
.map_err(|e| DirectoryError::InvalidNode(name.clone(), e.into()))?;
let node = crate::Node::Directory {
digest,
size: n.size,
};
Ok((n.name, node))
Ok((name, node))
}
node::Node::File(n) => {
let name: PathComponent = n.name.try_into()?;
let digest = B3Digest::try_from(n.digest)
.map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e.into()))?;
.map_err(|e| DirectoryError::InvalidNode(name.clone(), e.into()))?;
let node = crate::Node::File {
digest,
@ -213,23 +215,26 @@ impl Node {
executable: n.executable,
};
Ok((n.name, node))
Ok((name, node))
}
node::Node::Symlink(n) => {
let name: PathComponent = n.name.try_into()?;
let node = crate::Node::Symlink {
target: n
.target
.try_into()
.map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e))?,
.map_err(|e| DirectoryError::InvalidNode(name.clone(), e))?,
};
Ok((n.name, node))
Ok((name, node))
}
}
}
/// Construsts a [Node] from a name and [crate::Node].
/// The name is a [bytes::Bytes], not a [PathComponent], as we have use an
/// empty name in some places.
pub fn from_name_and_node(name: bytes::Bytes, n: crate::Node) -> Self {
match n {
crate::Node::Directory { digest, size } => Self {

View file

@ -301,7 +301,7 @@ fn validate_sorting() {
};
match crate::Directory::try_from(d).expect_err("must fail") {
DirectoryError::DuplicateName(s) => {
assert_eq!(s, b"a");
assert_eq!(s.as_ref(), b"a");
}
_ => panic!("unexpected error"),
}

View file

@ -1,6 +1,8 @@
# prevents a false-positive lint on our types containing bytes::Bytes
# https://rust-lang.github.io/rust-clippy/master/index.html#/mutable_key_type
ignore-interior-mutability = [
# make sure to specify the originating type name, not re-exports!
"bytes::Bytes",
"tvix_castore::digests::B3Digest"
"tvix_castore::digests::B3Digest",
"tvix_castore::path::component::PathComponent"
]

View file

@ -314,7 +314,7 @@ impl TvixStoreIO {
// assemble the PathInfo to persist
let path_info = PathInfo {
node: Some(tvix_castore::proto::Node::from_name_and_node(
output_name,
output_name.into(),
output_node,
)),
references: vec![], // TODO: refscan
@ -355,7 +355,9 @@ impl TvixStoreIO {
.outputs
.into_iter()
.map(|e| e.into_name_and_node().expect("invalid node"))
.find(|(output_name, _output_node)| output_name == s.as_bytes())
.find(|(output_name, _output_node)| {
output_name.as_ref() == s.as_bytes()
})
.expect("build didn't produce the store path")
.1
}
@ -562,9 +564,13 @@ impl EvalIO for TvixStoreIO {
// TODO: into_nodes() to avoid cloning
for (name, node) in directory.nodes() {
children.push(match node {
Node::Directory { .. } => (name.clone(), FileType::Directory),
Node::File { .. } => (name.clone(), FileType::Regular),
Node::Symlink { .. } => (name.clone(), FileType::Symlink),
Node::Directory { .. } => {
(name.clone().into(), FileType::Directory)
}
Node::File { .. } => (name.clone().into(), FileType::Regular),
Node::Symlink { .. } => {
(name.clone().into(), FileType::Symlink)
}
})
}
Ok(children)

View file

@ -57,7 +57,7 @@ pub async fn get(
StatusCode::BAD_REQUEST
})?;
if !root_name.is_empty() {
if !root_name.as_ref().is_empty() {
warn!("root node has name, which it shouldn't");
return Err(StatusCode::BAD_REQUEST);
}

View file

@ -3,6 +3,7 @@ use std::path::Path;
use tracing::{debug, instrument};
use tvix_castore::{
blobservice::BlobService, directoryservice::DirectoryService, import::fs::ingest_path, Node,
PathComponent,
};
use nix_compat::{
@ -139,14 +140,19 @@ where
)
})?;
let name = bytes::Bytes::from(output_path.to_string());
let name: PathComponent = output_path
.to_string()
.as_str()
.try_into()
.expect("Tvix bug: StorePath must be PathComponent");
log_node(name.as_ref(), &root_node, path.as_ref());
let path_info = derive_nar_ca_path_info(
nar_size,
nar_sha256,
Some(&CAHash::Nar(NixHash::Sha256(nar_sha256))),
output_path.to_string().into_bytes().into(),
name.into(),
root_node,
);

View file

@ -178,7 +178,7 @@ where
// and then recurse on that entry.
for (name, node) in directory.nodes() {
let child_node = nar_node_directory
.entry(name)
.entry(name.as_ref())
.await
.map_err(RenderError::NARWriterError)?;

View file

@ -3,7 +3,7 @@ use futures::StreamExt;
use tonic::async_trait;
use tvix_castore::fs::{RootNodes, TvixStoreFs};
use tvix_castore::{blobservice::BlobService, directoryservice::DirectoryService};
use tvix_castore::{Error, Node};
use tvix_castore::{Error, Node, PathComponent};
use super::PathInfoService;
@ -47,8 +47,8 @@ impl<T> RootNodes for RootNodesWrapper<T>
where
T: AsRef<dyn PathInfoService> + Send + Sync,
{
async fn get_by_basename(&self, name: &[u8]) -> Result<Option<Node>, Error> {
let Ok(store_path) = nix_compat::store_path::StorePath::from_bytes(name) else {
async fn get_by_basename(&self, name: &PathComponent) -> Result<Option<Node>, Error> {
let Ok(store_path) = nix_compat::store_path::StorePath::from_bytes(name.as_ref()) else {
return Ok(None);
};
@ -72,7 +72,7 @@ where
.transpose()?)
}
fn list(&self) -> BoxStream<Result<(bytes::Bytes, Node), Error>> {
fn list(&self) -> BoxStream<Result<(PathComponent, Node), Error>> {
Box::pin(self.0.as_ref().list().map(|result| {
result.and_then(|path_info| {
let node = path_info

View file

@ -35,7 +35,7 @@ fn validate_pathinfo(
name: DUMMY_PATH.into(),
digest: Bytes::new(),
size: 0,
}, Err(ValidatePathInfoError::InvalidRootNode(DirectoryError::InvalidNode(DUMMY_PATH.into(), ValidateNodeError::InvalidDigestLen(0)))))]
}, Err(ValidatePathInfoError::InvalidRootNode(DirectoryError::InvalidNode(DUMMY_PATH.try_into().unwrap(), ValidateNodeError::InvalidDigestLen(0)))))]
#[case::invalid_node_name_no_storepath(castorepb::DirectoryNode {
name: "invalid".into(),
digest: DUMMY_DIGEST.clone().into(),
@ -74,7 +74,7 @@ fn validate_directory(
digest: Bytes::new(),
..Default::default()
},
Err(ValidatePathInfoError::InvalidRootNode(DirectoryError::InvalidNode(DUMMY_PATH.into(), ValidateNodeError::InvalidDigestLen(0))))
Err(ValidatePathInfoError::InvalidRootNode(DirectoryError::InvalidNode(DUMMY_PATH.try_into().unwrap(), ValidateNodeError::InvalidDigestLen(0))))
)]
#[case::invalid_node_name(
castorepb::FileNode {