diff --git a/tvix/build/src/proto/mod.rs b/tvix/build/src/proto/mod.rs index d3b325bd3..b36049d05 100644 --- a/tvix/build/src/proto/mod.rs +++ b/tvix/build/src/proto/mod.rs @@ -125,7 +125,7 @@ impl BuildRequest { pub fn validate(&self) -> Result<(), ValidateBuildRequestError> { // validate names. Make sure they're sorted - let mut last_name = bytes::Bytes::new(); + let mut last_name: bytes::Bytes = "".into(); for (i, node) in self.inputs.iter().enumerate() { // TODO(flokli): store result somewhere let (name, _node) = node @@ -133,10 +133,10 @@ impl BuildRequest { .into_name_and_node() .map_err(|e| ValidateBuildRequestError::InvalidInputNode(i, e))?; - if name <= last_name { + if name.as_ref() <= last_name.as_ref() { return Err(ValidateBuildRequestError::InputNodesNotSorted); } else { - last_name = name + last_name = name.into() } } diff --git a/tvix/castore/src/directoryservice/directory_graph.rs b/tvix/castore/src/directoryservice/directory_graph.rs index 2f932fe05..3c2d54702 100644 --- a/tvix/castore/src/directoryservice/directory_graph.rs +++ b/tvix/castore/src/directoryservice/directory_graph.rs @@ -1,7 +1,5 @@ use std::collections::HashMap; -use bstr::ByteSlice; - use petgraph::{ graph::{DiGraph, NodeIndex}, visit::{Bfs, DfsPostOrder, EdgeRef, IntoNodeIdentifiers, Walker}, @@ -10,7 +8,7 @@ use petgraph::{ use tracing::instrument; use super::order_validator::{LeavesToRootValidator, OrderValidator, RootToLeavesValidator}; -use crate::{B3Digest, Directory, Node}; +use crate::{path::PathComponent, B3Digest, Directory, Node}; #[derive(thiserror::Error, Debug)] pub enum Error { @@ -71,12 +69,12 @@ pub struct ValidatedDirectoryGraph { root: Option, } -fn check_edge(dir: &Edge, dir_name: &[u8], child: &Directory) -> Result<(), Error> { +fn check_edge(dir: &Edge, dir_name: &PathComponent, child: &Directory) -> Result<(), Error> { // Ensure the size specified in the child node matches our records. if dir.1 != child.size() { return Err(Error::ValidationError(format!( "'{}' has wrong size, specified {}, recorded {}", - dir_name.as_bstr(), + dir_name, dir.1, child.size(), ))); @@ -179,7 +177,7 @@ impl DirectoryGraph { .expect("edge is already validated"); // TODO: where's the name here? - check_edge(&edge_weight, b"??", &directory)?; + check_edge(&edge_weight, &"??".try_into().unwrap(), &directory)?; } // finally, store the directory information in the node weight @@ -284,7 +282,7 @@ mod tests { pub static ref BROKEN_PARENT_DIRECTORY: Directory = { let mut dir = Directory::new(); dir.add( - "foo".into(), + "foo".try_into().unwrap(), Node::Directory{ digest: DIRECTORY_A.digest(), size: DIRECTORY_A.size() + 42, // wrong! diff --git a/tvix/castore/src/directoryservice/tests/mod.rs b/tvix/castore/src/directoryservice/tests/mod.rs index 2bb9f05bf..01e421304 100644 --- a/tvix/castore/src/directoryservice/tests/mod.rs +++ b/tvix/castore/src/directoryservice/tests/mod.rs @@ -219,7 +219,7 @@ async fn upload_reject_wrong_size(directory_service: impl DirectoryService) { let wrong_parent_directory = { let mut dir = Directory::new(); dir.add( - "foo".into(), + "foo".try_into().unwrap(), Node::Directory { digest: DIRECTORY_A.digest(), size: DIRECTORY_A.size() + 42, // wrong! 
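// A minimal, standalone sketch of the strictly-ascending name check that
// BuildRequest::validate performs above: input names compare as raw bytes, and the
// previous name starts out empty, so any valid (non-empty) first name sorts after it.
// This is an illustration only, not the tvix-build implementation.
fn names_strictly_sorted<'a>(names: impl IntoIterator<Item = &'a [u8]>) -> bool {
    let mut last: &[u8] = b"";
    for name in names {
        if name <= last {
            // out of order, or a duplicate name
            return false;
        }
        last = name;
    }
    true
}

fn main() {
    let sorted: [&[u8]; 3] = [b"bar", b"baz", b"foo"];
    let unsorted: [&[u8]; 2] = [b"foo", b"bar"];
    let duplicate: [&[u8]; 2] = [b"foo", b"foo"];
    assert!(names_strictly_sorted(sorted));
    assert!(!names_strictly_sorted(unsorted));
    assert!(!names_strictly_sorted(duplicate));
}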
diff --git a/tvix/castore/src/directoryservice/traverse.rs b/tvix/castore/src/directoryservice/traverse.rs index 86a90175e..63b99ba4e 100644 --- a/tvix/castore/src/directoryservice/traverse.rs +++ b/tvix/castore/src/directoryservice/traverse.rs @@ -13,7 +13,7 @@ where DS: AsRef, { let mut parent_node = root_node; - for component in path.as_ref().components() { + for component in path.as_ref().components_bytes() { match parent_node { Node::File { .. } | Node::Symlink { .. } => { // There's still some path left, but the parent node is no directory. diff --git a/tvix/castore/src/errors.rs b/tvix/castore/src/errors.rs index e03cfa5a5..6c213cb06 100644 --- a/tvix/castore/src/errors.rs +++ b/tvix/castore/src/errors.rs @@ -3,6 +3,8 @@ use thiserror::Error; use tokio::task::JoinError; use tonic::Status; +use crate::path::PathComponent; + /// Errors related to communication with the store. #[derive(Debug, Error, PartialEq)] pub enum Error { @@ -37,11 +39,11 @@ impl From for ValidateNodeError { #[derive(Debug, thiserror::Error, PartialEq)] pub enum DirectoryError { /// Multiple elements with the same name encountered - #[error("{:?} is a duplicate name", .0.as_bstr())] - DuplicateName(Vec), + #[error("{:?} is a duplicate name", .0)] + DuplicateName(PathComponent), /// Node failed validation - #[error("invalid node with name {:?}: {:?}", .0.as_bstr(), .1.to_string())] - InvalidNode(bytes::Bytes, ValidateNodeError), + #[error("invalid node with name {}: {:?}", .0, .1.to_string())] + InvalidNode(PathComponent, ValidateNodeError), #[error("Total size exceeds u32::MAX")] SizeOverflow, /// Invalid name encountered diff --git a/tvix/castore/src/fixtures.rs b/tvix/castore/src/fixtures.rs index ddeacaf1d..00cf3682d 100644 --- a/tvix/castore/src/fixtures.rs +++ b/tvix/castore/src/fixtures.rs @@ -34,7 +34,7 @@ lazy_static! { pub static ref DIRECTORY_WITH_KEEP: Directory = { let mut dir = Directory::new(); dir.add( - ".keep".into(), + ".keep".try_into().unwrap(), Node::File{ digest: EMPTY_BLOB_DIGEST.clone(), size: 0, @@ -46,20 +46,20 @@ lazy_static! { pub static ref DIRECTORY_COMPLICATED: Directory = { let mut dir = Directory::new(); dir.add( - "keep".into(), + "keep".try_into().unwrap(), Node::Directory{ digest: DIRECTORY_WITH_KEEP.digest(), size: DIRECTORY_WITH_KEEP.size() }).unwrap(); dir.add( - ".keep".into(), + ".keep".try_into().unwrap(), Node::File{ digest: EMPTY_BLOB_DIGEST.clone(), size: 0, executable: false }).unwrap(); dir.add( - "aa".into(), + "aa".try_into().unwrap(), Node::Symlink{ target: "/nix/store/somewhereelse".try_into().unwrap() }).unwrap(); @@ -70,7 +70,7 @@ lazy_static! { pub static ref DIRECTORY_B: Directory = { let mut dir = Directory::new(); dir.add( - "a".into(), + "a".try_into().unwrap(), Node::Directory{ digest: DIRECTORY_A.digest(), size: DIRECTORY_A.size(), @@ -81,13 +81,13 @@ lazy_static! { pub static ref DIRECTORY_C: Directory = { let mut dir = Directory::new(); dir.add( - "a".into(), + "a".try_into().unwrap(), Node::Directory{ digest: DIRECTORY_A.digest(), size: DIRECTORY_A.size(), }).unwrap(); dir.add( - "a'".into(), + "a'".try_into().unwrap(), Node::Directory{ digest: DIRECTORY_A.digest(), size: DIRECTORY_A.size(), @@ -98,13 +98,13 @@ lazy_static! 
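// Hedged sketch of how the reworked DirectoryError surfaces with PathComponent,
// matching the Display string asserted in the directory tests further down.
// Symlink nodes keep the example digest-free; it assumes the tvix_castore crate as
// patched in this diff is available as a dependency.
use tvix_castore::{Directory, DirectoryError, Node};

fn main() {
    let mut dir = Directory::new();
    dir.add(
        "a".try_into().unwrap(),
        Node::Symlink {
            target: "/nix/store/somewhereelse".try_into().unwrap(),
        },
    )
    .unwrap();

    // Adding a second entry under the same name is rejected; the error now carries
    // a validated PathComponent instead of raw bytes.
    let err = dir
        .add(
            "a".try_into().unwrap(),
            Node::Symlink {
                target: "/nix/store/elsewhere".try_into().unwrap(),
            },
        )
        .unwrap_err();
    assert_eq!(err.to_string(), "\"a\" is a duplicate name");
    match err {
        DirectoryError::DuplicateName(name) => assert_eq!(name.as_ref(), b"a"),
        other => panic!("unexpected error: {other:?}"),
    }
}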
{ pub static ref DIRECTORY_D: Directory = { let mut dir = Directory::new(); dir.add( - "a".into(), + "a".try_into().unwrap(), Node::Directory{ digest: DIRECTORY_A.digest(), size: DIRECTORY_A.size(), }).unwrap(); dir.add( - "b".into(), + "b".try_into().unwrap(), Node::Directory{ digest: DIRECTORY_B.digest(), size: DIRECTORY_B.size(), diff --git a/tvix/castore/src/fs/fuse/tests.rs b/tvix/castore/src/fs/fuse/tests.rs index e5e2e66c3..9e01204d5 100644 --- a/tvix/castore/src/fs/fuse/tests.rs +++ b/tvix/castore/src/fs/fuse/tests.rs @@ -1,5 +1,4 @@ use bstr::ByteSlice; -use bytes::Bytes; use std::{ collections::BTreeMap, ffi::{OsStr, OsString}, @@ -12,12 +11,15 @@ use tempfile::TempDir; use tokio_stream::{wrappers::ReadDirStream, StreamExt}; use super::FuseDaemon; -use crate::fs::{TvixStoreFs, XATTR_NAME_BLOB_DIGEST, XATTR_NAME_DIRECTORY_DIGEST}; use crate::{ blobservice::{BlobService, MemoryBlobService}, directoryservice::{DirectoryService, MemoryDirectoryService}, fixtures, Node, }; +use crate::{ + fs::{TvixStoreFs, XATTR_NAME_BLOB_DIGEST, XATTR_NAME_DIRECTORY_DIGEST}, + PathComponent, +}; const BLOB_A_NAME: &str = "00000000000000000000000000000000-test"; const BLOB_B_NAME: &str = "55555555555555555555555555555555-test"; @@ -37,7 +39,7 @@ fn gen_svcs() -> (Arc, Arc) { fn do_mount, BS, DS>( blob_service: BS, directory_service: DS, - root_nodes: BTreeMap, + root_nodes: BTreeMap, mountpoint: P, list_root: bool, show_xattr: bool, @@ -58,7 +60,7 @@ where async fn populate_blob_a( blob_service: &Arc, - root_nodes: &mut BTreeMap, + root_nodes: &mut BTreeMap, ) { let mut bw = blob_service.open_write().await; tokio::io::copy(&mut Cursor::new(fixtures::BLOB_A.to_vec()), &mut bw) @@ -67,7 +69,7 @@ async fn populate_blob_a( bw.close().await.expect("must succeed closing"); root_nodes.insert( - BLOB_A_NAME.into(), + BLOB_A_NAME.try_into().unwrap(), Node::File { digest: fixtures::BLOB_A_DIGEST.clone(), size: fixtures::BLOB_A.len() as u64, @@ -78,7 +80,7 @@ async fn populate_blob_a( async fn populate_blob_b( blob_service: &Arc, - root_nodes: &mut BTreeMap, + root_nodes: &mut BTreeMap, ) { let mut bw = blob_service.open_write().await; tokio::io::copy(&mut Cursor::new(fixtures::BLOB_B.to_vec()), &mut bw) @@ -87,7 +89,7 @@ async fn populate_blob_b( bw.close().await.expect("must succeed closing"); root_nodes.insert( - BLOB_B_NAME.into(), + BLOB_B_NAME.try_into().unwrap(), Node::File { digest: fixtures::BLOB_B_DIGEST.clone(), size: fixtures::BLOB_B.len() as u64, @@ -99,7 +101,7 @@ async fn populate_blob_b( /// adds a blob containing helloworld and marks it as executable async fn populate_blob_helloworld( blob_service: &Arc, - root_nodes: &mut BTreeMap, + root_nodes: &mut BTreeMap, ) { let mut bw = blob_service.open_write().await; tokio::io::copy( @@ -111,7 +113,7 @@ async fn populate_blob_helloworld( bw.close().await.expect("must succeed closing"); root_nodes.insert( - HELLOWORLD_BLOB_NAME.into(), + HELLOWORLD_BLOB_NAME.try_into().unwrap(), Node::File { digest: fixtures::HELLOWORLD_BLOB_DIGEST.clone(), size: fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u64, @@ -120,9 +122,9 @@ async fn populate_blob_helloworld( ); } -async fn populate_symlink(root_nodes: &mut BTreeMap) { +async fn populate_symlink(root_nodes: &mut BTreeMap) { root_nodes.insert( - SYMLINK_NAME.into(), + SYMLINK_NAME.try_into().unwrap(), Node::Symlink { target: BLOB_A_NAME.try_into().unwrap(), }, @@ -131,9 +133,9 @@ async fn populate_symlink(root_nodes: &mut BTreeMap) { /// This writes a symlink pointing to /nix/store/somewhereelse, /// which is the 
same symlink target as "aa" inside DIRECTORY_COMPLICATED. -async fn populate_symlink2(root_nodes: &mut BTreeMap) { +async fn populate_symlink2(root_nodes: &mut BTreeMap) { root_nodes.insert( - SYMLINK_NAME2.into(), + SYMLINK_NAME2.try_into().unwrap(), Node::Symlink { target: "/nix/store/somewhereelse".try_into().unwrap(), }, @@ -143,7 +145,7 @@ async fn populate_symlink2(root_nodes: &mut BTreeMap) { async fn populate_directory_with_keep( blob_service: &Arc, directory_service: &Arc, - root_nodes: &mut BTreeMap, + root_nodes: &mut BTreeMap, ) { // upload empty blob let mut bw = blob_service.open_write().await; @@ -159,7 +161,7 @@ async fn populate_directory_with_keep( .expect("must succeed uploading"); root_nodes.insert( - DIRECTORY_WITH_KEEP_NAME.into(), + DIRECTORY_WITH_KEEP_NAME.try_into().unwrap(), Node::Directory { digest: fixtures::DIRECTORY_WITH_KEEP.digest(), size: fixtures::DIRECTORY_WITH_KEEP.size(), @@ -169,9 +171,9 @@ async fn populate_directory_with_keep( /// Create a root node for DIRECTORY_WITH_KEEP, but don't upload the Directory /// itself. -async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap) { +async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap) { root_nodes.insert( - DIRECTORY_WITH_KEEP_NAME.into(), + DIRECTORY_WITH_KEEP_NAME.try_into().unwrap(), Node::Directory { digest: fixtures::DIRECTORY_WITH_KEEP.digest(), size: fixtures::DIRECTORY_WITH_KEEP.size(), @@ -180,9 +182,9 @@ async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap) { +async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap) { root_nodes.insert( - BLOB_A_NAME.into(), + BLOB_A_NAME.try_into().unwrap(), Node::File { digest: fixtures::BLOB_A_DIGEST.clone(), size: fixtures::BLOB_A.len() as u64, @@ -194,7 +196,7 @@ async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap) async fn populate_directory_complicated( blob_service: &Arc, directory_service: &Arc, - root_nodes: &mut BTreeMap, + root_nodes: &mut BTreeMap, ) { // upload empty blob let mut bw = blob_service.open_write().await; @@ -216,7 +218,7 @@ async fn populate_directory_complicated( .expect("must succeed uploading"); root_nodes.insert( - DIRECTORY_COMPLICATED_NAME.into(), + DIRECTORY_COMPLICATED_NAME.try_into().unwrap(), Node::Directory { digest: fixtures::DIRECTORY_COMPLICATED.digest(), size: fixtures::DIRECTORY_COMPLICATED.size(), diff --git a/tvix/castore/src/fs/inodes.rs b/tvix/castore/src/fs/inodes.rs index 782ffff71..2696fdede 100644 --- a/tvix/castore/src/fs/inodes.rs +++ b/tvix/castore/src/fs/inodes.rs @@ -2,7 +2,7 @@ //! about inodes, which present tvix-castore nodes in a filesystem. use std::time::Duration; -use crate::{B3Digest, Node}; +use crate::{path::PathComponent, B3Digest, Node}; #[derive(Clone, Debug)] pub enum InodeData { @@ -17,8 +17,8 @@ pub enum InodeData { /// lookup and did fetch the data. 
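// Hedged sketch of the new root-node map shape used by these FUSE tests: keys are
// validated PathComponents rather than raw Bytes, so an invalid basename can never
// end up in the map. The concrete name and symlink target below are made up for
// illustration; only the types come from the patched tvix_castore API.
use std::collections::BTreeMap;
use tvix_castore::{Node, PathComponent};

fn main() {
    let mut root_nodes: BTreeMap<PathComponent, Node> = BTreeMap::new();
    root_nodes.insert(
        "00000000000000000000000000000000-test".try_into().unwrap(),
        Node::Symlink {
            target: "/nix/store/somewhereelse".try_into().unwrap(),
        },
    );

    // Lookups are keyed by names that already passed validation...
    let key: PathComponent = "00000000000000000000000000000000-test".try_into().unwrap();
    assert!(root_nodes.contains_key(&key));

    // ...and a name containing a slash cannot even be constructed as a key.
    assert!(PathComponent::try_from("not/a/basename").is_err());
}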
#[derive(Clone, Debug)] pub enum DirectoryInodeData { - Sparse(B3Digest, u64), // digest, size - Populated(B3Digest, Vec<(u64, bytes::Bytes, Node)>), // [(child_inode, name, node)] + Sparse(B3Digest, u64), // digest, size + Populated(B3Digest, Vec<(u64, PathComponent, Node)>), // [(child_inode, name, node)] } impl InodeData { diff --git a/tvix/castore/src/fs/mod.rs b/tvix/castore/src/fs/mod.rs index f819adb54..d196266ab 100644 --- a/tvix/castore/src/fs/mod.rs +++ b/tvix/castore/src/fs/mod.rs @@ -18,10 +18,10 @@ use self::{ use crate::{ blobservice::{BlobReader, BlobService}, directoryservice::DirectoryService, - {B3Digest, Node}, + path::PathComponent, + B3Digest, Node, }; use bstr::ByteVec; -use bytes::Bytes; use fuse_backend_rs::abi::fuse_abi::{stat64, OpenOptions}; use fuse_backend_rs::api::filesystem::{ Context, FileSystem, FsOptions, GetxattrReply, ListxattrReply, ROOT_ID, @@ -87,7 +87,7 @@ pub struct TvixStoreFs { show_xattr: bool, /// This maps a given basename in the root to the inode we allocated for the node. - root_nodes: RwLock>, + root_nodes: RwLock>, /// This keeps track of inodes and data alongside them. inode_tracker: RwLock, @@ -103,7 +103,7 @@ pub struct TvixStoreFs { u64, ( Span, - Arc)>>>, + Arc)>>>, ), >, >, @@ -154,7 +154,7 @@ where /// Retrieves the inode for a given root node basename, if present. /// This obtains a read lock on self.root_nodes. - fn get_inode_for_root_name(&self, name: &[u8]) -> Option { + fn get_inode_for_root_name(&self, name: &PathComponent) -> Option { self.root_nodes.read().get(name).cloned() } @@ -170,7 +170,7 @@ where fn get_directory_children( &self, ino: u64, - ) -> io::Result<(B3Digest, Vec<(u64, bytes::Bytes, Node)>)> { + ) -> io::Result<(B3Digest, Vec<(u64, PathComponent, Node)>)> { let data = self.inode_tracker.read().get(ino).unwrap(); match *data { // if it's populated already, return children. @@ -200,7 +200,7 @@ where let children = { let mut inode_tracker = self.inode_tracker.write(); - let children: Vec<(u64, Bytes, Node)> = directory + let children: Vec<(u64, PathComponent, Node)> = directory .nodes() .map(|(child_name, child_node)| { let inode_data = InodeData::from_node(child_node); @@ -240,12 +240,12 @@ where /// In the case the name can't be found, a libc::ENOENT is returned. fn name_in_root_to_ino_and_data( &self, - name: &std::ffi::CStr, + name: &PathComponent, ) -> io::Result<(u64, Arc)> { // Look up the inode for that root node. // If there's one, [self.inode_tracker] MUST also contain the data, // which we can then return. - if let Some(inode) = self.get_inode_for_root_name(name.to_bytes()) { + if let Some(inode) = self.get_inode_for_root_name(name) { return Ok(( inode, self.inode_tracker @@ -259,7 +259,8 @@ where // We don't have it yet, look it up in [self.root_nodes]. match self.tokio_handle.block_on({ let root_nodes_provider = self.root_nodes_provider.clone(); - async move { root_nodes_provider.get_by_basename(name.to_bytes()).await } + let name = name.clone(); + async move { root_nodes_provider.get_by_basename(&name).await } }) { // if there was an error looking up the root node, propagate up an IO error. Err(_e) => Err(io::Error::from_raw_os_error(libc::EIO)), @@ -269,7 +270,7 @@ where Ok(Some(root_node)) => { // Let's check if someone else beat us to updating the inode tracker and // root_nodes map. This avoids locking inode_tracker for writing. 
- if let Some(ino) = self.root_nodes.read().get(name.to_bytes()) { + if let Some(ino) = self.root_nodes.read().get(name) { return Ok(( *ino, self.inode_tracker.read().get(*ino).expect("must exist"), @@ -285,7 +286,7 @@ where // self.root_nodes. let inode_data = InodeData::from_node(&root_node); let ino = inode_tracker.put(inode_data.clone()); - root_nodes.insert(bytes::Bytes::copy_from_slice(name.to_bytes()), ino); + root_nodes.insert(name.to_owned(), ino); Ok((ino, Arc::new(inode_data))) } @@ -341,13 +342,17 @@ where ) -> io::Result { debug!("lookup"); + // convert the CStr to a PathComponent + // If it can't be converted, we definitely don't have anything here. + let name: PathComponent = name.try_into().map_err(|_| std::io::ErrorKind::NotFound)?; + // This goes from a parent inode to a node. // - If the parent is [ROOT_ID], we need to check // [self.root_nodes] (fetching from a [RootNode] provider if needed) // - Otherwise, lookup the parent in [self.inode_tracker] (which must be // a [InodeData::Directory]), and find the child with that name. if parent == ROOT_ID { - let (ino, inode_data) = self.name_in_root_to_ino_and_data(name)?; + let (ino, inode_data) = self.name_in_root_to_ino_and_data(&name)?; debug!(inode_data=?&inode_data, ino=ino, "Some"); return Ok(inode_data.as_fuse_entry(ino)); @@ -360,7 +365,7 @@ where // Search for that name in the list of children and return the FileAttrs. // in the children, find the one with the desired name. - if let Some((child_ino, _, _)) = children.iter().find(|(_, n, _)| n == name.to_bytes()) { + if let Some((child_ino, _, _)) = children.iter().find(|(_, n, _)| n == &name) { // lookup the child [InodeData] in [self.inode_tracker]. // We know the inodes for children have already been allocated. let child_inode_data = self.inode_tracker.read().get(*child_ino).unwrap(); @@ -479,7 +484,7 @@ where ino, offset: offset + (i as u64) + 1, type_: inode_data.as_fuse_type(), - name: &name, + name: name.as_ref(), })?; // If the buffer is full, add_entry will return `Ok(0)`. if written == 0 { @@ -503,7 +508,7 @@ where ino, offset: offset + (i as u64) + 1, type_: inode_data.as_fuse_type(), - name: &child_name, + name: child_name.as_ref(), })?; // If the buffer is full, add_entry will return `Ok(0)`. if written == 0 { @@ -569,7 +574,7 @@ where ino, offset: offset + (i as u64) + 1, type_: inode_data.as_fuse_type(), - name: &name, + name: name.as_ref(), }, inode_data.as_fuse_entry(ino), )?; @@ -594,7 +599,7 @@ where ino, offset: offset + (i as u64) + 1, type_: inode_data.as_fuse_type(), - name: &name, + name: name.as_ref(), }, inode_data.as_fuse_entry(ino), )?; diff --git a/tvix/castore/src/fs/root_nodes.rs b/tvix/castore/src/fs/root_nodes.rs index 62f0b6219..5ed1a4d8d 100644 --- a/tvix/castore/src/fs/root_nodes.rs +++ b/tvix/castore/src/fs/root_nodes.rs @@ -1,6 +1,6 @@ use std::collections::BTreeMap; -use crate::{Error, Node}; +use crate::{path::PathComponent, Error, Node}; use futures::stream::BoxStream; use tonic::async_trait; @@ -10,12 +10,12 @@ use tonic::async_trait; pub trait RootNodes: Send + Sync { /// Looks up a root CA node based on the basename of the node in the root /// directory of the filesystem. - async fn get_by_basename(&self, name: &[u8]) -> Result, Error>; + async fn get_by_basename(&self, name: &PathComponent) -> Result, Error>; /// Lists all root CA nodes in the filesystem, as a tuple of (base)name /// and Node. /// An error can be returned in case listing is not allowed. 
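// Hedged sketch of the lookup-time conversion added above: a FUSE name arrives as a
// &CStr, and anything that is not a valid castore name (empty, ".", "..", or
// containing '/' or NUL) is mapped straight to "not found", since such a name can
// never exist as a directory entry. Standalone illustration against the patched
// tvix_castore API.
use std::ffi::CStr;
use tvix_castore::PathComponent;

fn to_component(name: &CStr) -> std::io::Result<PathComponent> {
    name.try_into()
        .map_err(|_| std::io::Error::from(std::io::ErrorKind::NotFound))
}

fn main() {
    let ok = CStr::from_bytes_with_nul(b"foo\0").unwrap();
    let dotdot = CStr::from_bytes_with_nul(b"..\0").unwrap();
    assert!(to_component(ok).is_ok());
    // ".." is rejected at conversion time, so lookup() can answer ENOENT immediately.
    assert!(to_component(dotdot).is_err());
}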
- fn list(&self) -> BoxStream>; + fn list(&self) -> BoxStream>; } #[async_trait] @@ -23,13 +23,13 @@ pub trait RootNodes: Send + Sync { /// the key is the node name. impl RootNodes for T where - T: AsRef> + Send + Sync, + T: AsRef> + Send + Sync, { - async fn get_by_basename(&self, name: &[u8]) -> Result, Error> { + async fn get_by_basename(&self, name: &PathComponent) -> Result, Error> { Ok(self.as_ref().get(name).cloned()) } - fn list(&self) -> BoxStream> { + fn list(&self) -> BoxStream> { Box::pin(tokio_stream::iter( self.as_ref() .iter() diff --git a/tvix/castore/src/import/mod.rs b/tvix/castore/src/import/mod.rs index 9fce22585..a7c459fdb 100644 --- a/tvix/castore/src/import/mod.rs +++ b/tvix/castore/src/import/mod.rs @@ -123,9 +123,8 @@ where .path() .file_name() // If this is the root node, it will have an empty name. - .unwrap_or_default() - .to_owned() - .into(); + .unwrap_or_else(|| "".try_into().unwrap()) + .to_owned(); // record node in parent directory, creating a new [Directory] if not there yet. directories diff --git a/tvix/castore/src/lib.rs b/tvix/castore/src/lib.rs index f56ddf827..6e1b36231 100644 --- a/tvix/castore/src/lib.rs +++ b/tvix/castore/src/lib.rs @@ -14,7 +14,7 @@ mod nodes; pub use nodes::*; mod path; -pub use path::{Path, PathBuf}; +pub use path::{Path, PathBuf, PathComponent}; pub mod import; pub mod proto; diff --git a/tvix/castore/src/nodes/directory.rs b/tvix/castore/src/nodes/directory.rs index a2c520358..03db30b52 100644 --- a/tvix/castore/src/nodes/directory.rs +++ b/tvix/castore/src/nodes/directory.rs @@ -1,6 +1,6 @@ use std::collections::BTreeMap; -use crate::{errors::DirectoryError, proto, B3Digest, Node}; +use crate::{errors::DirectoryError, path::PathComponent, proto, B3Digest, Node}; /// A Directory contains nodes, which can be Directory, File or Symlink nodes. /// It attached names to these nodes, which is the basename in that directory. @@ -10,7 +10,7 @@ use crate::{errors::DirectoryError, proto, B3Digest, Node}; /// - MUST be unique across all three lists #[derive(Default, Debug, Clone, PartialEq, Eq)] pub struct Directory { - nodes: BTreeMap, + nodes: BTreeMap, } impl Directory { @@ -46,31 +46,17 @@ impl Directory { /// Allows iterating over all nodes (directories, files and symlinks) /// For each, it returns a tuple of its name and node. /// The elements are sorted by their names. - pub fn nodes(&self) -> impl Iterator + Send + Sync + '_ { + pub fn nodes(&self) -> impl Iterator + Send + Sync + '_ { self.nodes.iter() } - /// Checks a Node name for validity as a directory entry - /// We disallow slashes, null bytes, '.', '..' and the empty string. - pub(crate) fn is_valid_name(name: &[u8]) -> bool { - !(name.is_empty() - || name == b".." - || name == b"." - || name.contains(&0x00) - || name.contains(&b'/')) - } - /// Adds the specified [Node] to the [Directory] with a given name. /// /// Inserting an element that already exists with the same name in the directory will yield an /// error. /// Inserting an element will validate that its name fulfills the /// requirements for directory entries and yield an error if it is not. 
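// Hedged sketch of the new Directory::add / Directory::nodes surface: names are
// validated PathComponents, and iteration hands back (&PathComponent, &Node) pairs
// sorted by name. Symlink entries keep the example digest-free; assumes the patched
// tvix_castore crate.
use tvix_castore::{Directory, Node};

fn main() {
    let mut dir = Directory::new();
    for name in ["z", "a", "b"] {
        dir.add(
            name.try_into().unwrap(),
            Node::Symlink {
                target: "/nix/store/somewhereelse".try_into().unwrap(),
            },
        )
        .unwrap();
    }

    // Entries come back ordered by name; Display on PathComponent prints the raw name.
    let names: Vec<String> = dir.nodes().map(|(name, _node)| name.to_string()).collect();
    assert_eq!(names, ["a", "b", "z"]);
}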
- pub fn add(&mut self, name: bytes::Bytes, node: Node) -> Result<(), DirectoryError> { - if !Self::is_valid_name(&name) { - return Err(DirectoryError::InvalidName(name)); - } - + pub fn add(&mut self, name: PathComponent, node: Node) -> Result<(), DirectoryError> { // Check that the even after adding this new directory entry, the size calculation will not // overflow // FUTUREWORK: add some sort of batch add interface which only does this check once with @@ -91,7 +77,7 @@ impl Directory { Ok(()) } std::collections::btree_map::Entry::Occupied(occupied) => { - Err(DirectoryError::DuplicateName(occupied.key().to_vec())) + Err(DirectoryError::DuplicateName(occupied.key().to_owned())) } } } @@ -112,7 +98,7 @@ mod test { let mut d = Directory::new(); d.add( - "b".into(), + "b".try_into().unwrap(), Node::Directory { digest: DUMMY_DIGEST.clone(), size: 1, @@ -120,7 +106,7 @@ mod test { ) .unwrap(); d.add( - "a".into(), + "a".try_into().unwrap(), Node::Directory { digest: DUMMY_DIGEST.clone(), size: 1, @@ -128,7 +114,7 @@ mod test { ) .unwrap(); d.add( - "z".into(), + "z".try_into().unwrap(), Node::Directory { digest: DUMMY_DIGEST.clone(), size: 1, @@ -137,7 +123,7 @@ mod test { .unwrap(); d.add( - "f".into(), + "f".try_into().unwrap(), Node::File { digest: DUMMY_DIGEST.clone(), size: 1, @@ -146,7 +132,7 @@ mod test { ) .unwrap(); d.add( - "c".into(), + "c".try_into().unwrap(), Node::File { digest: DUMMY_DIGEST.clone(), size: 1, @@ -155,7 +141,7 @@ mod test { ) .unwrap(); d.add( - "g".into(), + "g".try_into().unwrap(), Node::File { digest: DUMMY_DIGEST.clone(), size: 1, @@ -165,21 +151,21 @@ mod test { .unwrap(); d.add( - "t".into(), + "t".try_into().unwrap(), Node::Symlink { target: "a".try_into().unwrap(), }, ) .unwrap(); d.add( - "o".into(), + "o".try_into().unwrap(), Node::Symlink { target: "a".try_into().unwrap(), }, ) .unwrap(); d.add( - "e".into(), + "e".try_into().unwrap(), Node::Symlink { target: "a".try_into().unwrap(), }, @@ -197,7 +183,7 @@ mod test { assert_eq!( d.add( - "foo".into(), + "foo".try_into().unwrap(), Node::Directory { digest: DUMMY_DIGEST.clone(), size: u64::MAX @@ -212,7 +198,7 @@ mod test { let mut d = Directory::new(); d.add( - "a".into(), + "a".try_into().unwrap(), Node::Directory { digest: DUMMY_DIGEST.clone(), size: 1, @@ -223,7 +209,7 @@ mod test { format!( "{}", d.add( - "a".into(), + "a".try_into().unwrap(), Node::File { digest: DUMMY_DIGEST.clone(), size: 1, @@ -235,20 +221,4 @@ mod test { "\"a\" is a duplicate name" ); } - - /// Attempt to add a directory entry with a name which should be rejected. - #[tokio::test] - async fn directory_reject_invalid_name() { - let mut dir = Directory::new(); - assert!( - dir.add( - "".into(), // wrong! can not be added to directory - Node::Symlink { - target: "doesntmatter".try_into().unwrap(), - }, - ) - .is_err(), - "invalid symlink entry be rejected" - ); - } } diff --git a/tvix/castore/src/nodes/mod.rs b/tvix/castore/src/nodes/mod.rs index 0c7b89de9..684e65f89 100644 --- a/tvix/castore/src/nodes/mod.rs +++ b/tvix/castore/src/nodes/mod.rs @@ -4,7 +4,7 @@ mod symlink_target; use crate::B3Digest; pub use directory::Directory; -use symlink_target::SymlinkTarget; +pub use symlink_target::SymlinkTarget; /// A Node is either a [DirectoryNode], [FileNode] or [SymlinkNode]. 
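// Hedged sketch of where the deleted `directory_reject_invalid_name` test went:
// invalid names can no longer reach Directory::add at all, because constructing the
// PathComponent already fails with DirectoryError::InvalidName. Assumes the patched
// tvix_castore API.
use tvix_castore::{DirectoryError, PathComponent};

fn main() {
    for invalid in ["", ".", "..", "foo/bar", "foo\0bar"] {
        assert!(matches!(
            PathComponent::try_from(invalid),
            Err(DirectoryError::InvalidName(_))
        ));
    }
    // Ordinary basenames, including dotfiles like ".keep", are still accepted.
    assert!(PathComponent::try_from(".keep").is_ok());
}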
/// Nodes themselves don't have names, what gives them names is either them diff --git a/tvix/castore/src/path/component.rs b/tvix/castore/src/path/component.rs new file mode 100644 index 000000000..f755f06e6 --- /dev/null +++ b/tvix/castore/src/path/component.rs @@ -0,0 +1,102 @@ +// TODO: split out this error +use crate::DirectoryError; + +use bstr::ByteSlice; +use std::fmt::{self, Debug, Display}; + +/// A wrapper type for validated path components in the castore model. +/// Internally uses a [bytes::Bytes], but disallows +/// slashes, and null bytes to be present, as well as +/// '.', '..' and the empty string. +#[repr(transparent)] +#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct PathComponent { + pub(super) inner: bytes::Bytes, +} + +impl AsRef<[u8]> for PathComponent { + fn as_ref(&self) -> &[u8] { + self.inner.as_ref() + } +} + +impl From for bytes::Bytes { + fn from(value: PathComponent) -> Self { + value.inner + } +} + +pub(super) fn is_valid_name>(name: B) -> bool { + let v = name.as_ref(); + + !v.is_empty() && v != *b".." && v != *b"." && !v.contains(&0x00) && !v.contains(&b'/') +} + +impl TryFrom for PathComponent { + type Error = DirectoryError; + + fn try_from(value: bytes::Bytes) -> Result { + if !is_valid_name(&value) { + return Err(DirectoryError::InvalidName(value)); + } + + Ok(Self { inner: value }) + } +} + +impl TryFrom<&'static [u8]> for PathComponent { + type Error = DirectoryError; + + fn try_from(value: &'static [u8]) -> Result { + if !is_valid_name(value) { + return Err(DirectoryError::InvalidName(bytes::Bytes::from_static( + value, + ))); + } + Ok(Self { + inner: bytes::Bytes::from_static(value), + }) + } +} + +impl TryFrom<&str> for PathComponent { + type Error = DirectoryError; + + fn try_from(value: &str) -> Result { + if !is_valid_name(value) { + return Err(DirectoryError::InvalidName(bytes::Bytes::copy_from_slice( + value.as_bytes(), + ))); + } + Ok(Self { + inner: bytes::Bytes::copy_from_slice(value.as_bytes()), + }) + } +} + +impl TryFrom<&std::ffi::CStr> for PathComponent { + type Error = DirectoryError; + + fn try_from(value: &std::ffi::CStr) -> Result { + if !is_valid_name(value.to_bytes()) { + return Err(DirectoryError::InvalidName(bytes::Bytes::copy_from_slice( + value.to_bytes(), + ))); + } + Ok(Self { + inner: bytes::Bytes::copy_from_slice(value.to_bytes()), + }) + } +} + +impl Debug for PathComponent { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(self.inner.as_bstr(), f) + } +} + +impl Display for PathComponent { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Display::fmt(self.inner.as_bstr(), f) + } +} diff --git a/tvix/castore/src/path.rs b/tvix/castore/src/path/mod.rs similarity index 91% rename from tvix/castore/src/path.rs rename to tvix/castore/src/path/mod.rs index 8a55e9f5a..5a68f1add 100644 --- a/tvix/castore/src/path.rs +++ b/tvix/castore/src/path/mod.rs @@ -1,5 +1,5 @@ //! Contains data structures to deal with Paths in the tvix-castore model. - +use bstr::ByteSlice; use std::{ borrow::Borrow, fmt::{self, Debug, Display}, @@ -8,9 +8,8 @@ use std::{ str::FromStr, }; -use bstr::ByteSlice; - -use crate::Directory; +mod component; +pub use component::PathComponent; /// Represents a Path in the castore model. /// These are always relative, and platform-independent, which distinguishes @@ -38,7 +37,7 @@ impl Path { if !bytes.is_empty() { // Ensure all components are valid castore node names. 
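// Hedged usage sketch for the new PathComponent type defined above: the same
// validation applies regardless of the source type, Display shows the raw name
// while Debug quotes it (via bstr), and the inner bytes::Bytes can be recovered
// through the From impl. Assumes the patched tvix_castore and the bytes crate.
use bytes::Bytes;
use tvix_castore::PathComponent;

fn main() {
    let from_str: PathComponent = "foo".try_into().unwrap();
    let from_bytes: PathComponent = Bytes::from_static(b"foo").try_into().unwrap();
    assert_eq!(from_str, from_bytes);

    assert_eq!(from_str.to_string(), "foo");
    assert_eq!(format!("{from_str:?}"), "\"foo\"");

    // The validated name converts back into bytes::Bytes.
    let raw: Bytes = from_str.into();
    assert_eq!(raw, Bytes::from_static(b"foo"));
}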
for component in bytes.split_str(b"/") { - if !Directory::is_valid_name(component) { + if !component::is_valid_name(component) { return None; } } @@ -83,10 +82,26 @@ impl Path { Ok(v) } + /// Provides an iterator over the components of the path, + /// which are individual [PathComponent]. + /// In case the path is empty, an empty iterator is returned. + pub fn components(&self) -> impl Iterator + '_ { + let mut iter = self.inner.split_str(&b"/"); + + // We don't want to return an empty element, consume it if it's the only one. + if self.inner.is_empty() { + let _ = iter.next(); + } + + iter.map(|b| PathComponent { + inner: bytes::Bytes::copy_from_slice(b), + }) + } + /// Produces an iterator over the components of the path, which are /// individual byte slices. /// In case the path is empty, an empty iterator is returned. - pub fn components(&self) -> impl Iterator { + pub fn components_bytes(&self) -> impl Iterator { let mut iter = self.inner.split_str(&b"/"); // We don't want to return an empty element, consume it if it's the only one. @@ -97,11 +112,16 @@ impl Path { iter } - /// Returns the final component of the Path, if there is one. - pub fn file_name(&self) -> Option<&[u8]> { + /// Returns the final component of the Path, if there is one, as a [PathComponent]. + pub fn file_name(&self) -> Option { self.components().last() } + /// Returns the final component of the Path, if there is one, in bytes. + pub fn file_name_bytes(&self) -> Option<&[u8]> { + self.components_bytes().last() + } + pub fn as_bytes(&self) -> &[u8] { &self.inner } @@ -213,7 +233,7 @@ impl PathBuf { /// Adjoins `name` to self. pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> { - if !Directory::is_valid_name(name) { + if !component::is_valid_name(name) { return Err(std::io::ErrorKind::InvalidData.into()); } @@ -333,7 +353,7 @@ mod test { assert_eq!(s.as_bytes(), p.as_bytes(), "inner bytes mismatch"); assert_eq!( num_components, - p.components().count(), + p.components_bytes().count(), "number of components mismatch" ); } @@ -400,10 +420,10 @@ #[case("a", vec!["a"])] #[case("a/b", vec!["a", "b"])] #[case("a/b/c", vec!["a","b", "c"])] - pub fn components(#[case] p: PathBuf, #[case] exp_components: Vec<&str>) { + pub fn components_bytes(#[case] p: PathBuf, #[case] exp_components: Vec<&str>) { assert_eq!( exp_components, - p.components() + p.components_bytes() .map(|x| x.to_str().unwrap()) .collect::>() ); diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs index c12eb4ee3..f32f3a068 100644 --- a/tvix/castore/src/proto/mod.rs +++ b/tvix/castore/src/proto/mod.rs @@ -5,7 +5,7 @@ use prost::Message; mod grpc_blobservice_wrapper; mod grpc_directoryservice_wrapper; -use crate::{B3Digest, DirectoryError}; +use crate::{path::PathComponent, B3Digest, DirectoryError}; pub use grpc_blobservice_wrapper::GRPCBlobServiceWrapper; pub use grpc_directoryservice_wrapper::GRPCDirectoryServiceWrapper; @@ -162,19 +162,19 @@ impl From<&crate::Directory> for Directory { size, executable, } => files.push(FileNode { - name: name.clone(), + name: name.to_owned().into(), digest: digest.to_owned().into(), size: *size, executable: *executable, }), crate::Node::Directory { digest, size } => directories.push(DirectoryNode { - name: name.clone(), + name: name.to_owned().into(), digest: digest.to_owned().into(), size: *size, }), crate::Node::Symlink { target } => { symlinks.push(SymlinkNode { - name: name.clone(), + name: name.to_owned().into(), target: target.to_owned().into(), }); } @@ -190,22 +190,24 @@
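// Hedged sketch of the two component iterators that Path/PathBuf expose after this
// change: components() yields owned, validated PathComponents, components_bytes()
// keeps the borrowed &[u8] view, and file_name()/file_name_bytes() mirror that
// split. Parsing via FromStr is assumed to behave as in the tests above.
use tvix_castore::PathBuf;

fn main() {
    let p: PathBuf = "a/b/c".parse().unwrap();

    // Owned, validated components.
    let names: Vec<String> = p.components().map(|c| c.to_string()).collect();
    assert_eq!(names, ["a", "b", "c"]);

    // Borrowed byte slices, as before.
    assert_eq!(p.components_bytes().count(), 3);

    // The final component, in both flavours.
    assert_eq!(p.file_name().unwrap().to_string(), "c");
    assert_eq!(p.file_name_bytes(), Some(b"c".as_slice()));
}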
impl From<&crate::Directory> for Directory { impl Node { /// Converts a proto [Node] to a [crate::Node], and splits off the name. - pub fn into_name_and_node(self) -> Result<(bytes::Bytes, crate::Node), DirectoryError> { + pub fn into_name_and_node(self) -> Result<(PathComponent, crate::Node), DirectoryError> { match self.node.ok_or_else(|| DirectoryError::NoNodeSet)? { node::Node::Directory(n) => { + let name: PathComponent = n.name.try_into()?; let digest = B3Digest::try_from(n.digest) - .map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e.into()))?; + .map_err(|e| DirectoryError::InvalidNode(name.clone(), e.into()))?; let node = crate::Node::Directory { digest, size: n.size, }; - Ok((n.name, node)) + Ok((name, node)) } node::Node::File(n) => { + let name: PathComponent = n.name.try_into()?; let digest = B3Digest::try_from(n.digest) - .map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e.into()))?; + .map_err(|e| DirectoryError::InvalidNode(name.clone(), e.into()))?; let node = crate::Node::File { digest, @@ -213,23 +215,26 @@ impl Node { executable: n.executable, }; - Ok((n.name, node)) + Ok((name, node)) } node::Node::Symlink(n) => { + let name: PathComponent = n.name.try_into()?; let node = crate::Node::Symlink { target: n .target .try_into() - .map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e))?, + .map_err(|e| DirectoryError::InvalidNode(name.clone(), e))?, }; - Ok((n.name, node)) + Ok((name, node)) } } } /// Constructs a [Node] from a name and [crate::Node]. + /// The name is a [bytes::Bytes], not a [PathComponent], as we use an + /// empty name in some places. pub fn from_name_and_node(name: bytes::Bytes, n: crate::Node) -> Self { match n { crate::Node::Directory { digest, size } => Self {
"bytes::Bytes", - "tvix_castore::digests::B3Digest" + "tvix_castore::digests::B3Digest", + "tvix_castore::path::component::PathComponent" ] diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs index 234a5ef38..0833fc45e 100644 --- a/tvix/glue/src/tvix_store_io.rs +++ b/tvix/glue/src/tvix_store_io.rs @@ -314,7 +314,7 @@ impl TvixStoreIO { // assemble the PathInfo to persist let path_info = PathInfo { node: Some(tvix_castore::proto::Node::from_name_and_node( - output_name, + output_name.into(), output_node, )), references: vec![], // TODO: refscan @@ -355,7 +355,9 @@ impl TvixStoreIO { .outputs .into_iter() .map(|e| e.into_name_and_node().expect("invalid node")) - .find(|(output_name, _output_node)| output_name == s.as_bytes()) + .find(|(output_name, _output_node)| { + output_name.as_ref() == s.as_bytes() + }) .expect("build didn't produce the store path") .1 } @@ -562,9 +564,13 @@ impl EvalIO for TvixStoreIO { // TODO: into_nodes() to avoid cloning for (name, node) in directory.nodes() { children.push(match node { - Node::Directory { .. } => (name.clone(), FileType::Directory), - Node::File { .. } => (name.clone(), FileType::Regular), - Node::Symlink { .. } => (name.clone(), FileType::Symlink), + Node::Directory { .. } => { + (name.clone().into(), FileType::Directory) + } + Node::File { .. } => (name.clone().into(), FileType::Regular), + Node::Symlink { .. } => { + (name.clone().into(), FileType::Symlink) + } }) } Ok(children) diff --git a/tvix/nar-bridge/src/nar.rs b/tvix/nar-bridge/src/nar.rs index f65c8f0be..e43b220c5 100644 --- a/tvix/nar-bridge/src/nar.rs +++ b/tvix/nar-bridge/src/nar.rs @@ -57,7 +57,7 @@ pub async fn get( StatusCode::BAD_REQUEST })?; - if !root_name.is_empty() { + if !root_name.as_ref().is_empty() { warn!("root node has name, which it shouldn't"); return Err(StatusCode::BAD_REQUEST); } diff --git a/tvix/store/src/import.rs b/tvix/store/src/import.rs index f2f38f538..5a21b375b 100644 --- a/tvix/store/src/import.rs +++ b/tvix/store/src/import.rs @@ -3,6 +3,7 @@ use std::path::Path; use tracing::{debug, instrument}; use tvix_castore::{ blobservice::BlobService, directoryservice::DirectoryService, import::fs::ingest_path, Node, + PathComponent, }; use nix_compat::{ @@ -139,14 +140,19 @@ where ) })?; - let name = bytes::Bytes::from(output_path.to_string()); + let name: PathComponent = output_path + .to_string() + .as_str() + .try_into() + .expect("Tvix bug: StorePath must be PathComponent"); + log_node(name.as_ref(), &root_node, path.as_ref()); let path_info = derive_nar_ca_path_info( nar_size, nar_sha256, Some(&CAHash::Nar(NixHash::Sha256(nar_sha256))), - output_path.to_string().into_bytes().into(), + name.into(), root_node, ); diff --git a/tvix/store/src/nar/renderer.rs b/tvix/store/src/nar/renderer.rs index 84c73c981..afd85f267 100644 --- a/tvix/store/src/nar/renderer.rs +++ b/tvix/store/src/nar/renderer.rs @@ -178,7 +178,7 @@ where // and then recurse on that entry. 
for (name, node) in directory.nodes() { let child_node = nar_node_directory - .entry(name) + .entry(name.as_ref()) .await .map_err(RenderError::NARWriterError)?; diff --git a/tvix/store/src/pathinfoservice/fs/mod.rs b/tvix/store/src/pathinfoservice/fs/mod.rs index 7a46e1fc8..d97032b66 100644 --- a/tvix/store/src/pathinfoservice/fs/mod.rs +++ b/tvix/store/src/pathinfoservice/fs/mod.rs @@ -3,7 +3,7 @@ use futures::StreamExt; use tonic::async_trait; use tvix_castore::fs::{RootNodes, TvixStoreFs}; use tvix_castore::{blobservice::BlobService, directoryservice::DirectoryService}; -use tvix_castore::{Error, Node}; +use tvix_castore::{Error, Node, PathComponent}; use super::PathInfoService; @@ -47,8 +47,8 @@ impl RootNodes for RootNodesWrapper where T: AsRef + Send + Sync, { - async fn get_by_basename(&self, name: &[u8]) -> Result, Error> { - let Ok(store_path) = nix_compat::store_path::StorePath::from_bytes(name) else { + async fn get_by_basename(&self, name: &PathComponent) -> Result, Error> { + let Ok(store_path) = nix_compat::store_path::StorePath::from_bytes(name.as_ref()) else { return Ok(None); }; @@ -72,7 +72,7 @@ where .transpose()?) } - fn list(&self) -> BoxStream> { + fn list(&self) -> BoxStream> { Box::pin(self.0.as_ref().list().map(|result| { result.and_then(|path_info| { let node = path_info diff --git a/tvix/store/src/proto/tests/pathinfo.rs b/tvix/store/src/proto/tests/pathinfo.rs index ed8c64f6c..253cafad0 100644 --- a/tvix/store/src/proto/tests/pathinfo.rs +++ b/tvix/store/src/proto/tests/pathinfo.rs @@ -35,7 +35,7 @@ fn validate_pathinfo( name: DUMMY_PATH.into(), digest: Bytes::new(), size: 0, -}, Err(ValidatePathInfoError::InvalidRootNode(DirectoryError::InvalidNode(DUMMY_PATH.into(), ValidateNodeError::InvalidDigestLen(0)))))] +}, Err(ValidatePathInfoError::InvalidRootNode(DirectoryError::InvalidNode(DUMMY_PATH.try_into().unwrap(), ValidateNodeError::InvalidDigestLen(0)))))] #[case::invalid_node_name_no_storepath(castorepb::DirectoryNode { name: "invalid".into(), digest: DUMMY_DIGEST.clone().into(), @@ -74,7 +74,7 @@ fn validate_directory( digest: Bytes::new(), ..Default::default() }, - Err(ValidatePathInfoError::InvalidRootNode(DirectoryError::InvalidNode(DUMMY_PATH.into(), ValidateNodeError::InvalidDigestLen(0)))) + Err(ValidatePathInfoError::InvalidRootNode(DirectoryError::InvalidNode(DUMMY_PATH.try_into().unwrap(), ValidateNodeError::InvalidDigestLen(0)))) )] #[case::invalid_node_name( castorepb::FileNode {
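// Hedged round-trip sketch for the proto conversions touched above:
// from_name_and_node still takes raw bytes::Bytes (so the nameless root node in a
// PathInfo stays representable), while into_name_and_node hands back a validated
// PathComponent. A symlink node keeps the example digest-free; assumes the patched
// tvix_castore crate.
use tvix_castore::{proto, Node, PathComponent};

fn main() {
    let node = Node::Symlink {
        target: "/nix/store/somewhereelse".try_into().unwrap(),
    };
    let proto_node = proto::Node::from_name_and_node("out".into(), node);

    let (name, node) = proto_node
        .into_name_and_node()
        .expect("name and node must be valid");
    assert_eq!(name, PathComponent::try_from("out").unwrap());
    assert!(matches!(node, Node::Symlink { .. }));
}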