refactor(tvix/castore): drop {Directory,File,Symlink}Node

Add a `SymlinkTarget` type to represent validated symlink targets.
With this, no invalid states are representable, so we can make `Node` be
just an enum of all three kinds of nodes, and allow access to their
fields directly.

Change-Id: I20bdd480c8d5e64a827649f303c97023b7e390f2
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12216
Reviewed-by: benjaminedwardwebb <benjaminedwardwebb@gmail.com>
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Tested-by: BuildkiteCI
This commit is contained in:
Florian Klink 2024-08-16 02:24:12 +03:00 committed by clbot
parent 49b173786c
commit 8ea7d2b60e
27 changed files with 555 additions and 461 deletions

View file

@ -10,7 +10,7 @@ use petgraph::{
use tracing::instrument;
use super::order_validator::{LeavesToRootValidator, OrderValidator, RootToLeavesValidator};
use crate::{B3Digest, Directory, DirectoryNode};
use crate::{B3Digest, Directory, Node};
#[derive(thiserror::Error, Debug)]
pub enum Error {
@ -18,6 +18,8 @@ pub enum Error {
ValidationError(String),
}
type Edge = (B3Digest, u64);
/// This can be used to validate and/or re-order a Directory closure (DAG of
/// connected Directories), and their insertion order.
///
@ -55,7 +57,7 @@ pub struct DirectoryGraph<O> {
//
// The option in the edge weight tracks the pending validation state of the respective edge, for example if
// the child has not been added yet.
graph: DiGraph<Option<Directory>, Option<DirectoryNode>>,
graph: DiGraph<Option<Directory>, Option<Edge>>,
// A lookup table from directory digest to node index.
digest_to_node_ix: HashMap<B3Digest, NodeIndex>,
@ -64,18 +66,18 @@ pub struct DirectoryGraph<O> {
}
pub struct ValidatedDirectoryGraph {
graph: DiGraph<Option<Directory>, Option<DirectoryNode>>,
graph: DiGraph<Option<Directory>, Option<Edge>>,
root: Option<NodeIndex>,
}
fn check_edge(dir: &DirectoryNode, dir_name: &[u8], child: &Directory) -> Result<(), Error> {
fn check_edge(dir: &Edge, dir_name: &[u8], child: &Directory) -> Result<(), Error> {
// Ensure the size specified in the child node matches our records.
if dir.size() != child.size() {
if dir.1 != child.size() {
return Err(Error::ValidationError(format!(
"'{}' has wrong size, specified {}, recorded {}",
dir_name.as_bstr(),
dir.size(),
dir.1,
child.size(),
)));
}
@ -141,21 +143,23 @@ impl<O: OrderValidator> DirectoryGraph<O> {
}
// set up edges to all child directories
for (subdir_name, subdir_node) in directory.directories() {
let child_ix = *self
.digest_to_node_ix
.entry(subdir_node.digest().clone())
.or_insert_with(|| self.graph.add_node(None));
for (name, node) in directory.nodes() {
if let Node::Directory { digest, size } = node {
let child_ix = *self
.digest_to_node_ix
.entry(digest.clone())
.or_insert_with(|| self.graph.add_node(None));
let pending_edge_check = match &self.graph[child_ix] {
Some(child) => {
// child is already available, validate the edge now
check_edge(subdir_node, subdir_name, child)?;
None
}
None => Some(subdir_node.clone()), // pending validation
};
self.graph.add_edge(ix, child_ix, pending_edge_check);
let pending_edge_check = match &self.graph[child_ix] {
Some(child) => {
// child is already available, validate the edge now
check_edge(&(digest.to_owned(), *size), name, child)?;
None
}
None => Some((digest.to_owned(), *size)), // pending validation
};
self.graph.add_edge(ix, child_ix, pending_edge_check);
}
}
// validate the edges from parents to this node
@ -270,7 +274,7 @@ impl ValidatedDirectoryGraph {
#[cfg(test)]
mod tests {
use crate::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C};
use crate::{Directory, DirectoryNode, Node};
use crate::{Directory, Node};
use lazy_static::lazy_static;
use rstest::rstest;
@ -281,10 +285,10 @@ mod tests {
let mut dir = Directory::new();
dir.add(
"foo".into(),
Node::Directory(DirectoryNode::new(
DIRECTORY_A.digest(),
DIRECTORY_A.size() + 42, // wrong!
))).unwrap();
Node::Directory{
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size() + 42, // wrong!
}).unwrap();
dir
};
}

View file

@ -21,7 +21,7 @@ use super::{
RootToLeavesValidator,
};
use crate::composition::{CompositionContext, ServiceBuilder};
use crate::{proto, B3Digest, Error};
use crate::{proto, B3Digest, Error, Node};
/// Stores directory closures in an object store.
/// Notably, this makes use of the option to disallow accessing child directories except when
@ -85,7 +85,11 @@ impl DirectoryService for ObjectStoreDirectoryService {
#[instrument(skip(self, directory), fields(directory.digest = %directory.digest()))]
async fn put(&self, directory: Directory) -> Result<B3Digest, Error> {
if directory.directories().next().is_some() {
// Ensure the directory doesn't contain other directory children
if directory
.nodes()
.any(|(_, e)| matches!(e, Node::Directory { .. }))
{
return Err(Error::InvalidRequest(
"only put_multiple_start is supported by the ObjectStoreDirectoryService for directories with children".into(),
));

View file

@ -2,7 +2,7 @@ use std::collections::HashSet;
use tracing::warn;
use super::Directory;
use crate::B3Digest;
use crate::{B3Digest, Node};
pub trait OrderValidator {
/// Update the order validator's state with the directory
@ -48,9 +48,11 @@ impl RootToLeavesValidator {
self.expected_digests.insert(directory.digest());
}
for (_, subdir_node) in directory.directories() {
// Allow the children to appear next
self.expected_digests.insert(subdir_node.digest().clone());
// Allow the children to appear next
for (_, node) in directory.nodes() {
if let Node::Directory { digest, .. } = node {
self.expected_digests.insert(digest.clone());
}
}
}
}
@ -79,14 +81,20 @@ impl OrderValidator for LeavesToRootValidator {
fn add_directory(&mut self, directory: &Directory) -> bool {
let digest = directory.digest();
for (_, subdir_node) in directory.directories() {
if !self.allowed_references.contains(subdir_node.digest()) {
warn!(
directory.digest = %digest,
subdirectory.digest = %subdir_node.digest(),
"unexpected directory reference"
);
return false;
for (_, node) in directory.nodes() {
if let Node::Directory {
digest: subdir_node_digest,
..
} = node
{
if !self.allowed_references.contains(subdir_node_digest) {
warn!(
directory.digest = %digest,
subdirectory.digest = %subdir_node_digest,
"unexpected directory reference"
);
return false;
}
}
}

View file

@ -9,7 +9,7 @@ use rstest_reuse::{self, *};
use super::DirectoryService;
use crate::directoryservice;
use crate::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C, DIRECTORY_D};
use crate::{Directory, DirectoryNode, Node};
use crate::{Directory, Node};
mod utils;
use self::utils::make_grpc_directory_service_client;
@ -220,12 +220,13 @@ async fn upload_reject_wrong_size(directory_service: impl DirectoryService) {
let mut dir = Directory::new();
dir.add(
"foo".into(),
Node::Directory(DirectoryNode::new(
DIRECTORY_A.digest(),
DIRECTORY_A.size() + 42, // wrong!
)),
Node::Directory {
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size() + 42, // wrong!
},
)
.unwrap();
dir
};

View file

@ -15,26 +15,24 @@ where
let mut parent_node = root_node;
for component in path.as_ref().components() {
match parent_node {
Node::File(_) | Node::Symlink(_) => {
Node::File { .. } | Node::Symlink { .. } => {
// There's still some path left, but the parent node is no directory.
// This means the path doesn't exist, as we can't reach it.
return Ok(None);
}
Node::Directory(directory_node) => {
Node::Directory { digest, .. } => {
// fetch the linked node from the directory_service.
let directory = directory_service
.as_ref()
.get(directory_node.digest())
.await?
.ok_or_else(|| {
// If we didn't get the directory node that's linked, that's a store inconsistency, bail out!
warn!("directory {} does not exist", directory_node.digest());
let directory =
directory_service
.as_ref()
.get(&digest)
.await?
.ok_or_else(|| {
// If we didn't get the directory node that's linked, that's a store inconsistency, bail out!
warn!("directory {} does not exist", digest);
Error::StorageError(format!(
"directory {} does not exist",
directory_node.digest()
))
})?;
Error::StorageError(format!("directory {} does not exist", digest))
})?;
// look for the component in the [Directory].
if let Some((_child_name, child_node)) = directory
@ -59,8 +57,8 @@ where
mod tests {
use crate::{
directoryservice,
fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP},
DirectoryNode, Node, PathBuf,
fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP, EMPTY_BLOB_DIGEST},
Node, PathBuf,
};
use super::descend_to;
@ -82,23 +80,23 @@ mod tests {
handle.close().await.expect("must upload");
// construct the node for DIRECTORY_COMPLICATED
let node_directory_complicated = Node::Directory(DirectoryNode::new(
DIRECTORY_COMPLICATED.digest(),
DIRECTORY_COMPLICATED.size(),
));
let node_directory_complicated = Node::Directory {
digest: DIRECTORY_COMPLICATED.digest(),
size: DIRECTORY_COMPLICATED.size(),
};
// construct the node for DIRECTORY_COMPLICATED
let node_directory_with_keep = Node::Directory(
DIRECTORY_COMPLICATED
.directories()
.next()
.unwrap()
.1
.clone(),
);
let node_directory_with_keep = Node::Directory {
digest: DIRECTORY_WITH_KEEP.digest(),
size: DIRECTORY_WITH_KEEP.size(),
};
// construct the node for the .keep file
let node_file_keep = Node::File(DIRECTORY_WITH_KEEP.files().next().unwrap().1.clone());
let node_file_keep = Node::File {
digest: EMPTY_BLOB_DIGEST.clone(),
size: 0,
executable: false,
};
// traversal to an empty subpath should return the root node.
{

View file

@ -2,6 +2,7 @@ use super::Directory;
use super::DirectoryService;
use crate::B3Digest;
use crate::Error;
use crate::Node;
use async_stream::try_stream;
use futures::stream::BoxStream;
use std::collections::{HashSet, VecDeque};
@ -57,15 +58,15 @@ pub fn traverse_directory<'a, DS: DirectoryService + 'static>(
// enqueue all child directory digests to the work queue, as
// long as they're not part of the worklist or already sent.
// This panics if the digest looks invalid, it's supposed to be checked first.
for (_, child_directory_node) in current_directory.directories() {
let child_digest = child_directory_node.digest();
if worklist_directory_digests.contains(child_digest)
|| sent_directory_digests.contains(child_digest)
{
continue;
for (_, child_directory_node) in current_directory.nodes() {
if let Node::Directory{digest: child_digest, ..} = child_directory_node {
if worklist_directory_digests.contains(child_digest)
|| sent_directory_digests.contains(child_digest)
{
continue;
}
worklist_directory_digests.push_back(child_digest.clone());
}
worklist_directory_digests.push_back(child_digest.clone());
}
yield current_directory;

View file

@ -1,6 +1,4 @@
use crate::{
B3Digest, {Directory, DirectoryNode, FileNode, Node, SymlinkNode},
};
use crate::{B3Digest, Directory, Node};
use lazy_static::lazy_static;
pub const HELLOWORLD_BLOB_CONTENTS: &[u8] = b"Hello World!";
@ -37,11 +35,11 @@ lazy_static! {
let mut dir = Directory::new();
dir.add(
".keep".into(),
Node::File(FileNode::new(
EMPTY_BLOB_DIGEST.clone(),
0,
false
))).unwrap();
Node::File{
digest: EMPTY_BLOB_DIGEST.clone(),
size: 0,
executable: false
}).unwrap();
dir
};
@ -49,22 +47,22 @@ lazy_static! {
let mut dir = Directory::new();
dir.add(
"keep".into(),
Node::Directory(DirectoryNode::new(
DIRECTORY_WITH_KEEP.digest(),
DIRECTORY_WITH_KEEP.size()
))).unwrap();
Node::Directory{
digest: DIRECTORY_WITH_KEEP.digest(),
size: DIRECTORY_WITH_KEEP.size()
}).unwrap();
dir.add(
".keep".into(),
Node::File(FileNode::new(
EMPTY_BLOB_DIGEST.clone(),
0,
false
))).unwrap();
Node::File{
digest: EMPTY_BLOB_DIGEST.clone(),
size: 0,
executable: false
}).unwrap();
dir.add(
"aa".into(),
Node::Symlink(SymlinkNode::new(
b"/nix/store/somewhereelse".to_vec().into()
).unwrap())).unwrap();
Node::Symlink{
target: "/nix/store/somewhereelse".try_into().unwrap()
}).unwrap();
dir
};
@ -73,10 +71,10 @@ lazy_static! {
let mut dir = Directory::new();
dir.add(
"a".into(),
Node::Directory(DirectoryNode::new(
DIRECTORY_A.digest(),
DIRECTORY_A.size(),
))).unwrap();
Node::Directory{
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size(),
}).unwrap();
dir
};
@ -84,16 +82,16 @@ lazy_static! {
let mut dir = Directory::new();
dir.add(
"a".into(),
Node::Directory(DirectoryNode::new(
DIRECTORY_A.digest(),
DIRECTORY_A.size(),
))).unwrap();
Node::Directory{
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size(),
}).unwrap();
dir.add(
"a'".into(),
Node::Directory(DirectoryNode::new(
DIRECTORY_A.digest(),
DIRECTORY_A.size(),
))).unwrap();
Node::Directory{
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size(),
}).unwrap();
dir
};
@ -101,16 +99,16 @@ lazy_static! {
let mut dir = Directory::new();
dir.add(
"a".into(),
Node::Directory(DirectoryNode::new(
DIRECTORY_A.digest(),
DIRECTORY_A.size(),
))).unwrap();
Node::Directory{
digest: DIRECTORY_A.digest(),
size: DIRECTORY_A.size(),
}).unwrap();
dir.add(
"b".into(),
Node::Directory(DirectoryNode::new(
DIRECTORY_B.digest(),
DIRECTORY_B.size(),
))).unwrap();
Node::Directory{
digest: DIRECTORY_B.digest(),
size: DIRECTORY_B.size(),
}).unwrap();
dir
};

View file

@ -16,7 +16,7 @@ use crate::fs::{TvixStoreFs, XATTR_NAME_BLOB_DIGEST, XATTR_NAME_DIRECTORY_DIGEST
use crate::{
blobservice::{BlobService, MemoryBlobService},
directoryservice::{DirectoryService, MemoryDirectoryService},
fixtures, {DirectoryNode, FileNode, Node, SymlinkNode},
fixtures, Node,
};
const BLOB_A_NAME: &str = "00000000000000000000000000000000-test";
@ -68,11 +68,11 @@ async fn populate_blob_a(
root_nodes.insert(
BLOB_A_NAME.into(),
Node::File(FileNode::new(
fixtures::BLOB_A_DIGEST.clone(),
fixtures::BLOB_A.len() as u64,
false,
)),
Node::File {
digest: fixtures::BLOB_A_DIGEST.clone(),
size: fixtures::BLOB_A.len() as u64,
executable: false,
},
);
}
@ -88,11 +88,11 @@ async fn populate_blob_b(
root_nodes.insert(
BLOB_B_NAME.into(),
Node::File(FileNode::new(
fixtures::BLOB_B_DIGEST.clone(),
fixtures::BLOB_B.len() as u64,
false,
)),
Node::File {
digest: fixtures::BLOB_B_DIGEST.clone(),
size: fixtures::BLOB_B.len() as u64,
executable: false,
},
);
}
@ -112,18 +112,20 @@ async fn populate_blob_helloworld(
root_nodes.insert(
HELLOWORLD_BLOB_NAME.into(),
Node::File(FileNode::new(
fixtures::HELLOWORLD_BLOB_DIGEST.clone(),
fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u64,
true,
)),
Node::File {
digest: fixtures::HELLOWORLD_BLOB_DIGEST.clone(),
size: fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u64,
executable: true,
},
);
}
async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) {
root_nodes.insert(
SYMLINK_NAME.into(),
Node::Symlink(SymlinkNode::new(BLOB_A_NAME.into()).unwrap()),
Node::Symlink {
target: BLOB_A_NAME.try_into().unwrap(),
},
);
}
@ -132,7 +134,9 @@ async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) {
async fn populate_symlink2(root_nodes: &mut BTreeMap<Bytes, Node>) {
root_nodes.insert(
SYMLINK_NAME2.into(),
Node::Symlink(SymlinkNode::new("/nix/store/somewhereelse".into()).unwrap()),
Node::Symlink {
target: "/nix/store/somewhereelse".try_into().unwrap(),
},
);
}
@ -156,10 +160,10 @@ async fn populate_directory_with_keep(
root_nodes.insert(
DIRECTORY_WITH_KEEP_NAME.into(),
Node::Directory(DirectoryNode::new(
fixtures::DIRECTORY_WITH_KEEP.digest(),
fixtures::DIRECTORY_WITH_KEEP.size(),
)),
Node::Directory {
digest: fixtures::DIRECTORY_WITH_KEEP.digest(),
size: fixtures::DIRECTORY_WITH_KEEP.size(),
},
);
}
@ -168,10 +172,10 @@ async fn populate_directory_with_keep(
async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Bytes, Node>) {
root_nodes.insert(
DIRECTORY_WITH_KEEP_NAME.into(),
Node::Directory(DirectoryNode::new(
fixtures::DIRECTORY_WITH_KEEP.digest(),
fixtures::DIRECTORY_WITH_KEEP.size(),
)),
Node::Directory {
digest: fixtures::DIRECTORY_WITH_KEEP.digest(),
size: fixtures::DIRECTORY_WITH_KEEP.size(),
},
);
}
@ -179,11 +183,11 @@ async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Byte
async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap<Bytes, Node>) {
root_nodes.insert(
BLOB_A_NAME.into(),
Node::File(FileNode::new(
fixtures::BLOB_A_DIGEST.clone(),
fixtures::BLOB_A.len() as u64,
false,
)),
Node::File {
digest: fixtures::BLOB_A_DIGEST.clone(),
size: fixtures::BLOB_A.len() as u64,
executable: false,
},
);
}
@ -213,10 +217,10 @@ async fn populate_directory_complicated(
root_nodes.insert(
DIRECTORY_COMPLICATED_NAME.into(),
Node::Directory(DirectoryNode::new(
fixtures::DIRECTORY_COMPLICATED.digest(),
fixtures::DIRECTORY_COMPLICATED.size(),
)),
Node::Directory {
digest: fixtures::DIRECTORY_COMPLICATED.digest(),
size: fixtures::DIRECTORY_COMPLICATED.size(),
},
);
}

View file

@ -25,11 +25,15 @@ impl InodeData {
/// Constructs a new InodeData by consuming a [Node].
pub fn from_node(node: &Node) -> Self {
match node {
Node::Directory(n) => {
Self::Directory(DirectoryInodeData::Sparse(n.digest().clone(), n.size()))
Node::Directory { digest, size } => {
Self::Directory(DirectoryInodeData::Sparse(digest.clone(), *size))
}
Node::File(n) => Self::Regular(n.digest().clone(), n.size(), n.executable()),
Node::Symlink(n) => Self::Symlink(n.target().clone()),
Node::File {
digest,
size,
executable,
} => Self::Regular(digest.clone(), *size, *executable),
Node::Symlink { target } => Self::Symlink(target.clone().into()),
}
}

View file

@ -6,7 +6,7 @@
use crate::directoryservice::{DirectoryPutter, DirectoryService};
use crate::path::{Path, PathBuf};
use crate::{B3Digest, Directory, DirectoryNode, FileNode, Node, SymlinkNode};
use crate::{B3Digest, Directory, Node};
use futures::{Stream, StreamExt};
use tracing::Level;
@ -84,22 +84,31 @@ where
IngestionError::UploadDirectoryError(entry.path().to_owned(), e)
})?;
Node::Directory(DirectoryNode::new(directory_digest, directory_size))
}
IngestionEntry::Symlink { ref target, .. } => {
Node::Symlink(SymlinkNode::new(target.to_owned().into()).map_err(|e| {
IngestionError::UploadDirectoryError(
entry.path().to_owned(),
crate::Error::StorageError(e.to_string()),
)
})?)
Node::Directory {
digest: directory_digest,
size: directory_size,
}
}
IngestionEntry::Symlink { ref target, .. } => Node::Symlink {
target: bytes::Bytes::copy_from_slice(target).try_into().map_err(
|e: crate::ValidateNodeError| {
IngestionError::UploadDirectoryError(
entry.path().to_owned(),
crate::Error::StorageError(e.to_string()),
)
},
)?,
},
IngestionEntry::Regular {
size,
executable,
digest,
..
} => Node::File(FileNode::new(digest.clone(), *size, *executable)),
} => Node::File {
digest: digest.clone(),
size: *size,
executable: *executable,
},
};
let parent = entry
@ -153,8 +162,8 @@ where
#[cfg(debug_assertions)]
{
if let Node::Directory(directory_node) = &root_node {
debug_assert_eq!(&root_directory_digest, directory_node.digest())
if let Node::Directory { digest, .. } = &root_node {
debug_assert_eq!(&root_directory_digest, digest);
} else {
unreachable!("Tvix bug: directory putter initialized but no root directory node");
}
@ -201,7 +210,7 @@ mod test {
use crate::fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP, EMPTY_BLOB_DIGEST};
use crate::{directoryservice::MemoryDirectoryService, fixtures::DUMMY_DIGEST};
use crate::{Directory, DirectoryNode, FileNode, Node, SymlinkNode};
use crate::{Directory, Node};
use super::ingest_entries;
use super::IngestionEntry;
@ -213,18 +222,18 @@ mod test {
executable: true,
digest: DUMMY_DIGEST.clone(),
}],
Node::File(FileNode::new(DUMMY_DIGEST.clone(), 42, true))
Node::File{digest: DUMMY_DIGEST.clone(), size: 42, executable: true}
)]
#[case::single_symlink(vec![IngestionEntry::Symlink {
path: "foo".parse().unwrap(),
target: b"blub".into(),
}],
Node::Symlink(SymlinkNode::new("blub".into()).unwrap())
Node::Symlink{target: "blub".try_into().unwrap()}
)]
#[case::single_dir(vec![IngestionEntry::Dir {
path: "foo".parse().unwrap(),
}],
Node::Directory(DirectoryNode::new(Directory::default().digest(), Directory::default().size()))
Node::Directory{digest: Directory::default().digest(), size: Directory::default().size()}
)]
#[case::dir_with_keep(vec![
IngestionEntry::Regular {
@ -237,7 +246,7 @@ mod test {
path: "foo".parse().unwrap(),
},
],
Node::Directory(DirectoryNode::new(DIRECTORY_WITH_KEEP.digest(), DIRECTORY_WITH_KEEP.size()))
Node::Directory{ digest: DIRECTORY_WITH_KEEP.digest(), size: DIRECTORY_WITH_KEEP.size()}
)]
/// This is intentionally a bit unsorted, though it still satisfies all
/// requirements we have on the order of elements in the stream.
@ -265,7 +274,7 @@ mod test {
path: "blub".parse().unwrap(),
},
],
Node::Directory(DirectoryNode::new(DIRECTORY_COMPLICATED.digest(), DIRECTORY_COMPLICATED.size()))
Node::Directory{ digest: DIRECTORY_COMPLICATED.digest(), size: DIRECTORY_COMPLICATED.size() }
)]
#[tokio::test]
async fn test_ingestion(#[case] entries: Vec<IngestionEntry>, #[case] exp_root_node: Node) {

View file

@ -1,6 +1,6 @@
use std::collections::BTreeMap;
use crate::{errors::DirectoryError, proto, B3Digest, DirectoryNode, FileNode, Node, SymlinkNode};
use crate::{errors::DirectoryError, proto, B3Digest, Node};
/// A Directory contains nodes, which can be Directory, File or Symlink nodes.
/// It attached names to these nodes, which is the basename in that directory.
@ -27,7 +27,14 @@ impl Directory {
pub fn size(&self) -> u64 {
// It's impossible to create a Directory where the size overflows, because we
// check before every add() that the size won't overflow.
(self.nodes.len() as u64) + self.directories().map(|(_name, dn)| dn.size()).sum::<u64>()
(self.nodes.len() as u64)
+ self
.nodes()
.map(|(_name, n)| match n {
Node::Directory { size, .. } => 1 + size,
Node::File { .. } | Node::Symlink { .. } => 1,
})
.sum::<u64>()
}
/// Calculates the digest of a Directory, which is the blake3 hash of a
@ -43,40 +50,6 @@ impl Directory {
self.nodes.iter()
}
/// Allows iterating over the FileNode entries of this directory.
/// For each, it returns a tuple of its name and node.
/// The elements are sorted by their names.
pub fn files(&self) -> impl Iterator<Item = (&bytes::Bytes, &FileNode)> + Send + Sync + '_ {
self.nodes.iter().filter_map(|(name, node)| match node {
Node::File(n) => Some((name, n)),
_ => None,
})
}
/// Allows iterating over the DirectoryNode entries (subdirectories) of this directory.
/// For each, it returns a tuple of its name and node.
/// The elements are sorted by their names.
pub fn directories(
&self,
) -> impl Iterator<Item = (&bytes::Bytes, &DirectoryNode)> + Send + Sync + '_ {
self.nodes.iter().filter_map(|(name, node)| match node {
Node::Directory(n) => Some((name, n)),
_ => None,
})
}
/// Allows iterating over the SymlinkNode entries of this directory
/// For each, it returns a tuple of its name and node.
/// The elements are sorted by their names.
pub fn symlinks(
&self,
) -> impl Iterator<Item = (&bytes::Bytes, &SymlinkNode)> + Send + Sync + '_ {
self.nodes.iter().filter_map(|(name, node)| match node {
Node::Symlink(n) => Some((name, n)),
_ => None,
})
}
/// Checks a Node name for validity as a directory entry
/// We disallow slashes, null bytes, '.', '..' and the empty string.
pub(crate) fn is_valid_name(name: &[u8]) -> bool {
@ -106,7 +79,7 @@ impl Directory {
self.size(),
1,
match node {
Node::Directory(ref dir) => dir.size(),
Node::Directory { size, .. } => size,
_ => 0,
},
])
@ -130,7 +103,7 @@ fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> {
#[cfg(test)]
mod test {
use super::{Directory, DirectoryNode, FileNode, Node, SymlinkNode};
use super::{Directory, Node};
use crate::fixtures::DUMMY_DIGEST;
use crate::DirectoryError;
@ -140,49 +113,76 @@ mod test {
d.add(
"b".into(),
Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
Node::Directory {
digest: DUMMY_DIGEST.clone(),
size: 1,
},
)
.unwrap();
d.add(
"a".into(),
Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
Node::Directory {
digest: DUMMY_DIGEST.clone(),
size: 1,
},
)
.unwrap();
d.add(
"z".into(),
Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
Node::Directory {
digest: DUMMY_DIGEST.clone(),
size: 1,
},
)
.unwrap();
d.add(
"f".into(),
Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)),
Node::File {
digest: DUMMY_DIGEST.clone(),
size: 1,
executable: true,
},
)
.unwrap();
d.add(
"c".into(),
Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)),
Node::File {
digest: DUMMY_DIGEST.clone(),
size: 1,
executable: true,
},
)
.unwrap();
d.add(
"g".into(),
Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)),
Node::File {
digest: DUMMY_DIGEST.clone(),
size: 1,
executable: true,
},
)
.unwrap();
d.add(
"t".into(),
Node::Symlink(SymlinkNode::new("a".into()).unwrap()),
Node::Symlink {
target: "a".try_into().unwrap(),
},
)
.unwrap();
d.add(
"o".into(),
Node::Symlink(SymlinkNode::new("a".into()).unwrap()),
Node::Symlink {
target: "a".try_into().unwrap(),
},
)
.unwrap();
d.add(
"e".into(),
Node::Symlink(SymlinkNode::new("a".into()).unwrap()),
Node::Symlink {
target: "a".try_into().unwrap(),
},
)
.unwrap();
@ -198,7 +198,10 @@ mod test {
assert_eq!(
d.add(
"foo".into(),
Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), u64::MAX))
Node::Directory {
digest: DUMMY_DIGEST.clone(),
size: u64::MAX
}
),
Err(DirectoryError::SizeOverflow)
);
@ -210,7 +213,10 @@ mod test {
d.add(
"a".into(),
Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)),
Node::Directory {
digest: DUMMY_DIGEST.clone(),
size: 1,
},
)
.unwrap();
assert_eq!(
@ -218,7 +224,11 @@ mod test {
"{}",
d.add(
"a".into(),
Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true))
Node::File {
digest: DUMMY_DIGEST.clone(),
size: 1,
executable: true
}
)
.expect_err("adding duplicate dir entry must fail")
),
@ -233,7 +243,9 @@ mod test {
assert!(
dir.add(
"".into(), // wrong! can not be added to directory
Node::Symlink(SymlinkNode::new("doesntmatter".into(),).unwrap())
Node::Symlink {
target: "doesntmatter".try_into().unwrap(),
},
)
.is_err(),
"invalid symlink entry be rejected"

View file

@ -1,35 +0,0 @@
use crate::B3Digest;
/// A pointer to a [Directory], identified by its [Directory::digest].
///
/// In addition to the digest it records a `size`.
/// A DirectoryNode is either an element of the [Directory] that contains it,
/// or a standalone root node.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DirectoryNode {
    /// The blake3 hash of a Directory message, serialized in protobuf canonical form.
    digest: B3Digest,
    /// Number of child elements in the Directory referred to by `digest`.
    /// Calculated by summing up the numbers of nodes, and for each directory,
    /// its size field. Can be used for inode allocation.
    /// This field is precisely as verifiable as any other Merkle tree edge.
    /// Resolve `digest`, and you can compute it incrementally. Resolve the entire
    /// tree, and you can fully compute it from scratch.
    /// A credulous implementation won't reject an excessive size, but this is
    /// harmless: you'll have some ordinals without nodes. Undersizing is obvious
    /// and easy to reject: you won't have an ordinal for some nodes.
    size: u64,
}

impl DirectoryNode {
    /// Creates a node from the `digest` of a Directory and its recorded `size`.
    /// Neither value is checked here.
    pub fn new(digest: B3Digest, size: u64) -> Self {
        DirectoryNode { digest, size }
    }

    /// Returns a reference to the stored digest.
    pub fn digest(&self) -> &B3Digest {
        let DirectoryNode { digest, .. } = self;
        digest
    }

    /// Returns the stored size.
    pub fn size(&self) -> u64 {
        let DirectoryNode { size, .. } = self;
        *size
    }
}

View file

@ -1,36 +0,0 @@
use crate::B3Digest;
/// A regular or executable file, either inside a Directory or at the root.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileNode {
    /// The blake3 digest of the file contents
    digest: B3Digest,

    /// The file content size
    size: u64,

    /// Whether the file is executable
    executable: bool,
}

impl FileNode {
    /// Creates a node from the content `digest`, the content `size` and the
    /// `executable` flag. None of the values are checked here.
    pub fn new(digest: B3Digest, size: u64, executable: bool) -> Self {
        FileNode {
            digest,
            size,
            executable,
        }
    }

    /// Returns a reference to the stored content digest.
    pub fn digest(&self) -> &B3Digest {
        let FileNode { digest, .. } = self;
        digest
    }

    /// Returns the stored content size.
    pub fn size(&self) -> u64 {
        let FileNode { size, .. } = self;
        *size
    }

    /// Returns whether the file is marked executable.
    pub fn executable(&self) -> bool {
        let FileNode { executable, .. } = self;
        *executable
    }
}

View file

@ -1,20 +1,48 @@
//! This holds types describing nodes in the tvix-castore model.
mod directory;
mod directory_node;
mod file_node;
mod symlink_node;
mod symlink_target;
use crate::B3Digest;
pub use directory::Directory;
pub use directory_node::DirectoryNode;
pub use file_node::FileNode;
pub use symlink_node::SymlinkNode;
use symlink_target::SymlinkTarget;
/// A Node is either a [DirectoryNode], [FileNode] or [SymlinkNode].
/// Nodes themselves don't have names, what gives them names is either them
/// being inside a [Directory], or a root node with its own name attached to it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Node {
Directory(DirectoryNode),
File(FileNode),
Symlink(SymlinkNode),
/// A DirectoryNode is a pointer to a [Directory], by its [Directory::digest].
/// It also records a`size`.
/// Such a node is either an element in the [Directory] it itself is contained in,
/// or a standalone root node.
Directory {
/// The blake3 hash of a Directory message, serialized in protobuf canonical form.
digest: B3Digest,
/// Number of child elements in the Directory referred to by `digest`.
/// Calculated by summing up the numbers of nodes, and for each directory,
/// its size field. Can be used for inode allocation.
/// This field is precisely as verifiable as any other Merkle tree edge.
/// Resolve `digest`, and you can compute it incrementally. Resolve the entire
/// tree, and you can fully compute it from scratch.
/// A credulous implementation won't reject an excessive size, but this is
/// harmless: you'll have some ordinals without nodes. Undersizing is obvious
/// and easy to reject: you won't have an ordinal for some nodes.
size: u64,
},
/// A FileNode represents a regular or executable file in a Directory or at the root.
File {
/// The blake3 digest of the file contents
digest: B3Digest,
/// The file content size
size: u64,
/// Whether the file is executable
executable: bool,
},
/// A SymlinkNode represents a symbolic link in a Directory or at the root.
Symlink {
/// The target of the symlink.
target: SymlinkTarget,
},
}

View file

@ -1,21 +0,0 @@
use crate::ValidateNodeError;
/// A symbolic link, either inside a Directory or at the root.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SymlinkNode {
    /// The target of the symlink, as raw bytes.
    target: bytes::Bytes,
}

impl SymlinkNode {
    /// Creates a SymlinkNode pointing at `target`.
    ///
    /// # Errors
    ///
    /// Returns [ValidateNodeError::InvalidSymlinkTarget], carrying the rejected
    /// bytes, if `target` is empty or contains a null byte.
    pub fn new(target: bytes::Bytes) -> Result<Self, ValidateNodeError> {
        let acceptable = !target.is_empty() && !target.contains(&b'\0');
        if acceptable {
            Ok(SymlinkNode { target })
        } else {
            Err(ValidateNodeError::InvalidSymlinkTarget(target))
        }
    }

    /// Returns a reference to the stored target.
    pub fn target(&self) -> &bytes::Bytes {
        let SymlinkNode { target } = self;
        target
    }
}

View file

@ -0,0 +1,82 @@
// TODO: split out this error
use crate::ValidateNodeError;
use bstr::ByteSlice;
use std::fmt::{self, Debug, Display};
/// A wrapper type for symlink targets.
///
/// Internally uses a [bytes::Bytes], but disallows empty targets and those
/// containing null bytes. The field is private, so outside this module a
/// value has to be obtained through one of the `TryFrom` conversions, which
/// are responsible for upholding that invariant.
#[repr(transparent)]
#[derive(Clone, PartialEq, Eq)]
pub struct SymlinkTarget {
// The raw bytes of the symlink target.
inner: bytes::Bytes,
}
/// Gives read-only access to the raw bytes of the target.
impl AsRef<[u8]> for SymlinkTarget {
    fn as_ref(&self) -> &[u8] {
        // Borrow the whole underlying buffer as a byte slice.
        &self.inner[..]
    }
}
impl From<SymlinkTarget> for bytes::Bytes {
fn from(value: SymlinkTarget) -> Self {
value.inner
}
}
impl TryFrom<bytes::Bytes> for SymlinkTarget {
type Error = ValidateNodeError;
fn try_from(value: bytes::Bytes) -> Result<Self, Self::Error> {
if value.is_empty() || value.contains(&b'\0') {
return Err(ValidateNodeError::InvalidSymlinkTarget(value));
}
Ok(Self { inner: value })
}
}
/// Validates a static byte slice as a symlink target.
///
/// Fails with [ValidateNodeError::InvalidSymlinkTarget] if the slice is empty
/// or contains a null byte.
impl TryFrom<&'static [u8]> for SymlinkTarget {
    type Error = ValidateNodeError;

    fn try_from(value: &'static [u8]) -> Result<Self, Self::Error> {
        // Wrap the static slice and run the same empty / null-byte check the
        // `bytes::Bytes` conversion performs.
        let wrapped = bytes::Bytes::from_static(value);
        <Self as TryFrom<bytes::Bytes>>::try_from(wrapped)
    }
}
/// Validates a string slice as a symlink target, copying its bytes.
///
/// # Errors
///
/// Returns [ValidateNodeError::InvalidSymlinkTarget], carrying a copy of the
/// rejected bytes, if `value` is empty or contains a null byte.
impl TryFrom<&str> for SymlinkTarget {
    type Error = ValidateNodeError;

    fn try_from(value: &str) -> Result<Self, Self::Error> {
        // Copy once; the same buffer ends up either in the target or in the error.
        let inner = bytes::Bytes::copy_from_slice(value.as_bytes());

        // A `&str` may contain interior null bytes, so it needs the same
        // check as the byte-based conversions, not just the emptiness check.
        if inner.is_empty() || inner.contains(&b'\0') {
            return Err(ValidateNodeError::InvalidSymlinkTarget(inner));
        }

        Ok(Self { inner })
    }
}
/// Debug-formats the target by forwarding to the byte-string view of its bytes.
impl Debug for SymlinkTarget {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let target = self.inner.as_bstr();
        fmt::Debug::fmt(target, f)
    }
}
impl Display for SymlinkTarget {
    /// Formats the target using the `Display` representation of its byte-string view.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let rendered = self.inner.as_bstr();
        Display::fmt(rendered, f)
    }
}

View file

@ -157,21 +157,25 @@ impl From<&crate::Directory> for Directory {
for (name, node) in directory.nodes() {
match node {
crate::Node::File(n) => files.push(FileNode {
crate::Node::File {
digest,
size,
executable,
} => files.push(FileNode {
name: name.clone(),
digest: n.digest().to_owned().into(),
size: n.size(),
executable: n.executable(),
digest: digest.to_owned().into(),
size: *size,
executable: *executable,
}),
crate::Node::Directory(n) => directories.push(DirectoryNode {
crate::Node::Directory { digest, size } => directories.push(DirectoryNode {
name: name.clone(),
digest: n.digest().to_owned().into(),
size: n.size(),
digest: digest.to_owned().into(),
size: *size,
}),
crate::Node::Symlink(n) => {
crate::Node::Symlink { target } => {
symlinks.push(SymlinkNode {
name: name.clone(),
target: n.target().to_owned(),
target: target.to_owned().into(),
});
}
}
@ -192,7 +196,10 @@ impl Node {
let digest = B3Digest::try_from(n.digest)
.map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e.into()))?;
let node = crate::Node::Directory(crate::DirectoryNode::new(digest, n.size));
let node = crate::Node::Directory {
digest,
size: n.size,
};
Ok((n.name, node))
}
@ -200,16 +207,22 @@ impl Node {
let digest = B3Digest::try_from(n.digest)
.map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e.into()))?;
let node = crate::Node::File(crate::FileNode::new(digest, n.size, n.executable));
let node = crate::Node::File {
digest,
size: n.size,
executable: n.executable,
};
Ok((n.name, node))
}
node::Node::Symlink(n) => {
let node = crate::Node::Symlink(
crate::SymlinkNode::new(n.target)
let node = crate::Node::Symlink {
target: n
.target
.try_into()
.map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e))?,
);
};
Ok((n.name, node))
}
@ -218,27 +231,30 @@ impl Node {
/// Constructs a [Node] from a name and [crate::Node].
pub fn from_name_and_node(name: bytes::Bytes, n: crate::Node) -> Self {
// TODO: make these pub(crate) so we can avoid cloning?
match n {
crate::Node::Directory(directory_node) => Self {
crate::Node::Directory { digest, size } => Self {
node: Some(node::Node::Directory(DirectoryNode {
name,
digest: directory_node.digest().to_owned().into(),
size: directory_node.size(),
digest: digest.into(),
size,
})),
},
crate::Node::File(file_node) => Self {
crate::Node::File {
digest,
size,
executable,
} => Self {
node: Some(node::Node::File(FileNode {
name,
digest: file_node.digest().to_owned().into(),
size: file_node.size(),
executable: file_node.executable(),
digest: digest.into(),
size,
executable,
})),
},
crate::Node::Symlink(symlink_node) => Self {
crate::Node::Symlink { target } => Self {
node: Some(node::Node::Symlink(SymlinkNode {
name,
target: symlink_node.target().to_owned(),
target: target.into(),
})),
},
}

View file

@ -2,7 +2,7 @@ use crate::blobservice::{self, BlobService};
use crate::directoryservice;
use crate::fixtures::*;
use crate::import::fs::ingest_path;
use crate::{DirectoryNode, Node, SymlinkNode};
use crate::Node;
use tempfile::TempDir;
@ -30,7 +30,9 @@ async fn symlink() {
.expect("must succeed");
assert_eq!(
Node::Symlink(SymlinkNode::new("/nix/store/somewhereelse".into(),).unwrap()),
Node::Symlink {
target: "/nix/store/somewhereelse".try_into().unwrap()
},
root_node,
)
}
@ -53,11 +55,11 @@ async fn single_file() {
.expect("must succeed");
assert_eq!(
Node::File(crate::FileNode::new(
HELLOWORLD_BLOB_DIGEST.clone(),
HELLOWORLD_BLOB_CONTENTS.len() as u64,
false,
)),
Node::File {
digest: HELLOWORLD_BLOB_DIGEST.clone(),
size: HELLOWORLD_BLOB_CONTENTS.len() as u64,
executable: false,
},
root_node,
);
@ -88,10 +90,10 @@ async fn complicated() {
// ensure root_node matched expectations
assert_eq!(
Node::Directory(DirectoryNode::new(
DIRECTORY_COMPLICATED.digest().clone(),
DIRECTORY_COMPLICATED.size(),
)),
Node::Directory {
digest: DIRECTORY_COMPLICATED.digest().clone(),
size: DIRECTORY_COMPLICATED.size(),
},
root_node,
);