feat(tvix/store/directorysvc): add traverse_to

This walks from a node further down until it reaches the requested path.

Change-Id: I2f9a15a8601db4d06c95d7b47cd6153264e203e3
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8568
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
This commit is contained in:
Florian Klink 2023-05-14 14:49:53 +03:00 committed by flokli
parent 3a4e29c261
commit a1324513ad
2 changed files with 238 additions and 0 deletions

View file

@ -2,11 +2,13 @@ use crate::{proto, Error};
mod grpc;
mod memory;
mod sled;
mod traverse;
mod utils;
pub use self::grpc::GRPCDirectoryService;
pub use self::memory::MemoryDirectoryService;
pub use self::sled::SledDirectoryService;
pub use self::traverse::traverse_to;
pub use self::utils::DirectoryTraverser;
/// The base trait all Directory services need to implement.

View file

@ -0,0 +1,236 @@
use super::DirectoryService;
use crate::Error;
use std::os::unix::prelude::OsStrExt;
use tracing::{instrument, warn};
/// This traverses from a (root) node to the given (sub)path, returning the Node
/// at that path, or none, if there's nothing at that path.
/// TODO: Do we want to rewrite this in a non-recursing fashion, and use
/// [DirectoryService.get_recursive] to do less lookups?
/// Or do we consider this to be a non-issue due to store composition and local caching?
/// TODO: the name of this function (and mod) is a bit bad, because it doesn't
/// clearly distinguish it from the BFS traversers.
#[instrument(skip(directory_service))]
pub fn traverse_to<DS: DirectoryService>(
directory_service: &mut DS,
node: crate::proto::node::Node,
path: &std::path::Path,
) -> Result<Option<crate::proto::node::Node>, Error> {
// strip a possible `/` prefix from the path.
let path = {
if path.starts_with("/") {
path.strip_prefix("/").unwrap()
} else {
path
}
};
let mut it = path.components();
match it.next() {
None => {
// the (remaining) path is empty, return the node we've been called with.
Ok(Some(node))
}
Some(first_component) => {
// convert first_component to bytes, which are later used for comparison.
let first_component_bytes: &[u8] = first_component.as_os_str().as_bytes();
match node {
crate::proto::node::Node::File(_) | crate::proto::node::Node::Symlink(_) => {
// There's still some path left, but the current node is no directory.
// This means the path doesn't exist, as we can't reach it.
Ok(None)
}
crate::proto::node::Node::Directory(directory_node) => {
// fetch the linked node from the directory_service
let digest: [u8; 32] = directory_node
.digest
.try_into()
.map_err(|_e| Error::StorageError("invalid digest length".to_string()))?;
match directory_service.get(&digest)? {
// If we didn't get the directory node that's linked, that's a store inconsistency, bail out!
None => {
let digest_b64 = data_encoding::BASE64.encode(&digest);
warn!("directory {} does not exist", digest_b64);
return Err(Error::StorageError(format!(
"directory {} does not exist",
digest_b64
)));
}
Some(directory) => {
// look for first_component in the [Directory].
// FUTUREWORK: as the nodes() iterator returns in a sorted fashion, we
// could stop as soon as e.name is larger than the search string.
let child_node = directory.nodes().find(|n| match n {
crate::proto::node::Node::Directory(e) => {
&e.name.to_string().into_bytes() == first_component_bytes
}
crate::proto::node::Node::File(e) => {
&e.name.to_string().into_bytes() == first_component_bytes
}
crate::proto::node::Node::Symlink(e) => {
&e.name.to_string().into_bytes() == first_component_bytes
}
});
match child_node {
// child node not found means there's no such element inside the directory.
None => Ok(None),
// child node found, recurse with it and the rest of the path.
Some(child_node) => {
let rest_path: std::path::PathBuf = it.collect();
traverse_to(directory_service, child_node, &rest_path)
}
}
}
}
}
}
}
}
}
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use crate::{
directoryservice::DirectoryPutter,
directoryservice::DirectoryService,
tests::{
fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP},
utils::gen_directory_service,
},
};
use super::traverse_to;
#[test]
fn test_traverse_to() {
let mut directory_service = gen_directory_service();
let mut handle = directory_service.put_multiple_start();
handle
.put(DIRECTORY_WITH_KEEP.clone())
.expect("must succeed");
handle
.put(DIRECTORY_COMPLICATED.clone())
.expect("must succeed");
// construct the node for DIRECTORY_COMPLICATED
let node_directory_complicated =
crate::proto::node::Node::Directory(crate::proto::DirectoryNode {
name: "doesntmatter".to_string(),
digest: DIRECTORY_COMPLICATED.digest().to_vec(),
size: DIRECTORY_COMPLICATED.size(),
});
// construct the node for DIRECTORY_COMPLICATED
let node_directory_with_keep = crate::proto::node::Node::Directory(
DIRECTORY_COMPLICATED.directories.first().unwrap().clone(),
);
// construct the node for the .keep file
let node_file_keep =
crate::proto::node::Node::File(DIRECTORY_WITH_KEEP.files.first().unwrap().clone());
// traversal to an empty subpath should return the root node.
{
let resp = traverse_to(
&mut directory_service,
node_directory_complicated.clone(),
&PathBuf::from(""),
)
.expect("must succeed");
assert_eq!(Some(node_directory_complicated.clone()), resp);
}
// traversal to `keep` should return the node for DIRECTORY_WITH_KEEP
{
let resp = traverse_to(
&mut directory_service,
node_directory_complicated.clone(),
&PathBuf::from("keep"),
)
.expect("must succeed");
assert_eq!(Some(node_directory_with_keep.clone()), resp);
}
// traversal to `keep/.keep` should return the node for the .keep file
{
let resp = traverse_to(
&mut directory_service,
node_directory_complicated.clone(),
&PathBuf::from("keep/.keep"),
)
.expect("must succeed");
assert_eq!(Some(node_file_keep.clone()), resp);
}
// traversal to `keep/.keep` should return the node for the .keep file
{
let resp = traverse_to(
&mut directory_service,
node_directory_complicated.clone(),
&PathBuf::from("/keep/.keep"),
)
.expect("must succeed");
assert_eq!(Some(node_file_keep.clone()), resp);
}
// traversal to `void` should return None (doesn't exist)
{
let resp = traverse_to(
&mut directory_service,
node_directory_complicated.clone(),
&PathBuf::from("void"),
)
.expect("must succeed");
assert_eq!(None, resp);
}
// traversal to `void` should return None (doesn't exist)
{
let resp = traverse_to(
&mut directory_service,
node_directory_complicated.clone(),
&PathBuf::from("//v/oid"),
)
.expect("must succeed");
assert_eq!(None, resp);
}
// traversal to `keep/.keep/404` should return None (the path can't be
// reached, as keep/.keep already is a file)
{
let resp = traverse_to(
&mut directory_service,
node_directory_complicated.clone(),
&PathBuf::from("keep/.keep/foo"),
)
.expect("must succeed");
assert_eq!(None, resp);
}
// traversal to a subpath of '/' should return the root node.
{
let resp = traverse_to(
&mut directory_service,
node_directory_complicated.clone(),
&PathBuf::from("/"),
)
.expect("must succeed");
assert_eq!(Some(node_directory_complicated.clone()), resp);
}
}
}