refactor(tvix/castore/fs): remove From<Node> for InodeData

These were copying unnecessarily. Instead, have a
InodeData::from_node(), which *consumes* the Node entirely, returns
`InodeData` and the split-off name (which is not part of InodeData).

Callers can then use the result in various helper functions, like:

 - InodeData::as_fuse_type
 - InodeData::as_fuse_file_attr
 - InodeData::as_fuse_entry

… to prepare their replies to the kernel.

This removes not only a bunch of clones, but also a lot of copy-pasted
code.

Change-Id: Idbca5f25cc29e96c1f4c614b33dff2becb0a8738
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11435
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
This commit is contained in:
Florian Klink 2024-04-15 15:14:03 +03:00 committed by clbot
parent fb852b0245
commit b025a30d27
2 changed files with 76 additions and 107 deletions

View file

@ -2,6 +2,8 @@
//! about inodes, which present tvix-castore nodes in a filesystem. //! about inodes, which present tvix-castore nodes in a filesystem.
use std::time::Duration; use std::time::Duration;
use bytes::Bytes;
use crate::proto as castorepb; use crate::proto as castorepb;
use crate::B3Digest; use crate::B3Digest;
@ -12,7 +14,36 @@ pub enum InodeData {
Directory(DirectoryInodeData), // either [DirectoryInodeData:Sparse] or [DirectoryInodeData:Populated] Directory(DirectoryInodeData), // either [DirectoryInodeData:Sparse] or [DirectoryInodeData:Populated]
} }
/// This encodes the two different states of [InodeData::Directory].
/// Either the data still is sparse (we only saw a [castorepb::DirectoryNode],
/// but didn't fetch the [castorepb::Directory] struct yet, or we processed a
/// lookup and did fetch the data.
#[derive(Clone, Debug)]
pub enum DirectoryInodeData {
Sparse(B3Digest, u64), // digest, size
Populated(B3Digest, Vec<(u64, castorepb::node::Node)>), // [(child_inode, node)]
}
impl InodeData { impl InodeData {
/// Constructs a new InodeData by consuming a [Node].
/// It splits off the orginal name, so it can be used later.
pub fn from_node(node: castorepb::node::Node) -> (Self, Bytes) {
match node {
castorepb::node::Node::Directory(n) => (
Self::Directory(DirectoryInodeData::Sparse(
n.digest.try_into().unwrap(),
n.size,
)),
n.name,
),
castorepb::node::Node::File(n) => (
Self::Regular(n.digest.try_into().unwrap(), n.size, n.executable),
n.name,
),
castorepb::node::Node::Symlink(n) => (Self::Symlink(n.target), n.name),
}
}
pub fn as_fuse_file_attr(&self, inode: u64) -> fuse_backend_rs::abi::fuse_abi::Attr { pub fn as_fuse_file_attr(&self, inode: u64) -> fuse_backend_rs::abi::fuse_abi::Attr {
fuse_backend_rs::abi::fuse_abi::Attr { fuse_backend_rs::abi::fuse_abi::Attr {
ino: inode, ino: inode,
@ -45,50 +76,19 @@ impl InodeData {
..Default::default() ..Default::default()
} }
} }
}
/// This encodes the two different states of [InodeData::Directory]. /// Returns the u32 fuse type
/// Either the data still is sparse (we only saw a [castorepb::DirectoryNode], pub fn as_fuse_type(&self) -> u32 {
/// but didn't fetch the [castorepb::Directory] struct yet, or we processed a #[allow(clippy::let_and_return)]
/// lookup and did fetch the data. let ty = match self {
#[derive(Clone, Debug)] InodeData::Regular(_, _, _) => libc::S_IFREG,
pub enum DirectoryInodeData { InodeData::Symlink(_) => libc::S_IFLNK,
Sparse(B3Digest, u64), // digest, size InodeData::Directory(_) => libc::S_IFDIR,
Populated(B3Digest, Vec<(u64, castorepb::node::Node)>), // [(child_inode, node)] };
} // libc::S_IFDIR is u32 on Linux and u16 on MacOS
#[cfg(target_os = "macos")]
let ty = ty as u32;
impl From<&castorepb::node::Node> for InodeData { ty
fn from(value: &castorepb::node::Node) -> Self {
match value {
castorepb::node::Node::Directory(directory_node) => directory_node.into(),
castorepb::node::Node::File(file_node) => file_node.into(),
castorepb::node::Node::Symlink(symlink_node) => symlink_node.into(),
}
}
}
impl From<&castorepb::SymlinkNode> for InodeData {
fn from(value: &castorepb::SymlinkNode) -> Self {
InodeData::Symlink(value.target.clone())
}
}
impl From<&castorepb::FileNode> for InodeData {
fn from(value: &castorepb::FileNode) -> Self {
InodeData::Regular(
value.digest.clone().try_into().unwrap(),
value.size,
value.executable,
)
}
}
/// Converts a DirectoryNode to a sparsely populated InodeData::Directory.
impl From<&castorepb::DirectoryNode> for InodeData {
fn from(value: &castorepb::DirectoryNode) -> Self {
InodeData::Directory(DirectoryInodeData::Sparse(
value.digest.clone().try_into().unwrap(),
value.size,
))
} }
} }

View file

@ -26,6 +26,7 @@ use crate::{
B3Digest, B3Digest,
}; };
use bstr::ByteVec; use bstr::ByteVec;
use bytes::Bytes;
use fuse_backend_rs::abi::fuse_abi::{stat64, OpenOptions}; use fuse_backend_rs::abi::fuse_abi::{stat64, OpenOptions};
use fuse_backend_rs::api::filesystem::{ use fuse_backend_rs::api::filesystem::{
Context, FileSystem, FsOptions, GetxattrReply, ListxattrReply, ROOT_ID, Context, FileSystem, FsOptions, GetxattrReply, ListxattrReply, ROOT_ID,
@ -91,7 +92,7 @@ pub struct TvixStoreFs<BS, DS, RN> {
show_xattr: bool, show_xattr: bool,
/// This maps a given basename in the root to the inode we allocated for the node. /// This maps a given basename in the root to the inode we allocated for the node.
root_nodes: RwLock<HashMap<Vec<u8>, u64>>, root_nodes: RwLock<HashMap<Bytes, u64>>,
/// This keeps track of inodes and data alongside them. /// This keeps track of inodes and data alongside them.
inode_tracker: RwLock<InodeTracker>, inode_tracker: RwLock<InodeTracker>,
@ -196,13 +197,16 @@ where
// Turn the retrieved directory into a InodeData::Directory(DirectoryInodeData::Populated(..)), // Turn the retrieved directory into a InodeData::Directory(DirectoryInodeData::Populated(..)),
// allocating inodes for the children on the way. // allocating inodes for the children on the way.
// FUTUREWORK: there's a bunch of cloning going on here, which we can probably avoid.
let children = { let children = {
let mut inode_tracker = self.inode_tracker.write(); let mut inode_tracker = self.inode_tracker.write();
let children: Vec<(u64, castorepb::node::Node)> = directory let children: Vec<(u64, castorepb::node::Node)> = directory
.nodes() .nodes()
.map(|child_node| { .map(|child_node| {
let child_ino = inode_tracker.put((&child_node).into()); let (inode_data, _) = InodeData::from_node(child_node.clone());
let child_ino = inode_tracker.put(inode_data);
(child_ino, child_node) (child_ino, child_node)
}) })
.collect(); .collect();
@ -263,7 +267,7 @@ where
// the root node doesn't exist, so the file doesn't exist. // the root node doesn't exist, so the file doesn't exist.
Ok(None) => Err(io::Error::from_raw_os_error(libc::ENOENT)), Ok(None) => Err(io::Error::from_raw_os_error(libc::ENOENT)),
// The root node does exist // The root node does exist
Ok(Some(ref root_node)) => { Ok(Some(root_node)) => {
// The name must match what's passed in the lookup, otherwise this is also a ENOENT. // The name must match what's passed in the lookup, otherwise this is also a ENOENT.
if root_node.get_name() != name.to_bytes() { if root_node.get_name() != name.to_bytes() {
debug!(root_node.name=?root_node.get_name(), found_node.name=%name.to_string_lossy(), "node name mismatch"); debug!(root_node.name=?root_node.get_name(), found_node.name=%name.to_string_lossy(), "node name mismatch");
@ -286,9 +290,9 @@ where
// insert the (sparse) inode data and register in // insert the (sparse) inode data and register in
// self.root_nodes. // self.root_nodes.
let inode_data: InodeData = root_node.into(); let (inode_data, name) = InodeData::from_node(root_node);
let ino = inode_tracker.put(inode_data.clone()); let ino = inode_tracker.put(inode_data.clone());
root_nodes.insert(name.to_bytes().into(), ino); root_nodes.insert(name, ino);
Ok((ino, Arc::new(inode_data))) Ok((ino, Arc::new(inode_data)))
} }
@ -463,30 +467,22 @@ where
io::Error::from_raw_os_error(libc::EIO) io::Error::from_raw_os_error(libc::EIO)
})?; })?;
let name = root_node.get_name(); let (inode_data, name) = InodeData::from_node(root_node);
let ty = match root_node {
Node::Directory(_) => libc::S_IFDIR,
Node::File(_) => libc::S_IFREG,
Node::Symlink(_) => libc::S_IFLNK,
};
// obtain the inode, or allocate a new one. // obtain the inode, or allocate a new one.
let ino = self.get_inode_for_root_name(name).unwrap_or_else(|| { let ino = self.get_inode_for_root_name(&name).unwrap_or_else(|| {
// insert the (sparse) inode data and register in // insert the (sparse) inode data and register in
// self.root_nodes. // self.root_nodes.
let ino = self.inode_tracker.write().put((&root_node).into()); let ino = self.inode_tracker.write().put(inode_data.clone());
self.root_nodes.write().insert(name.into(), ino); self.root_nodes.write().insert(name.clone(), ino);
ino ino
}); });
#[cfg(target_os = "macos")]
let ty = ty as u32;
let written = add_entry(fuse_backend_rs::api::filesystem::DirEntry { let written = add_entry(fuse_backend_rs::api::filesystem::DirEntry {
ino, ino,
offset: offset + i as u64 + 1, offset: offset + i as u64 + 1,
type_: ty, type_: inode_data.as_fuse_type(),
name, name: &name,
})?; })?;
// If the buffer is full, add_entry will return `Ok(0)`. // If the buffer is full, add_entry will return `Ok(0)`.
if written == 0 { if written == 0 {
@ -500,23 +496,15 @@ where
let (parent_digest, children) = self.get_directory_children(inode)?; let (parent_digest, children) = self.get_directory_children(inode)?;
Span::current().record("directory.digest", parent_digest.to_string()); Span::current().record("directory.digest", parent_digest.to_string());
for (i, (ino, child_node)) in children.iter().skip(offset as usize).enumerate() { for (i, (ino, child_node)) in children.into_iter().skip(offset as usize).enumerate() {
let (inode_data, name) = InodeData::from_node(child_node);
// the second parameter will become the "offset" parameter on the next call. // the second parameter will become the "offset" parameter on the next call.
let written = add_entry(fuse_backend_rs::api::filesystem::DirEntry { let written = add_entry(fuse_backend_rs::api::filesystem::DirEntry {
ino: *ino, ino,
offset: offset + i as u64 + 1, offset: offset + i as u64 + 1,
type_: match child_node { type_: inode_data.as_fuse_type(),
#[allow(clippy::unnecessary_cast)] name: &name,
// libc::S_IFDIR is u32 on Linux and u16 on MacOS
Node::Directory(_) => libc::S_IFDIR as u32,
#[allow(clippy::unnecessary_cast)]
// libc::S_IFDIR is u32 on Linux and u16 on MacOS
Node::File(_) => libc::S_IFREG as u32,
#[allow(clippy::unnecessary_cast)]
// libc::S_IFDIR is u32 on Linux and u16 on MacOS
Node::Symlink(_) => libc::S_IFLNK as u32,
},
name: child_node.get_name(),
})?; })?;
// If the buffer is full, add_entry will return `Ok(0)`. // If the buffer is full, add_entry will return `Ok(0)`.
if written == 0 { if written == 0 {
@ -566,33 +554,23 @@ where
io::Error::from_raw_os_error(libc::EPERM) io::Error::from_raw_os_error(libc::EPERM)
})?; })?;
let name = root_node.get_name(); let (inode_data, name) = InodeData::from_node(root_node);
let ty = match root_node {
Node::Directory(_) => libc::S_IFDIR,
Node::File(_) => libc::S_IFREG,
Node::Symlink(_) => libc::S_IFLNK,
};
let inode_data: InodeData = (&root_node).into();
// obtain the inode, or allocate a new one. // obtain the inode, or allocate a new one.
let ino = self.get_inode_for_root_name(name).unwrap_or_else(|| { let ino = self.get_inode_for_root_name(&name).unwrap_or_else(|| {
// insert the (sparse) inode data and register in // insert the (sparse) inode data and register in
// self.root_nodes. // self.root_nodes.
let ino = self.inode_tracker.write().put(inode_data.clone()); let ino = self.inode_tracker.write().put(inode_data.clone());
self.root_nodes.write().insert(name.into(), ino); self.root_nodes.write().insert(name.clone(), ino);
ino ino
}); });
#[cfg(target_os = "macos")]
let ty = ty as u32;
let written = add_entry( let written = add_entry(
fuse_backend_rs::api::filesystem::DirEntry { fuse_backend_rs::api::filesystem::DirEntry {
ino, ino,
offset: offset + i as u64 + 1, offset: offset + i as u64 + 1,
type_: ty, type_: inode_data.as_fuse_type(),
name, name: &name,
}, },
inode_data.as_fuse_entry(ino), inode_data.as_fuse_entry(ino),
)?; )?;
@ -608,27 +586,18 @@ where
let (parent_digest, children) = self.get_directory_children(inode)?; let (parent_digest, children) = self.get_directory_children(inode)?;
Span::current().record("directory.digest", parent_digest.to_string()); Span::current().record("directory.digest", parent_digest.to_string());
for (i, (ino, child_node)) in children.iter().skip(offset as usize).enumerate() { for (i, (ino, child_node)) in children.into_iter().skip(offset as usize).enumerate() {
let inode_data: InodeData = child_node.into(); let (inode_data, name) = InodeData::from_node(child_node);
// the second parameter will become the "offset" parameter on the next call. // the second parameter will become the "offset" parameter on the next call.
let written = add_entry( let written = add_entry(
fuse_backend_rs::api::filesystem::DirEntry { fuse_backend_rs::api::filesystem::DirEntry {
ino: *ino, ino,
offset: offset + i as u64 + 1, offset: offset + i as u64 + 1,
type_: match child_node { type_: inode_data.as_fuse_type(),
#[allow(clippy::unnecessary_cast)] name: &name,
// libc::S_IFDIR is u32 on Linux and u16 on MacOS
Node::Directory(_) => libc::S_IFDIR as u32,
#[allow(clippy::unnecessary_cast)]
// libc::S_IFDIR is u32 on Linux and u16 on MacOS
Node::File(_) => libc::S_IFREG as u32,
#[allow(clippy::unnecessary_cast)]
// libc::S_IFDIR is u32 on Linux and u16 on MacOS
Node::Symlink(_) => libc::S_IFLNK as u32,
}, },
name: child_node.get_name(), inode_data.as_fuse_entry(ino),
},
inode_data.as_fuse_entry(*ino),
)?; )?;
// If the buffer is full, add_entry will return `Ok(0)`. // If the buffer is full, add_entry will return `Ok(0)`.
if written == 0 { if written == 0 {