feat(tvix/store/fs): Add support for virtiofs backend

This adds a virtiofs daemon implementation which hooks into the existing
tvix-store filesystem implementation that is used for FUSE.

This allows adding the filesystem to a microvm without having to set up
FUSE inside the guest.

Change-Id: If80c36c9657f2289853e8d9a364bf4f1f7b7559c
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9344
Autosubmit: Connor Brewster <cbrewster@hey.com>
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
This commit is contained in:
Connor Brewster 2023-09-16 13:58:52 -05:00
parent e5f2281856
commit 993c505cdb
6 changed files with 587 additions and 5 deletions

60
tvix/Cargo.lock generated
View file

@ -775,6 +775,8 @@ dependencies = [
"log",
"mio",
"nix 0.24.3",
"vhost",
"virtio-queue",
"vm-memory",
"vmm-sys-util",
]
@ -2865,6 +2867,12 @@ dependencies = [
"tracing-subscriber",
"tvix-castore",
"url",
"vhost",
"vhost-user-backend",
"virtio-bindings 0.2.1",
"virtio-queue",
"vm-memory",
"vmm-sys-util",
"walkdir",
]
@ -2948,12 +2956,64 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "vhost"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6769e8dbf5276b4376439fbf36bb880d203bf614bf7ef444198edc24b5a9f35"
dependencies = [
"bitflags",
"libc",
"vm-memory",
"vmm-sys-util",
]
[[package]]
name = "vhost-user-backend"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f237b91db4ac339d639fb43398b52d785fa51e3c7760ac9425148863c1f4303"
dependencies = [
"libc",
"log",
"vhost",
"virtio-bindings 0.1.0",
"virtio-queue",
"vm-memory",
"vmm-sys-util",
]
[[package]]
name = "virtio-bindings"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ff512178285488516ed85f15b5d0113a7cdb89e9e8a760b269ae4f02b84bd6b"
[[package]]
name = "virtio-bindings"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c18d7b74098a946470ea265b5bacbbf877abc3373021388454de0d47735a5b98"
[[package]]
name = "virtio-queue"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ba81e2bcc21c0d2fc5e6683e79367e26ad219197423a498df801d79d5ba77bd"
dependencies = [
"log",
"virtio-bindings 0.1.0",
"vm-memory",
"vmm-sys-util",
]
[[package]]
name = "vm-memory"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "688a70366615b45575a424d9c665561c1b5ab2224d494f706b6a6812911a827c"
dependencies = [
"arc-swap",
"libc",
"winapi",
]

View file

@ -2221,6 +2221,17 @@ rec {
name = "nix";
packageId = "nix 0.24.3";
}
{
name = "vhost";
packageId = "vhost";
optional = true;
features = [ "vhost-user-slave" ];
}
{
name = "virtio-queue";
packageId = "virtio-queue";
optional = true;
}
{
name = "vm-memory";
packageId = "vm-memory";
@ -2259,7 +2270,7 @@ rec {
"virtiofs" = [ "virtio-queue" "caps" "vmm-sys-util" ];
"vmm-sys-util" = [ "dep:vmm-sys-util" ];
};
resolvedDefaultFeatures = [ "caps" "core-foundation-sys" "default" "fusedev" "vmm-sys-util" ];
resolvedDefaultFeatures = [ "caps" "core-foundation-sys" "default" "fusedev" "vhost" "vhost-user-fs" "virtio-queue" "virtiofs" "vmm-sys-util" ];
};
"futures" = rec {
crateName = "futures";
@ -8568,6 +8579,7 @@ rec {
name = "fuse-backend-rs";
packageId = "fuse-backend-rs";
optional = true;
features = [ "vhost-user-fs" ];
}
{
name = "futures";
@ -8656,6 +8668,36 @@ rec {
name = "url";
packageId = "url";
}
{
name = "vhost";
packageId = "vhost";
optional = true;
}
{
name = "vhost-user-backend";
packageId = "vhost-user-backend";
optional = true;
}
{
name = "virtio-bindings";
packageId = "virtio-bindings 0.2.1";
optional = true;
}
{
name = "virtio-queue";
packageId = "virtio-queue";
optional = true;
}
{
name = "vm-memory";
packageId = "vm-memory";
optional = true;
}
{
name = "vmm-sys-util";
packageId = "vmm-sys-util";
optional = true;
}
{
name = "walkdir";
packageId = "walkdir";
@ -8686,13 +8728,14 @@ rec {
}
];
features = {
"default" = [ "fuse" "reflection" ];
"default" = [ "fuse" "virtiofs" "reflection" ];
"fs" = [ "dep:libc" "dep:fuse-backend-rs" ];
"fuse" = [ "fs" ];
"reflection" = [ "tonic-reflection" ];
"tonic-reflection" = [ "dep:tonic-reflection" ];
"virtiofs" = [ "fs" "dep:vhost" "dep:vhost-user-backend" "dep:virtio-queue" "dep:vm-memory" "dep:vmm-sys-util" "dep:virtio-bindings" ];
};
resolvedDefaultFeatures = [ "default" "fs" "fuse" "reflection" "tonic-reflection" ];
resolvedDefaultFeatures = [ "default" "fs" "fuse" "reflection" "tonic-reflection" "virtiofs" ];
};
"typenum" = rec {
crateName = "typenum";
@ -8868,6 +8911,156 @@ rec {
];
};
"vhost" = rec {
crateName = "vhost";
version = "0.6.1";
edition = "2018";
sha256 = "0dczb95w5vcq852fzxsbc6zh7ll0p1mz7yrrchvv8xjjpy6rwxm6";
authors = [
"Liu Jiang <gerry@linux.alibaba.com>"
];
dependencies = [
{
name = "bitflags";
packageId = "bitflags";
}
{
name = "libc";
packageId = "libc";
}
{
name = "vm-memory";
packageId = "vm-memory";
}
{
name = "vmm-sys-util";
packageId = "vmm-sys-util";
}
];
devDependencies = [
{
name = "vm-memory";
packageId = "vm-memory";
features = [ "backend-mmap" ];
}
];
features = {
"vhost-net" = [ "vhost-kern" ];
"vhost-user-master" = [ "vhost-user" ];
"vhost-user-slave" = [ "vhost-user" ];
"vhost-vdpa" = [ "vhost-kern" ];
};
resolvedDefaultFeatures = [ "default" "vhost-user" "vhost-user-slave" ];
};
"vhost-user-backend" = rec {
crateName = "vhost-user-backend";
version = "0.8.0";
edition = "2018";
sha256 = "00s33wy8cj2i8b4hlxn7wd8zm1fpaa5kjhzv77b3khsavf8pn8wz";
authors = [
"The Cloud Hypervisor Authors"
];
dependencies = [
{
name = "libc";
packageId = "libc";
}
{
name = "log";
packageId = "log";
}
{
name = "vhost";
packageId = "vhost";
features = [ "vhost-user-slave" ];
}
{
name = "virtio-bindings";
packageId = "virtio-bindings 0.1.0";
}
{
name = "virtio-queue";
packageId = "virtio-queue";
}
{
name = "vm-memory";
packageId = "vm-memory";
features = [ "backend-mmap" "backend-atomic" ];
}
{
name = "vmm-sys-util";
packageId = "vmm-sys-util";
}
];
devDependencies = [
{
name = "vhost";
packageId = "vhost";
features = [ "vhost-user-master" "vhost-user-slave" ];
}
{
name = "vm-memory";
packageId = "vm-memory";
features = [ "backend-mmap" "backend-atomic" "backend-bitmap" ];
}
];
};
"virtio-bindings 0.1.0" = rec {
crateName = "virtio-bindings";
version = "0.1.0";
edition = "2018";
sha256 = "0sxxhhmz1r4s4q5pd2lykswcv9qk05fmpwc5xlb8aj45h8bi5x9z";
authors = [
"Sergio Lopez <slp@redhat.com>"
];
features = { };
};
"virtio-bindings 0.2.1" = rec {
crateName = "virtio-bindings";
version = "0.2.1";
edition = "2021";
sha256 = "162vb9rlf3fyaj23h89h6z1snxzqpfn5nnr6x9q6954a15s7p3f1";
authors = [
"Sergio Lopez <slp@redhat.com>"
];
features = { };
};
"virtio-queue" = rec {
crateName = "virtio-queue";
version = "0.7.1";
edition = "2021";
sha256 = "1gbppbapj7c0vyca88vl34cx4sp2cy9yg0v6bvyd5h11rhmixa1v";
authors = [
"The Chromium OS Authors"
];
dependencies = [
{
name = "log";
packageId = "log";
}
{
name = "virtio-bindings";
packageId = "virtio-bindings 0.1.0";
}
{
name = "vm-memory";
packageId = "vm-memory";
}
{
name = "vmm-sys-util";
packageId = "vmm-sys-util";
}
];
devDependencies = [
{
name = "vm-memory";
packageId = "vm-memory";
features = [ "backend-mmap" "backend-atomic" ];
}
];
features = { };
};
"vm-memory" = rec {
crateName = "vm-memory";
version = "0.10.0";
@ -8877,6 +9070,11 @@ rec {
"Liu Jiang <gerry@linux.alibaba.com>"
];
dependencies = [
{
name = "arc-swap";
packageId = "arc-swap";
optional = true;
}
{
name = "libc";
packageId = "libc";
@ -8892,7 +9090,7 @@ rec {
"arc-swap" = [ "dep:arc-swap" ];
"backend-atomic" = [ "arc-swap" ];
};
resolvedDefaultFeatures = [ "backend-mmap" "default" ];
resolvedDefaultFeatures = [ "arc-swap" "backend-atomic" "backend-mmap" "default" ];
};
"vmm-sys-util" = rec {
crateName = "vmm-sys-util";

View file

@ -36,6 +36,35 @@ optional = true
# TODO: Switch back to upstream version once https://github.com/cloud-hypervisor/fuse-backend-rs/pull/153 lands.
git = "https://github.com/cbrewster/fuse-backend-rs.git"
branch = "optional-allow_other"
# Ideally this would only be enabled if virtiofs is enabled
# Ex: virtiofs = [..., "fuse-backend-rs?/vhost-user-fs", ...]
# However, crate2nix doesn't properly understand this syntax and doesn't
# enable this feature properly.
features = ["vhost-user-fs"]
[dependencies.vhost]
optional = true
version = "0.6"
[dependencies.vhost-user-backend]
optional = true
version = "0.8"
[dependencies.virtio-queue]
optional = true
version = "0.7"
[dependencies.vm-memory]
optional = true
version = "0.10"
[dependencies.vmm-sys-util]
optional = true
version = "0.11"
[dependencies.virtio-bindings]
optional = true
version = "0.2.1"
[dependencies.tonic-reflection]
optional = true
@ -55,7 +84,8 @@ tempfile = "3.3.0"
tonic-mock = { git = "https://github.com/brainrake/tonic-mock", branch = "bump-dependencies" }
[features]
default = ["fuse", "reflection"]
default = ["fuse", "virtiofs", "reflection"]
fs = ["dep:libc", "dep:fuse-backend-rs"]
virtiofs = ["fs", "dep:vhost", "dep:vhost-user-backend", "dep:virtio-queue", "dep:vm-memory", "dep:vmm-sys-util", "dep:virtio-bindings"]
fuse = ["fs"]
reflection = ["tonic-reflection"]

View file

@ -29,6 +29,9 @@ use tvix_store::fs::TvixStoreFs;
#[cfg(feature = "fuse")]
use tvix_store::fs::fuse::FuseDaemon;
#[cfg(feature = "virtiofs")]
use tvix_store::fs::virtiofs::start_virtiofs_daemon;
#[cfg(feature = "reflection")]
use tvix_castore::proto::FILE_DESCRIPTOR_SET as CASTORE_FILE_DESCRIPTOR_SET;
#[cfg(feature = "reflection")]
@ -105,6 +108,28 @@ enum Commands {
#[arg(long, env, default_value_t = default_threads())]
threads: usize,
/// Whether to list elements at the root of the mount point.
/// This is useful if your PathInfoService doesn't provide an
/// (exhaustive) listing.
#[clap(long, short, action)]
list_root: bool,
},
/// Starts a tvix-store virtiofs daemon at the given socket path.
#[cfg(feature = "virtiofs")]
#[command(name = "virtiofs")]
VirtioFs {
#[clap(value_name = "PATH")]
socket: PathBuf,
#[arg(long, env, default_value = "grpc+http://[::1]:8000")]
blob_service_addr: String,
#[arg(long, env, default_value = "grpc+http://[::1]:8000")]
directory_service_addr: String,
#[arg(long, env, default_value = "grpc+http://[::1]:8000")]
path_info_service_addr: String,
/// Whether to list elements at the root of the mount point.
/// This is useful if your PathInfoService doesn't provide an
/// (exhaustive) listing.
@ -328,6 +353,35 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
})
.await??;
}
#[cfg(feature = "virtiofs")]
Commands::VirtioFs {
socket,
blob_service_addr,
directory_service_addr,
path_info_service_addr,
list_root,
} => {
let blob_service = blobservice::from_addr(&blob_service_addr)?;
let directory_service = directoryservice::from_addr(&directory_service_addr)?;
let path_info_service = pathinfoservice::from_addr(
&path_info_service_addr,
blob_service.clone(),
directory_service.clone(),
)?;
tokio::task::spawn_blocking(move || {
let fs = TvixStoreFs::new(
blob_service,
directory_service,
path_info_service,
list_root,
);
info!("starting tvix-store virtiofs daemon on {:?}", &socket);
start_virtiofs_daemon(fs, socket)
})
.await??;
}
};
Ok(())
}

View file

@ -5,6 +5,9 @@ mod inodes;
#[cfg(feature = "fuse")]
pub mod fuse;
#[cfg(feature = "virtiofs")]
pub mod virtiofs;
#[cfg(test)]
mod tests;

View file

@ -0,0 +1,237 @@
use std::{
convert, error, fmt, io,
ops::Deref,
path::Path,
sync::{Arc, MutexGuard, RwLock},
};
use fuse_backend_rs::{
api::{filesystem::FileSystem, server::Server},
transport::{FsCacheReqHandler, Reader, VirtioFsWriter},
};
use tracing::error;
use vhost::vhost_user::{
Listener, SlaveFsCacheReq, VhostUserProtocolFeatures, VhostUserVirtioFeatures,
};
use vhost_user_backend::{VhostUserBackendMut, VhostUserDaemon, VringMutex, VringState, VringT};
use virtio_bindings::bindings::virtio_ring::{
VIRTIO_RING_F_EVENT_IDX, VIRTIO_RING_F_INDIRECT_DESC,
};
use virtio_queue::QueueT;
use vm_memory::{GuestAddressSpace, GuestMemoryAtomic, GuestMemoryMmap};
use vmm_sys_util::epoll::EventSet;
/// Bit position of the virtio "version 1" feature flag; used as the shift
/// amount when `features()` builds the advertised feature mask.
const VIRTIO_F_VERSION_1: u32 = 32;
/// Number of virtqueues reported by `num_queues()`. `handle_event` treats
/// index 0 as the high priority queue and index 1 as the regular priority queue.
const NUM_QUEUES: usize = 2;
/// Maximum queue size reported by `max_queue_size()`.
const QUEUE_SIZE: usize = 1024;
/// Errors raised by the vhost-user filesystem backend in this module.
///
/// The `Display` impl prints the `Debug` form behind a `vhost_user_fs_error: `
/// prefix, and a `From` impl converts values into `io::Error` of kind `Other`.
#[derive(Debug)]
enum Error {
/// Failed to handle non-input event.
HandleEventNotEpollIn,
/// Failed to handle unknown event.
HandleEventUnknownEvent,
/// Invalid descriptor chain.
// NOTE(review): the variant name is misspelled ("Invlaid"). Renaming it means
// updating the `process_queue` call sites in the same change.
InvlaidDescriptorChain,
/// Failed to handle filesystem requests.
HandleRequests(fuse_backend_rs::Error),
/// Failed to construct new vhost user daemon.
NewDaemon,
/// Failed to start the vhost user daemon.
StartDaemon,
/// Failed to wait for the vhost user daemon.
WaitDaemon,
}
/// Renders the error as its `Debug` representation behind a fixed
/// `vhost_user_fs_error: ` prefix.
impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("vhost_user_fs_error: {self:?}"))
    }
}
// Marker impl using the default `std::error::Error` methods; this is what lets
// the `From<Error> for io::Error` impl hand the value to `io::Error::new`.
impl error::Error for Error {}
impl convert::From<Error> for io::Error {
fn from(e: Error) -> Self {
io::Error::new(io::ErrorKind::Other, e)
}
}
/// State of the vhost-user backend that serves a [`FileSystem`] over virtiofs.
struct VhostUserFsBackend<FS>
where
FS: FileSystem + Send + Sync,
{
/// FUSE server that `process_queue` hands each request to via `handle_message`.
server: Arc<Server<Arc<FS>>>,
/// Whether event-index handling is enabled; updated by `set_event_idx`.
event_idx: bool,
/// Guest memory handle used when popping descriptor chains and when
/// enabling or querying queue notifications.
guest_mem: GuestMemoryAtomic<GuestMemoryMmap>,
/// Slave request channel stored by `set_slave_req_fd`; passed on to
/// `handle_message` when present.
cache_req: Option<SlaveFsCacheReq>,
}
impl<FS> VhostUserFsBackend<FS>
where
    FS: FileSystem + Send + Sync,
{
    /// Drains the descriptor chains currently available on `vring`, passing
    /// each one to the FUSE server and then marking it as used.
    ///
    /// Returns `Ok(true)` when at least one chain was processed, `Ok(false)`
    /// otherwise.
    ///
    /// # Errors
    ///
    /// Returns early (converted to `io::Error`) when a reader/writer cannot be
    /// built from a chain or when the server fails to handle a request. On
    /// that path the chain is not added back to the used ring and the guest is
    /// not signalled.
    fn process_queue(&mut self, vring: &mut MutexGuard<VringState>) -> std::io::Result<bool> {
        let mut processed_any = false;

        while let Some(chain) = vring
            .get_queue_mut()
            .pop_descriptor_chain(self.guest_mem.memory())
        {
            let mem = chain.memory();

            // The same chain backs both directions of the request.
            let request_reader = Reader::from_descriptor_chain(mem, chain.clone())
                .map_err(|_| Error::InvlaidDescriptorChain)?;
            let reply_writer = VirtioFsWriter::new(mem, chain.clone())
                .map_err(|_| Error::InvlaidDescriptorChain)?;

            self.server
                .handle_message(
                    request_reader,
                    reply_writer.into(),
                    self.cache_req
                        .as_mut()
                        .map(|req| req as &mut dyn FsCacheReqHandler),
                    None,
                )
                .map_err(Error::HandleRequests)?;

            // TODO: Is len 0 correct?
            let head = chain.head_index();
            if let Err(error) = vring.get_queue_mut().add_used(mem, head, 0) {
                error!(?error, "failed to add desc back to ring");
            }

            // TODO: What happens if we error out before here?
            processed_any = true;
        }

        // Without event-idx the guest is always signalled; with it, the queue
        // decides, and a failed check falls back to signalling.
        let should_notify = !self.event_idx
            || vring
                .get_queue_mut()
                .needs_notification(self.guest_mem.memory().deref())
                .unwrap_or_else(|error| {
                    error!(?error, "failed to check if queue needs notification");
                    true
                });

        if should_notify {
            if let Err(error) = vring.signal_used_queue() {
                error!(?error, "failed to signal used queue");
            }
        }

        Ok(processed_any)
    }
}
impl<FS> VhostUserBackendMut<VringMutex> for VhostUserFsBackend<FS>
where
FS: FileSystem + Send + Sync,
{
fn num_queues(&self) -> usize {
NUM_QUEUES
}
fn max_queue_size(&self) -> usize {
QUEUE_SIZE
}
fn features(&self) -> u64 {
1 << VIRTIO_F_VERSION_1
| 1 << VIRTIO_RING_F_INDIRECT_DESC
| 1 << VIRTIO_RING_F_EVENT_IDX
| VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits()
}
fn protocol_features(&self) -> VhostUserProtocolFeatures {
VhostUserProtocolFeatures::MQ | VhostUserProtocolFeatures::SLAVE_REQ
}
fn set_event_idx(&mut self, enabled: bool) {
self.event_idx = enabled;
}
fn update_memory(&mut self, _mem: GuestMemoryAtomic<GuestMemoryMmap>) -> std::io::Result<()> {
// This is what most the vhost user implementations do...
Ok(())
}
fn set_slave_req_fd(&mut self, cache_req: SlaveFsCacheReq) {
self.cache_req = Some(cache_req);
}
fn handle_event(
&mut self,
device_event: u16,
evset: vmm_sys_util::epoll::EventSet,
vrings: &[VringMutex],
_thread_id: usize,
) -> std::io::Result<bool> {
if evset != EventSet::IN {
return Err(Error::HandleEventNotEpollIn.into());
}
let mut queue = match device_event {
// High priority queue
0 => vrings[0].get_mut(),
// Regurlar priority queue
1 => vrings[1].get_mut(),
_ => {
return Err(Error::HandleEventUnknownEvent.into());
}
};
if self.event_idx {
loop {
queue
.get_queue_mut()
.enable_notification(self.guest_mem.memory().deref())
.map_err(|e| io::Error::new(io::ErrorKind::Other, e.to_string()))?;
if !self.process_queue(&mut queue)? {
break;
}
}
} else {
self.process_queue(&mut queue)?;
}
Ok(false)
}
}
pub fn start_virtiofs_daemon<FS, P>(fs: FS, socket: P) -> io::Result<()>
where
FS: FileSystem + Send + Sync + 'static,
P: AsRef<Path>,
{
let guest_mem = GuestMemoryAtomic::new(GuestMemoryMmap::new());
let server = Arc::new(fuse_backend_rs::api::server::Server::new(Arc::new(fs)));
let backend = Arc::new(RwLock::new(VhostUserFsBackend {
server,
guest_mem: guest_mem.clone(),
event_idx: false,
cache_req: None,
}));
let listener = Listener::new(socket, true).unwrap();
let mut fs_daemon =
VhostUserDaemon::new(String::from("vhost-user-fs-tvix-store"), backend, guest_mem)
.map_err(|_| Error::NewDaemon)?;
fs_daemon.start(listener).map_err(|_| Error::StartDaemon)?;
fs_daemon.wait().map_err(|_| Error::WaitDaemon)?;
Ok(())
}