refactor(tvix/store/pathinfosvc): add from_addr

Change-Id: I24e822351a837fce2aed568a647d009099ef32ec
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8747
Reviewed-by: tazjin <tazjin@tvl.su>
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
Florian Klink 2023-06-12 00:04:00 +03:00 committed by clbot
parent bb7c76739a
commit 35bff2bda6
6 changed files with 552 additions and 18 deletions

View file

@ -8,13 +8,10 @@ use std::sync::Arc;
use tracing_subscriber::prelude::*;
use tvix_store::blobservice;
use tvix_store::directoryservice;
use tvix_store::pathinfoservice::GRPCPathInfoService;
use tvix_store::pathinfoservice::PathInfoService;
use tvix_store::pathinfoservice::SledPathInfoService;
use tvix_store::pathinfoservice;
use tvix_store::proto::blob_service_server::BlobServiceServer;
use tvix_store::proto::directory_service_server::DirectoryServiceServer;
use tvix_store::proto::node::Node;
use tvix_store::proto::path_info_service_client::PathInfoServiceClient;
use tvix_store::proto::path_info_service_server::PathInfoServiceServer;
use tvix_store::proto::GRPCBlobServiceWrapper;
use tvix_store::proto::GRPCDirectoryServiceWrapper;
@ -59,6 +56,9 @@ enum Commands {
default_value = "sled:///var/lib/tvix-store/directories.sled"
)]
directory_service_addr: String,
#[arg(long, env, default_value = "sled:///var/lib/tvix-store/pathinfo.sled")]
path_info_service_addr: String,
},
/// Imports a list of paths into the store (not using the daemon)
Import {
@ -70,6 +70,9 @@ enum Commands {
#[arg(long, env, default_value = "grpc+http://[::1]:8000")]
directory_service_addr: String,
#[arg(long, env, default_value = "grpc+http://[::1]:8000")]
path_info_service_addr: String,
},
/// Mounts a tvix-store at the given mountpoint
#[cfg(feature = "fuse")]
@ -82,6 +85,9 @@ enum Commands {
#[arg(long, env, default_value = "grpc+http://[::1]:8000")]
directory_service_addr: String,
#[arg(long, env, default_value = "grpc+http://[::1]:8000")]
path_info_service_addr: String,
},
}
@ -119,15 +125,16 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
listen_address,
blob_service_addr,
directory_service_addr,
path_info_service_addr,
} => {
// initialize stores
let blob_service = blobservice::from_addr(&blob_service_addr).await?;
let directory_service = directoryservice::from_addr(&directory_service_addr)?;
let path_info_service: Arc<dyn PathInfoService> = Arc::new(SledPathInfoService::new(
"pathinfo.sled".into(),
let path_info_service = pathinfoservice::from_addr(
&path_info_service_addr,
blob_service.clone(),
directory_service.clone(),
)?);
)?;
let listen_address = listen_address
.unwrap_or_else(|| "[::]:8000".to_string())
@ -164,18 +171,20 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
paths,
blob_service_addr,
directory_service_addr,
path_info_service_addr,
} => {
let blob_service = blobservice::from_addr(&blob_service_addr).await?;
let directory_service = directoryservice::from_addr(&directory_service_addr)?;
let path_info_service_client =
PathInfoServiceClient::connect("http://[::1]:8000").await?;
let path_info_service =
GRPCPathInfoService::from_client(path_info_service_client.clone());
let path_info_service = pathinfoservice::from_addr(
&path_info_service_addr,
blob_service.clone(),
directory_service.clone(),
)?;
let io = Arc::new(TvixStoreIO::new(
blob_service,
directory_service,
Arc::new(path_info_service),
path_info_service,
));
let tasks = paths
@ -200,16 +209,18 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
dest,
blob_service_addr,
directory_service_addr,
path_info_service_addr,
} => {
let blob_service = blobservice::from_addr(&blob_service_addr).await?;
let directory_service = directoryservice::from_addr(&directory_service_addr)?;
let path_info_service_client =
PathInfoServiceClient::connect("http://[::1]:8000").await?;
let path_info_service =
GRPCPathInfoService::from_client(path_info_service_client.clone());
let path_info_service = pathinfoservice::from_addr(
&path_info_service_addr,
blob_service.clone(),
directory_service.clone(),
)?;
tokio::task::spawn_blocking(move || {
let f = FUSE::new(blob_service, directory_service, Arc::new(path_info_service));
let f = FUSE::new(blob_service, directory_service, path_info_service);
fuser::mount2(f, &dest, &[])
})
.await??

View file

@ -0,0 +1,58 @@
use std::sync::Arc;
use url::Url;
use crate::{blobservice::BlobService, directoryservice::DirectoryService};
use super::{GRPCPathInfoService, MemoryPathInfoService, PathInfoService, SledPathInfoService};
/// Constructs a new instance of a [PathInfoService] from an URI.
///
/// The following URIs are supported:
/// - `memory:`
/// Uses a in-memory implementation.
/// - `sled:`
/// Uses a in-memory sled implementation.
/// - `sled:///absolute/path/to/somewhere`
/// Uses sled, using a path on the disk for persistency. Can be only opened
/// from one process at the same time.
/// - `grpc+unix:///absolute/path/to/somewhere`
/// Connects to a local tvix-store gRPC service via Unix socket.
/// - `grpc+http://host:port`, `grpc+https://host:port`
/// Connects to a (remote) tvix-store gRPC service.
///
/// As the [PathInfoService] needs to talk to [BlobService] and [DirectoryService],
/// these also need to be passed in.
pub fn from_addr(
uri: &str,
blob_service: Arc<dyn BlobService>,
directory_service: Arc<dyn DirectoryService>,
) -> Result<Arc<dyn PathInfoService>, crate::Error> {
let url = Url::parse(uri).map_err(|e| {
crate::Error::StorageError(format!("unable to parse url: {}", e.to_string()))
})?;
Ok(if url.scheme() == "memory" {
Arc::new(MemoryPathInfoService::from_url(
&url,
blob_service,
directory_service,
)?)
} else if url.scheme() == "sled" {
Arc::new(SledPathInfoService::from_url(
&url,
blob_service,
directory_service,
)?)
} else if url.scheme().starts_with("grpc+") {
Arc::new(GRPCPathInfoService::from_url(
&url,
blob_service,
directory_service,
)?)
} else {
Err(crate::Error::StorageError(format!(
"unknown scheme: {}",
url.scheme()
)))?
})
}

View file

@ -1,5 +1,7 @@
use super::PathInfoService;
use crate::proto;
use crate::{blobservice::BlobService, directoryservice::DirectoryService, proto};
use std::sync::Arc;
use tokio::net::UnixStream;
use tonic::{transport::Channel, Code, Status};
/// Connects to a (remote) tvix-store PathInfoService over gRPC.
@ -27,6 +29,65 @@ impl GRPCPathInfoService {
}
impl PathInfoService for GRPCPathInfoService {
/// Constructs a [GRPCPathInfoService] from the passed [url::Url]:
/// - scheme has to match `grpc+*://`.
/// That's normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts.
/// - In the case of unix sockets, there must be a path, but may not be a host.
/// - In the case of non-unix sockets, there must be a host, but no path.
/// The blob_service and directory_service arguments are ignored, because the gRPC service already provides answers to these questions.
fn from_url(
url: &url::Url,
_blob_service: Arc<dyn BlobService>,
_directory_service: Arc<dyn DirectoryService>,
) -> Result<Self, crate::Error> {
// Start checking for the scheme to start with grpc+.
match url.scheme().strip_prefix("grpc+") {
None => Err(crate::Error::StorageError("invalid scheme".to_string())),
Some(rest) => {
if rest == "unix" {
if url.host_str().is_some() {
return Err(crate::Error::StorageError(
"host may not be set".to_string(),
));
}
let path = url.path().to_string();
let channel = tonic::transport::Endpoint::try_from("http://[::]:50051") // doesn't matter
.unwrap()
.connect_with_connector_lazy(tower::service_fn(
move |_: tonic::transport::Uri| UnixStream::connect(path.clone()),
));
let grpc_client =
proto::path_info_service_client::PathInfoServiceClient::new(channel);
Ok(Self::from_client(grpc_client))
} else {
// ensure path is empty, not supported with gRPC.
if !url.path().is_empty() {
return Err(crate::Error::StorageError(
"path may not be set".to_string(),
));
}
// clone the uri, and drop the grpc+ from the scheme.
// Recreate a new uri with the `grpc+` prefix dropped from the scheme.
// We can't use `url.set_scheme(rest)`, as it disallows
// setting something http(s) that previously wasn't.
let url = {
let url_str = url.to_string();
let s_stripped = url_str.strip_prefix("grpc+").unwrap();
url::Url::parse(s_stripped).unwrap()
};
let channel = tonic::transport::Endpoint::try_from(url.to_string())
.unwrap()
.connect_lazy();
let grpc_client =
proto::path_info_service_client::PathInfoServiceClient::new(channel);
Ok(Self::from_client(grpc_client))
}
}
}
}
fn get(&self, digest: [u8; 20]) -> Result<Option<proto::PathInfo>, crate::Error> {
// Get a new handle to the gRPC client.
let mut grpc_client = self.grpc_client.clone();
@ -99,3 +160,172 @@ impl PathInfoService for GRPCPathInfoService {
Ok((resp.nar_size, nar_sha256))
}
}
// Tests for constructing a GRPCPathInfoService from URLs, plus one
// end-to-end round trip over a unix socket.
#[cfg(test)]
mod tests {
use std::sync::Arc;
use std::thread;
use tempfile::TempDir;
use tokio::net::UnixListener;
use tokio::task;
use tokio::time;
use tokio_stream::wrappers::UnixListenerStream;
use crate::pathinfoservice::MemoryPathInfoService;
use crate::proto::GRPCPathInfoServiceWrapper;
use crate::tests::fixtures;
use crate::tests::utils::gen_blob_service;
use crate::tests::utils::gen_directory_service;
use super::GRPCPathInfoService;
use super::PathInfoService;
/// This uses the wrong scheme (`http` rather than `grpc+*`), which must be rejected.
#[test]
fn test_invalid_scheme() {
let url = url::Url::parse("http://foo.example/test").expect("must parse");
assert!(
GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
.is_err()
);
}
/// This uses the correct scheme for a unix socket.
/// The fact that /path/to/somewhere doesn't exist yet is no problem, because we connect lazily.
#[tokio::test]
async fn test_valid_unix_path() {
let url = url::Url::parse("grpc+unix:///path/to/somewhere").expect("must parse");
assert!(
GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
.is_ok()
);
}
/// This uses the correct scheme for a unix socket,
/// but sets a host, which is unsupported.
#[tokio::test]
async fn test_invalid_unix_path_with_domain() {
let url =
url::Url::parse("grpc+unix://host.example/path/to/somewhere").expect("must parse");
assert!(
GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
.is_err()
);
}
/// This uses the correct scheme for a HTTP server.
/// The fact that nothing is listening there is no problem, because we connect lazily.
#[tokio::test]
async fn test_valid_http() {
let url = url::Url::parse("grpc+http://localhost").expect("must parse");
assert!(
GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
.is_ok()
);
}
/// This uses the correct scheme for a HTTPS server.
/// The fact that nothing is listening there is no problem, because we connect lazily.
#[tokio::test]
async fn test_valid_https() {
let url = url::Url::parse("grpc+https://localhost").expect("must parse");
assert!(
GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
.is_ok()
);
}
/// This uses the correct scheme, but also specifies
/// an additional path, which is not supported for gRPC.
/// The fact that nothing is listening there is no problem, because we connect lazily.
#[tokio::test]
async fn test_invalid_http_with_path() {
let url = url::Url::parse("grpc+https://localhost/some-path").expect("must parse");
assert!(
GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
.is_err()
);
}
/// This uses the correct scheme for a unix socket, and provides a server on the other side.
/// The client is built first (before the socket exists), then a server is
/// started on the same socket path and a single `get` is sent through it.
#[tokio::test]
async fn test_valid_unix_path_ping_pong() {
let tmpdir = TempDir::new().unwrap();
let path = tmpdir.path().join("daemon");
// prepare a client
// The URL is parsed with a dummy path first, which is then replaced by the
// socket path inside the temporary directory.
let client = {
let mut url = url::Url::parse("grpc+unix:///path/to/somewhere").expect("must parse");
url.set_path(path.to_str().unwrap());
GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
.expect("must succeed")
};
// The server closure takes ownership of its own copy of the socket path;
// `path` itself is still needed below to poll for the socket.
let path_copy = path.clone();
// Spin up a server, in a thread far away, which spawns its own tokio runtime,
// and blocks on the task.
thread::spawn(move || {
// Create the runtime
let rt = tokio::runtime::Runtime::new().unwrap();
// Get a handle from this runtime
let handle = rt.handle();
let task = handle.spawn(async {
let uds = UnixListener::bind(path_copy).unwrap();
let uds_stream = UnixListenerStream::new(uds);
// spin up a new server
// It serves a MemoryPathInfoService wrapped in the gRPC wrapper.
let mut server = tonic::transport::Server::builder();
let router = server.add_service(
crate::proto::path_info_service_server::PathInfoServiceServer::new(
GRPCPathInfoServiceWrapper::from(Arc::new(MemoryPathInfoService::new(
gen_blob_service(),
gen_directory_service(),
))
as Arc<dyn PathInfoService>),
),
);
router.serve_with_incoming(uds_stream).await
});
handle.block_on(task)
});
// wait for the socket to be created
// The range 1..20 gives 19 attempts, sleeping 20ms after each miss.
{
let mut socket_created = false;
for _try in 1..20 {
if path.exists() {
socket_created = true;
break;
}
tokio::time::sleep(time::Duration::from_millis(20)).await;
}
assert!(
socket_created,
"expected socket path to eventually get created, but never happened"
);
}
// `get` is a synchronous method, so it is run via spawn_blocking
// (presumably so it doesn't block the async test runtime -- confirm).
let pi = task::spawn_blocking(move || {
client
// Convert the fixture into the fixed-size digest array `get` expects.
.get(fixtures::DUMMY_OUTPUT_HASH.to_vec().try_into().unwrap())
.expect("must not be error")
})
.await
.expect("must not be err");
// Nothing was put into the server-side service, so the lookup is expected
// to come back empty.
assert!(pi.is_none());
}
}

View file

@ -29,6 +29,26 @@ impl MemoryPathInfoService {
}
impl PathInfoService for MemoryPathInfoService {
/// Builds a [MemoryPathInfoService] out of the given [url::Url].
///
/// The URL is only accepted when all of the following hold:
/// - the scheme is exactly `memory`,
/// - no host is set,
/// - the path is empty.
///
/// Any violation is reported as a [crate::Error::StorageError].
fn from_url(
    url: &url::Url,
    blob_service: Arc<dyn BlobService>,
    directory_service: Arc<dyn DirectoryService>,
) -> Result<Self, Error> {
    let scheme_matches = url.scheme() == "memory";
    if !scheme_matches {
        return Err(crate::Error::StorageError("invalid scheme".to_string()));
    }

    // A valid memory URL carries nothing besides its scheme.
    let is_bare = !url.has_host() && url.path().is_empty();
    if !is_bare {
        return Err(crate::Error::StorageError("invalid url".to_string()));
    }

    Ok(Self::new(blob_service, directory_service))
}
fn get(&self, digest: [u8; 20]) -> Result<Option<proto::PathInfo>, Error> {
let db = self.db.read().unwrap();
@ -66,3 +86,67 @@ impl PathInfoService for MemoryPathInfoService {
.map_err(|e| Error::StorageError(e.to_string()))
}
}
#[cfg(test)]
mod tests {
    use super::MemoryPathInfoService;
    use super::PathInfoService;
    use crate::tests::utils::gen_blob_service;
    use crate::tests::utils::gen_directory_service;

    /// Parses `uri` (which must be a syntactically valid URL) and hands it to
    /// [MemoryPathInfoService::from_url] together with freshly generated
    /// blob and directory services.
    fn construct(uri: &str) -> Result<MemoryPathInfoService, crate::Error> {
        let parsed = url::Url::parse(uri).expect("must parse");
        MemoryPathInfoService::from_url(&parsed, gen_blob_service(), gen_directory_service())
    }

    /// A scheme other than `memory` is rejected.
    #[test]
    fn test_invalid_scheme() {
        assert!(construct("http://foo.example/test").is_err());
    }

    /// The right scheme with neither host nor path is accepted.
    #[test]
    fn test_valid_scheme() {
        assert!(construct("memory://").is_ok());
    }

    /// Setting the host to `foo` is rejected.
    #[test]
    fn test_invalid_host() {
        assert!(construct("memory://foo").is_err());
    }

    /// The path "/" counts as a path and is rejected.
    #[test]
    fn test_invalid_has_path() {
        assert!(construct("memory:///").is_err());
    }

    /// The path "/foo" is rejected as well.
    #[test]
    fn test_invalid_path2() {
        assert!(construct("memory:///foo").is_err());
    }
}

View file

@ -1,9 +1,15 @@
mod from_addr;
mod grpc;
mod memory;
mod sled;
use std::sync::Arc;
use crate::blobservice::BlobService;
use crate::directoryservice::DirectoryService;
use crate::{proto, Error};
pub use self::from_addr::from_addr;
pub use self::grpc::GRPCPathInfoService;
pub use self::memory::MemoryPathInfoService;
pub use self::sled::SledPathInfoService;
@ -11,6 +17,17 @@ pub use self::sled::SledPathInfoService;
/// The base trait all PathInfo services need to implement.
/// This is a simple get and put of [proto::Directory], returning their digest.
pub trait PathInfoService: Send + Sync {
/// Create a new instance by passing in a connection URL, as well
/// as instances of a [BlobService] and [DirectoryService] (as the
/// [PathInfoService] needs to talk to them).
fn from_url(
url: &url::Url,
blob_service: Arc<dyn BlobService>,
directory_service: Arc<dyn DirectoryService>,
) -> Result<Self, Error>
where
Self: Sized;
/// Retrieve a PathInfo message by the output digest.
fn get(&self, digest: [u8; 20]) -> Result<Option<proto::PathInfo>, Error>;

View file

@ -50,6 +50,42 @@ impl SledPathInfoService {
}
impl PathInfoService for SledPathInfoService {
/// Constructs a [SledBlobService] from the passed [url::Url]:
/// - scheme has to be `sled://`
/// - there may not be a host.
/// - a path to the sled needs to be provided (which may not be `/`).
fn from_url(
url: &url::Url,
blob_service: Arc<dyn BlobService>,
directory_service: Arc<dyn DirectoryService>,
) -> Result<Self, Error> {
if url.scheme() != "sled" {
return Err(crate::Error::StorageError("invalid scheme".to_string()));
}
if url.has_host() {
return Err(crate::Error::StorageError(format!(
"invalid host: {}",
url.host().unwrap()
)));
}
// TODO: expose compression and other parameters as URL parameters, drop new and new_temporary?
if url.path().is_empty() {
Self::new_temporary(blob_service, directory_service)
.map_err(|e| Error::StorageError(e.to_string()))
} else {
if url.path() == "/" {
Err(crate::Error::StorageError(
"cowardly refusing to open / with sled".to_string(),
))
} else {
Self::new(url.path().into(), blob_service, directory_service)
.map_err(|e| Error::StorageError(e.to_string()))
}
}
}
fn get(&self, digest: [u8; 20]) -> Result<Option<proto::PathInfo>, Error> {
match self.db.get(digest) {
Ok(None) => Ok(None),
@ -103,3 +139,101 @@ impl PathInfoService for SledPathInfoService {
.map_err(|e| Error::StorageError(e.to_string()))
}
}
#[cfg(test)]
mod tests {
    use tempfile::TempDir;

    use super::PathInfoService;
    use super::SledPathInfoService;
    use crate::tests::utils::gen_blob_service;
    use crate::tests::utils::gen_directory_service;

    /// Hands `url` to [SledPathInfoService::from_url] together with freshly
    /// generated blob and directory services.
    fn construct(url: &url::Url) -> Result<SledPathInfoService, crate::Error> {
        SledPathInfoService::from_url(url, gen_blob_service(), gen_directory_service())
    }

    /// Parses `base` and then replaces its path with `path`.
    fn url_with_path(base: &str, path: &std::path::Path) -> url::Url {
        let mut url = url::Url::parse(base).expect("must parse");
        url.set_path(path.to_str().unwrap());
        url
    }

    /// A scheme other than `sled` is rejected.
    #[test]
    fn test_invalid_scheme() {
        let url = url::Url::parse("http://foo.example/test").expect("must parse");
        assert!(construct(&url).is_err());
    }

    /// The correct scheme without any path is accepted (temporary sled).
    #[test]
    fn test_valid_scheme_temporary() {
        let url = url::Url::parse("sled://").expect("must parse");
        assert!(construct(&url).is_ok());
    }

    /// This sets the path to a location that doesn't exist, which should fail
    /// (as sled doesn't mkdir -p).
    ///
    /// NOTE(review): the URL used here also carries the host `foo.example`,
    /// and `from_url` rejects any host before it looks at the path, so this
    /// test currently passes because of the host, not because of the missing
    /// directory. Whether it still fails without the host needs confirming.
    #[test]
    fn test_nonexistent_path() {
        let tmpdir = TempDir::new().unwrap();
        let missing = tmpdir.path().join("foo").join("bar");
        let url = url_with_path("sled://foo.example", &missing);
        assert!(construct(&url).is_err());
    }

    /// The correct scheme with `/` as path is rejected (for obvious reasons).
    #[test]
    fn test_invalid_path_root() {
        let url = url::Url::parse("sled:///").expect("must parse");
        assert!(construct(&url).is_err());
    }

    /// The correct scheme with a tempdir as location is accepted.
    #[test]
    fn test_valid_scheme_path() {
        let tmpdir = TempDir::new().unwrap();
        let url = url_with_path("sled://", tmpdir.path());
        assert!(construct(&url).is_ok());
    }

    /// Setting a host, rather than a path, is rejected.
    #[test]
    fn test_invalid_host() {
        let url = url::Url::parse("sled://foo.example").expect("must parse");
        assert!(construct(&url).is_err());
    }

    /// Setting a host AND a valid path is rejected.
    #[test]
    fn test_invalid_host_and_path() {
        let tmpdir = TempDir::new().unwrap();
        let url = url_with_path("sled://foo.example", tmpdir.path());
        assert!(construct(&url).is_err());
    }
}