refactor(tvix): use composition & registry for from_addr

Change-Id: I3c94ecb5958294b5973c6fcdf5ee9c0d37fa54ad
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11976
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Autosubmit: yuka <yuka@yuka.dev>
This commit is contained in:
Yureka 2024-07-18 19:09:07 +02:00 committed by yuka
parent 79317be214
commit 168e4fda59
18 changed files with 316 additions and 229 deletions

View file

@ -1,5 +1,3 @@
use std::sync::Arc;
use clap::Parser; use clap::Parser;
use clap::Subcommand; use clap::Subcommand;
use tokio_listener::Listener; use tokio_listener::Listener;
@ -51,7 +49,7 @@ enum Commands {
} }
#[tokio::main] #[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> { async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let cli = Cli::parse(); let cli = Cli::parse();
let _ = tvix_tracing::TracingBuilder::default() let _ = tvix_tracing::TracingBuilder::default()
@ -69,11 +67,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let blob_service = blobservice::from_addr(&blob_service_addr).await?; let blob_service = blobservice::from_addr(&blob_service_addr).await?;
let directory_service = directoryservice::from_addr(&directory_service_addr).await?; let directory_service = directoryservice::from_addr(&directory_service_addr).await?;
let build_service = buildservice::from_addr( let build_service =
&build_service_addr, buildservice::from_addr(&build_service_addr, blob_service, directory_service)
Arc::from(blob_service),
Arc::from(directory_service),
)
.await?; .await?;
let listen_address = listen_address let listen_address = listen_address

View file

@ -6,7 +6,7 @@ use tonic::async_trait;
use tracing::{instrument, warn}; use tracing::{instrument, warn};
use crate::composition::{CompositionContext, ServiceBuilder}; use crate::composition::{CompositionContext, ServiceBuilder};
use crate::B3Digest; use crate::{B3Digest, Error};
use super::{naive_seeker::NaiveSeeker, BlobReader, BlobService, BlobWriter}; use super::{naive_seeker::NaiveSeeker, BlobReader, BlobService, BlobWriter};
@ -103,6 +103,16 @@ pub struct CombinedBlobServiceConfig {
remote: String, remote: String,
} }
impl TryFrom<url::Url> for CombinedBlobServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(_url: url::Url) -> Result<Self, Self::Error> {
Err(Error::StorageError(
"Instantiating a CombinedBlobService from a url is not supported".into(),
)
.into())
}
}
#[async_trait] #[async_trait]
impl ServiceBuilder for CombinedBlobServiceConfig { impl ServiceBuilder for CombinedBlobServiceConfig {
type Output = dyn BlobService; type Output = dyn BlobService;

View file

@ -1,8 +1,12 @@
use std::sync::Arc;
use url::Url; use url::Url;
use crate::{proto::blob_service_client::BlobServiceClient, Error}; use crate::composition::{
with_registry, CompositionContext, DeserializeWithRegistry, ServiceBuilder, REG,
};
use super::{BlobService, GRPCBlobService, MemoryBlobService, ObjectStoreBlobService}; use super::BlobService;
/// Constructs a new instance of a [BlobService] from an URI. /// Constructs a new instance of a [BlobService] from an URI.
/// ///
@ -12,53 +16,19 @@ use super::{BlobService, GRPCBlobService, MemoryBlobService, ObjectStoreBlobServ
/// - `objectstore+*://` ([ObjectStoreBlobService]) /// - `objectstore+*://` ([ObjectStoreBlobService])
/// ///
/// See their `from_url` methods for more details about their syntax. /// See their `from_url` methods for more details about their syntax.
pub async fn from_addr(uri: &str) -> Result<Box<dyn BlobService>, crate::Error> { pub async fn from_addr(
uri: &str,
) -> Result<Arc<dyn BlobService>, Box<dyn std::error::Error + Send + Sync>> {
let url = Url::parse(uri) let url = Url::parse(uri)
.map_err(|e| crate::Error::StorageError(format!("unable to parse url: {}", e)))?; .map_err(|e| crate::Error::StorageError(format!("unable to parse url: {}", e)))?;
let blob_service: Box<dyn BlobService> = match url.scheme() { let blob_service_config = with_registry(&REG, || {
"memory" => { <DeserializeWithRegistry<Box<dyn ServiceBuilder<Output = dyn BlobService>>>>::try_from(url)
// memory doesn't support host or path in the URL. })?
if url.has_host() || !url.path().is_empty() { .0;
return Err(Error::StorageError("invalid url".to_string())); let blob_service = blob_service_config
} .build("anonymous", &CompositionContext::blank())
Box::<MemoryBlobService>::default() .await?;
}
scheme if scheme.starts_with("grpc+") => {
// schemes starting with grpc+ go to the GRPCPathInfoService.
// That's normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts.
// - In the case of unix sockets, there must be a path, but may not be a host.
// - In the case of non-unix sockets, there must be a host, but no path.
// Constructing the channel is handled by tvix_castore::channel::from_url.
Box::new(GRPCBlobService::from_client(
BlobServiceClient::with_interceptor(
crate::tonic::channel_from_url(&url).await?,
tvix_tracing::propagate::tonic::send_trace,
),
))
}
scheme if scheme.starts_with("objectstore+") => {
// We need to convert the URL to string, strip the prefix there, and then
// parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do.
let trimmed_url = {
let s = url.to_string();
let mut url = Url::parse(s.strip_prefix("objectstore+").unwrap()).unwrap();
// trim the query pairs, they might contain credentials or local settings we don't want to send as-is.
url.set_query(None);
url
};
Box::new(
ObjectStoreBlobService::parse_url_opts(&trimmed_url, url.query_pairs())
.map_err(|e| Error::StorageError(e.to_string()))?,
)
}
scheme => {
return Err(crate::Error::StorageError(format!(
"unknown scheme: {}",
scheme
)))
}
};
Ok(blob_service) Ok(blob_service)
} }

View file

@ -187,6 +187,19 @@ pub struct GRPCBlobServiceConfig {
url: String, url: String,
} }
impl TryFrom<url::Url> for GRPCBlobServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
// normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts.
// - In the case of unix sockets, there must be a path, but may not be a host.
// - In the case of non-unix sockets, there must be a host, but no path.
// Constructing the channel is handled by tvix_castore::channel::from_url.
Ok(GRPCBlobServiceConfig {
url: url.to_string(),
})
}
}
#[async_trait] #[async_trait]
impl ServiceBuilder for GRPCBlobServiceConfig { impl ServiceBuilder for GRPCBlobServiceConfig {
type Output = dyn BlobService; type Output = dyn BlobService;

View file

@ -7,7 +7,7 @@ use tracing::instrument;
use super::{BlobReader, BlobService, BlobWriter}; use super::{BlobReader, BlobService, BlobWriter};
use crate::composition::{CompositionContext, ServiceBuilder}; use crate::composition::{CompositionContext, ServiceBuilder};
use crate::B3Digest; use crate::{B3Digest, Error};
#[derive(Clone, Default)] #[derive(Clone, Default)]
pub struct MemoryBlobService { pub struct MemoryBlobService {
@ -42,6 +42,17 @@ impl BlobService for MemoryBlobService {
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]
pub struct MemoryBlobServiceConfig {} pub struct MemoryBlobServiceConfig {}
impl TryFrom<url::Url> for MemoryBlobServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
// memory doesn't support host or path in the URL.
if url.has_host() || !url.path().is_empty() {
return Err(Error::StorageError("invalid url".to_string()).into());
}
Ok(MemoryBlobServiceConfig {})
}
}
#[async_trait] #[async_trait]
impl ServiceBuilder for MemoryBlobServiceConfig { impl ServiceBuilder for MemoryBlobServiceConfig {
type Output = dyn BlobService; type Output = dyn BlobService;

View file

@ -21,21 +21,11 @@ use url::Url;
use crate::{ use crate::{
composition::{CompositionContext, ServiceBuilder}, composition::{CompositionContext, ServiceBuilder},
proto::{stat_blob_response::ChunkMeta, StatBlobResponse}, proto::{stat_blob_response::ChunkMeta, StatBlobResponse},
B3Digest, B3HashingReader, B3Digest, B3HashingReader, Error,
}; };
use super::{BlobReader, BlobService, BlobWriter, ChunkedReader}; use super::{BlobReader, BlobService, BlobWriter, ChunkedReader};
#[derive(Clone)]
pub struct ObjectStoreBlobService {
object_store: Arc<dyn ObjectStore>,
base_path: Path,
/// Average chunk size for FastCDC, in bytes.
/// min value is half, max value double of that number.
avg_chunk_size: u32,
}
/// Uses any object storage supported by the [object_store] crate to provide a /// Uses any object storage supported by the [object_store] crate to provide a
/// tvix-castore [BlobService]. /// tvix-castore [BlobService].
/// ///
@ -72,31 +62,14 @@ pub struct ObjectStoreBlobService {
/// It also allows signalling any compression of chunks in the content-type. /// It also allows signalling any compression of chunks in the content-type.
/// Migration *should* be possible by simply adding the right content-types to /// Migration *should* be possible by simply adding the right content-types to
/// all keys stored so far, but no promises ;-) /// all keys stored so far, but no promises ;-)
impl ObjectStoreBlobService { #[derive(Clone)]
/// Constructs a new [ObjectStoreBlobService] from a [Url] supported by pub struct ObjectStoreBlobService {
/// [object_store]. object_store: Arc<dyn ObjectStore>,
/// Any path suffix becomes the base path of the object store. base_path: Path,
/// additional options, the same as in [object_store::parse_url_opts] can
/// be passed.
pub fn parse_url_opts<I, K, V>(url: &Url, options: I) -> Result<Self, object_store::Error>
where
I: IntoIterator<Item = (K, V)>,
K: AsRef<str>,
V: Into<String>,
{
let (object_store, path) = object_store::parse_url_opts(url, options)?;
Ok(Self { /// Average chunk size for FastCDC, in bytes.
object_store: Arc::new(object_store), /// min value is half, max value double of that number.
base_path: path, avg_chunk_size: u32,
avg_chunk_size: 256 * 1024,
})
}
/// Like [Self::parse_url_opts], except without the options.
pub fn parse_url(url: &Url) -> Result<Self, object_store::Error> {
Self::parse_url_opts(url, Vec::<(String, String)>::new())
}
} }
#[instrument(level=Level::TRACE, skip_all,fields(base_path=%base_path,blob.digest=%digest),ret(Display))] #[instrument(level=Level::TRACE, skip_all,fields(base_path=%base_path,blob.digest=%digest),ret(Display))]
@ -281,10 +254,41 @@ pub struct ObjectStoreBlobServiceConfig {
object_store_url: String, object_store_url: String,
#[serde(default = "default_avg_chunk_size")] #[serde(default = "default_avg_chunk_size")]
avg_chunk_size: u32, avg_chunk_size: u32,
#[serde(default)]
object_store_options: HashMap<String, String>, object_store_options: HashMap<String, String>,
} }
impl TryFrom<url::Url> for ObjectStoreBlobServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
/// Constructs a new [ObjectStoreBlobService] from a [Url] supported by
/// [object_store].
/// Any path suffix becomes the base path of the object store.
/// additional options, the same as in [object_store::parse_url_opts] can
/// be passed.
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
// We need to convert the URL to string, strip the prefix there, and then
// parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do.
let trimmed_url = {
let s = url.to_string();
let mut url = Url::parse(
s.strip_prefix("objectstore+")
.ok_or(Error::StorageError("Missing objectstore uri".into()))?,
)?;
// trim the query pairs, they might contain credentials or local settings we don't want to send as-is.
url.set_query(None);
url
};
Ok(ObjectStoreBlobServiceConfig {
object_store_url: trimmed_url.into(),
object_store_options: url
.query_pairs()
.into_iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect(),
avg_chunk_size: 256 * 1024,
})
}
}
#[async_trait] #[async_trait]
impl ServiceBuilder for ObjectStoreBlobServiceConfig { impl ServiceBuilder for ObjectStoreBlobServiceConfig {
type Output = dyn BlobService; type Output = dyn BlobService;
@ -548,7 +552,7 @@ where
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::chunk_and_upload; use super::{chunk_and_upload, default_avg_chunk_size};
use crate::{ use crate::{
blobservice::{BlobService, ObjectStoreBlobService}, blobservice::{BlobService, ObjectStoreBlobService},
fixtures::{BLOB_A, BLOB_A_DIGEST}, fixtures::{BLOB_A, BLOB_A_DIGEST},
@ -559,13 +563,18 @@ mod test {
/// Tests chunk_and_upload directly, bypassing the BlobWriter at open_write(). /// Tests chunk_and_upload directly, bypassing the BlobWriter at open_write().
#[tokio::test] #[tokio::test]
async fn test_chunk_and_upload() { async fn test_chunk_and_upload() {
let blobsvc = Arc::new( let (object_store, base_path) =
ObjectStoreBlobService::parse_url(&Url::parse("memory:///").unwrap()).unwrap(), object_store::parse_url(&Url::parse("memory:///").unwrap()).unwrap();
); let object_store: Arc<dyn object_store::ObjectStore> = Arc::from(object_store);
let blobsvc = Arc::new(ObjectStoreBlobService {
object_store: object_store.clone(),
avg_chunk_size: default_avg_chunk_size(),
base_path,
});
let blob_digest = chunk_and_upload( let blob_digest = chunk_and_upload(
&mut Cursor::new(BLOB_A.to_vec()), &mut Cursor::new(BLOB_A.to_vec()),
blobsvc.object_store.clone(), object_store,
object_store::path::Path::from("/"), object_store::path::Path::from("/"),
1024 / 2, 1024 / 2,
1024, 1024,

View file

@ -31,6 +31,13 @@
//! } //! }
//! } //! }
//! //!
//! impl TryFrom<url::Url> for MyBlobServiceConfig {
//! type Error = Box<dyn std::error::Error + Send + Sync>;
//! fn try_from(url: url::Url) -> Result<Self, Self::Error> {
//! todo!()
//! }
//! }
//!
//! pub fn add_my_service(reg: &mut Registry) { //! pub fn add_my_service(reg: &mut Registry) {
//! reg.register::<Box<dyn ServiceBuilder<Output = dyn BlobService>>, MyBlobServiceConfig>("myblobservicetype"); //! reg.register::<Box<dyn ServiceBuilder<Output = dyn BlobService>>, MyBlobServiceConfig>("myblobservicetype");
//! } //! }
@ -100,7 +107,7 @@ use tonic::async_trait;
// This is really ugly. Really we would want to store this as a generic static field: // This is really ugly. Really we would want to store this as a generic static field:
// //
// ``` // ```
// struct Registry<T>(BTreeMap<(&'static str), BoxSeedFn<T>); // struct Registry<T>(BTreeMap<(&'static str), RegistryEntry<T>);
// static REG<T>: Registry<T>; // static REG<T>: Registry<T>;
// ``` // ```
// //
@ -116,6 +123,12 @@ use tonic::async_trait;
// I said it was ugly... // I said it was ugly...
#[derive(Default)] #[derive(Default)]
pub struct Registry(BTreeMap<(TypeId, &'static str), Box<dyn Any + Sync>>); pub struct Registry(BTreeMap<(TypeId, &'static str), Box<dyn Any + Sync>>);
pub type FromUrlSeed<T> =
Box<dyn Fn(url::Url) -> Result<T, Box<dyn std::error::Error + Send + Sync>> + Sync>;
pub struct RegistryEntry<T> {
serde_deserialize_seed: BoxFnSeed<DeserializeWithRegistry<T>>,
from_url_seed: FromUrlSeed<DeserializeWithRegistry<T>>,
}
struct RegistryWithFakeType<'r, T>(&'r Registry, PhantomData<T>); struct RegistryWithFakeType<'r, T>(&'r Registry, PhantomData<T>);
@ -137,7 +150,9 @@ impl<'r, 'de: 'r, T: 'static> SeedFactory<'de, TagString<'de>> for RegistryWithF
.ok_or_else(|| serde::de::Error::custom("Unknown tag"))? .ok_or_else(|| serde::de::Error::custom("Unknown tag"))?
.1; .1;
Ok(<dyn Any>::downcast_ref(&**seed).unwrap()) let entry: &RegistryEntry<T> = <dyn Any>::downcast_ref(&**seed).unwrap();
Ok(&entry.serde_deserialize_seed)
} }
} }
@ -146,7 +161,7 @@ impl<'r, 'de: 'r, T: 'static> SeedFactory<'de, TagString<'de>> for RegistryWithF
/// Wrap your type in this in order to deserialize it using a registry, e.g. /// Wrap your type in this in order to deserialize it using a registry, e.g.
/// `RegistryWithFakeType<Box<dyn MyTrait>>`, then the types registered for `Box<dyn MyTrait>` /// `RegistryWithFakeType<Box<dyn MyTrait>>`, then the types registered for `Box<dyn MyTrait>`
/// will be used. /// will be used.
pub struct DeserializeWithRegistry<T>(T); pub struct DeserializeWithRegistry<T>(pub T);
impl Registry { impl Registry {
/// Registers a mapping from type tag to a concrete type into the registry. /// Registers a mapping from type tag to a concrete type into the registry.
@ -156,14 +171,30 @@ impl Registry {
/// deserializes into an input with the type tag "myblobservicetype" into a /// deserializes into an input with the type tag "myblobservicetype" into a
/// `Box<dyn FooTrait>`, it will first call the Deserialize imple of `FooStruct` and /// `Box<dyn FooTrait>`, it will first call the Deserialize imple of `FooStruct` and
/// then convert it into a `Box<dyn FooTrait>` using From::from. /// then convert it into a `Box<dyn FooTrait>` using From::from.
pub fn register<T: 'static, C: DeserializeOwned + Into<T>>(&mut self, type_name: &'static str) { pub fn register<
let seed = BoxFnSeed::new(|x| { T: 'static,
C: DeserializeOwned
+ TryFrom<url::Url, Error = Box<dyn std::error::Error + Send + Sync>>
+ Into<T>,
>(
&mut self,
type_name: &'static str,
) {
self.0.insert(
(TypeId::of::<T>(), type_name),
Box::new(RegistryEntry {
serde_deserialize_seed: BoxFnSeed::new(|x| {
deserialize::<C>(x) deserialize::<C>(x)
.map(Into::into) .map(Into::into)
.map(DeserializeWithRegistry) .map(DeserializeWithRegistry)
}); }),
self.0 from_url_seed: Box::new(|url| {
.insert((TypeId::of::<T>(), type_name), Box::new(seed)); C::try_from(url)
.map(Into::into)
.map(DeserializeWithRegistry)
}),
}),
);
} }
} }
@ -180,6 +211,30 @@ impl<'de, T: 'static> serde::Deserialize<'de> for DeserializeWithRegistry<T> {
} }
} }
#[derive(Debug, thiserror::Error)]
enum TryFromUrlError {
#[error("Unknown tag: {0}")]
UnknownTag(String),
}
impl<T: 'static> TryFrom<url::Url> for DeserializeWithRegistry<T> {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
let tag = url.scheme().split('+').next().unwrap();
// same as in the SeedFactory impl: using find() and not get() because of https://github.com/rust-lang/rust/issues/80389
let seed = ACTIVE_REG
.get()
.unwrap()
.0
.iter()
.find(|(k, _)| *k == &(TypeId::of::<T>(), tag))
.ok_or_else(|| Box::new(TryFromUrlError::UnknownTag(tag.into())))?
.1;
let entry: &RegistryEntry<T> = <dyn Any>::downcast_ref(&**seed).unwrap();
(entry.from_url_seed)(url)
}
}
thread_local! { thread_local! {
/// The active Registry is global state, because there is no convenient and universal way to pass state /// The active Registry is global state, because there is no convenient and universal way to pass state
/// into the functions usually used for deserialization, e.g. `serde_json::from_str`, `toml::from_str`, /// into the functions usually used for deserialization, e.g. `serde_json::from_str`, `toml::from_str`,
@ -200,7 +255,7 @@ pub fn with_registry<R>(reg: &'static Registry, f: impl FnOnce() -> R) -> R {
lazy_static! { lazy_static! {
/// The provided registry of tvix_castore, with all builtin BlobStore/DirectoryStore implementations /// The provided registry of tvix_castore, with all builtin BlobStore/DirectoryStore implementations
pub static ref REG: Registry = { pub static ref REG: Registry = {
let mut reg = Registry(Default::default()); let mut reg = Default::default();
add_default_services(&mut reg); add_default_services(&mut reg);
reg reg
}; };

View file

@ -361,6 +361,26 @@ impl ServiceBuilder for BigtableParameters {
} }
} }
impl TryFrom<url::Url> for BigtableParameters {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(mut url: url::Url) -> Result<Self, Self::Error> {
// parse the instance name from the hostname.
let instance_name = url
.host_str()
.ok_or_else(|| Error::StorageError("instance name missing".into()))?
.to_string();
// … but add it to the query string now, so we just need to parse that.
url.query_pairs_mut()
.append_pair("instance_name", &instance_name);
let params: BigtableParameters = serde_qs::from_str(url.query().unwrap_or_default())
.map_err(|e| Error::InvalidRequest(format!("failed to parse parameters: {}", e)))?;
Ok(params)
}
}
fn default_app_profile_id() -> String { fn default_app_profile_id() -> String {
"default".to_owned() "default".to_owned()
} }

View file

@ -151,6 +151,16 @@ pub struct CacheConfig {
far: String, far: String,
} }
impl TryFrom<url::Url> for CacheConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(_url: url::Url) -> Result<Self, Self::Error> {
Err(Error::StorageError(
"Instantiating a CombinedDirectoryService from a url is not supported".into(),
)
.into())
}
}
#[async_trait] #[async_trait]
impl ServiceBuilder for CacheConfig { impl ServiceBuilder for CacheConfig {
type Output = dyn DirectoryService; type Output = dyn DirectoryService;

View file

@ -1,12 +1,13 @@
use std::sync::Arc;
use url::Url; use url::Url;
use crate::{proto::directory_service_client::DirectoryServiceClient, Error}; use crate::composition::{
with_registry, CompositionContext, DeserializeWithRegistry, ServiceBuilder, REG,
use super::{
DirectoryService, GRPCDirectoryService, MemoryDirectoryService, ObjectStoreDirectoryService,
SledDirectoryService,
}; };
use super::DirectoryService;
/// Constructs a new instance of a [DirectoryService] from an URI. /// Constructs a new instance of a [DirectoryService] from an URI.
/// ///
/// The following URIs are supported: /// The following URIs are supported:
@ -21,101 +22,23 @@ use super::{
/// Connects to a local tvix-store gRPC service via Unix socket. /// Connects to a local tvix-store gRPC service via Unix socket.
/// - `grpc+http://host:port`, `grpc+https://host:port` /// - `grpc+http://host:port`, `grpc+https://host:port`
/// Connects to a (remote) tvix-store gRPC service. /// Connects to a (remote) tvix-store gRPC service.
pub async fn from_addr(uri: &str) -> Result<Box<dyn DirectoryService>, crate::Error> { pub async fn from_addr(
uri: &str,
) -> Result<Arc<dyn DirectoryService>, Box<dyn std::error::Error + Send + Sync>> {
#[allow(unused_mut)] #[allow(unused_mut)]
let mut url = Url::parse(uri) let mut url = Url::parse(uri)
.map_err(|e| crate::Error::StorageError(format!("unable to parse url: {}", e)))?; .map_err(|e| crate::Error::StorageError(format!("unable to parse url: {}", e)))?;
let directory_service: Box<dyn DirectoryService> = match url.scheme() { let directory_service_config = with_registry(&REG, || {
"memory" => { <DeserializeWithRegistry<Box<dyn ServiceBuilder<Output = dyn DirectoryService>>>>::try_from(
// memory doesn't support host or path in the URL. url,
if url.has_host() || !url.path().is_empty() {
return Err(Error::StorageError("invalid url".to_string()));
}
Box::<MemoryDirectoryService>::default()
}
"sled" => {
// sled doesn't support host, and a path can be provided (otherwise
// it'll live in memory only).
if url.has_host() {
return Err(Error::StorageError("no host allowed".to_string()));
}
if url.path() == "/" {
return Err(Error::StorageError(
"cowardly refusing to open / with sled".to_string(),
));
}
// TODO: expose compression and other parameters as URL parameters?
Box::new(if url.path().is_empty() {
SledDirectoryService::new_temporary()
.map_err(|e| Error::StorageError(e.to_string()))?
} else {
SledDirectoryService::new(url.path())
.map_err(|e| Error::StorageError(e.to_string()))?
})
}
scheme if scheme.starts_with("grpc+") => {
// schemes starting with grpc+ go to the GRPCPathInfoService.
// That's normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts.
// - In the case of unix sockets, there must be a path, but may not be a host.
// - In the case of non-unix sockets, there must be a host, but no path.
// Constructing the channel is handled by tvix_castore::channel::from_url.
Box::new(GRPCDirectoryService::from_client(
DirectoryServiceClient::with_interceptor(
crate::tonic::channel_from_url(&url).await?,
tvix_tracing::propagate::tonic::send_trace,
),
))
}
scheme if scheme.starts_with("objectstore+") => {
// We need to convert the URL to string, strip the prefix there, and then
// parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do.
let trimmed_url = {
let s = url.to_string();
let mut url = Url::parse(s.strip_prefix("objectstore+").unwrap()).unwrap();
// trim the query pairs, they might contain credentials or local settings we don't want to send as-is.
url.set_query(None);
url
};
Box::new(
ObjectStoreDirectoryService::parse_url_opts(&trimmed_url, url.query_pairs())
.map_err(|e| Error::StorageError(e.to_string()))?,
) )
} })?
#[cfg(feature = "cloud")] .0;
"bigtable" => { let directory_service = directory_service_config
use super::bigtable::BigtableParameters; .build("anonymous", &CompositionContext::blank())
use super::BigtableDirectoryService; .await?;
// parse the instance name from the hostname.
let instance_name = url
.host_str()
.ok_or_else(|| Error::StorageError("instance name missing".into()))?
.to_string();
// … but add it to the query string now, so we just need to parse that.
url.query_pairs_mut()
.append_pair("instance_name", &instance_name);
let params: BigtableParameters = serde_qs::from_str(url.query().unwrap_or_default())
.map_err(|e| Error::InvalidRequest(format!("failed to parse parameters: {}", e)))?;
Box::new(
BigtableDirectoryService::connect(params)
.await
.map_err(|e| Error::StorageError(e.to_string()))?,
)
}
_ => {
return Err(crate::Error::StorageError(format!(
"unknown scheme: {}",
url.scheme()
)))
}
};
Ok(directory_service) Ok(directory_service)
} }

View file

@ -224,6 +224,19 @@ pub struct GRPCDirectoryServiceConfig {
url: String, url: String,
} }
impl TryFrom<url::Url> for GRPCDirectoryServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
// This is normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts.
// - In the case of unix sockets, there must be a path, but may not be a host.
// - In the case of non-unix sockets, there must be a host, but no path.
// Constructing the channel is handled by tvix_castore::channel::from_url.
Ok(GRPCDirectoryServiceConfig {
url: url.to_string(),
})
}
}
#[async_trait] #[async_trait]
impl ServiceBuilder for GRPCDirectoryServiceConfig { impl ServiceBuilder for GRPCDirectoryServiceConfig {
type Output = dyn DirectoryService; type Output = dyn DirectoryService;

View file

@ -91,6 +91,17 @@ impl DirectoryService for MemoryDirectoryService {
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]
pub struct MemoryDirectoryServiceConfig {} pub struct MemoryDirectoryServiceConfig {}
impl TryFrom<url::Url> for MemoryDirectoryServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
// memory doesn't support host or path in the URL.
if url.has_host() || !url.path().is_empty() {
return Err(Error::StorageError("invalid url".to_string()).into());
}
Ok(MemoryDirectoryServiceConfig {})
}
}
#[async_trait] #[async_trait]
impl ServiceBuilder for MemoryDirectoryServiceConfig { impl ServiceBuilder for MemoryDirectoryServiceConfig {
type Output = dyn DirectoryService; type Output = dyn DirectoryService;

View file

@ -179,6 +179,32 @@ pub struct ObjectStoreDirectoryServiceConfig {
object_store_options: HashMap<String, String>, object_store_options: HashMap<String, String>,
} }
impl TryFrom<url::Url> for ObjectStoreDirectoryServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
// We need to convert the URL to string, strip the prefix there, and then
// parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do.
let trimmed_url = {
let s = url.to_string();
let mut url = Url::parse(
s.strip_prefix("objectstore+")
.ok_or(Error::StorageError("Missing objectstore uri".into()))?,
)?;
// trim the query pairs, they might contain credentials or local settings we don't want to send as-is.
url.set_query(None);
url
};
Ok(ObjectStoreDirectoryServiceConfig {
object_store_url: trimmed_url.into(),
object_store_options: url
.query_pairs()
.into_iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect(),
})
}
}
#[async_trait] #[async_trait]
impl ServiceBuilder for ObjectStoreDirectoryServiceConfig { impl ServiceBuilder for ObjectStoreDirectoryServiceConfig {
type Output = dyn DirectoryService; type Output = dyn DirectoryService;

View file

@ -145,6 +145,31 @@ pub struct SledDirectoryServiceConfig {
path: Option<String>, path: Option<String>,
} }
impl TryFrom<url::Url> for SledDirectoryServiceConfig {
type Error = Box<dyn std::error::Error + Send + Sync>;
fn try_from(url: url::Url) -> Result<Self, Self::Error> {
// sled doesn't support host, and a path can be provided (otherwise
// it'll live in memory only).
if url.has_host() {
return Err(Error::StorageError("no host allowed".to_string()).into());
}
// TODO: expose compression and other parameters as URL parameters?
Ok(if url.path().is_empty() {
SledDirectoryServiceConfig {
is_temporary: true,
path: None,
}
} else {
SledDirectoryServiceConfig {
is_temporary: false,
path: Some(url.path().to_string()),
}
})
}
}
#[async_trait] #[async_trait]
impl ServiceBuilder for SledDirectoryServiceConfig { impl ServiceBuilder for SledDirectoryServiceConfig {
type Output = dyn DirectoryService; type Output = dyn DirectoryService;

View file

@ -4,7 +4,6 @@ use crate::fixtures::*;
use crate::import::fs::ingest_path; use crate::import::fs::ingest_path;
use crate::proto; use crate::proto;
use std::sync::Arc;
use tempfile::TempDir; use tempfile::TempDir;
#[cfg(target_family = "unix")] #[cfg(target_family = "unix")]
@ -26,7 +25,7 @@ async fn symlink() {
.unwrap(); .unwrap();
let root_node = ingest_path( let root_node = ingest_path(
Arc::from(blob_service), blob_service,
directory_service, directory_service,
tmpdir.path().join("doesntmatter"), tmpdir.path().join("doesntmatter"),
) )
@ -44,8 +43,7 @@ async fn symlink() {
#[tokio::test] #[tokio::test]
async fn single_file() { async fn single_file() {
let blob_service = let blob_service = blobservice::from_addr("memory://").await.unwrap();
Arc::from(blobservice::from_addr("memory://").await.unwrap()) as Arc<dyn BlobService>;
let directory_service = directoryservice::from_addr("memory://").await.unwrap(); let directory_service = directoryservice::from_addr("memory://").await.unwrap();
let tmpdir = TempDir::new().unwrap(); let tmpdir = TempDir::new().unwrap();
@ -77,8 +75,7 @@ async fn single_file() {
#[cfg(target_family = "unix")] #[cfg(target_family = "unix")]
#[tokio::test] #[tokio::test]
async fn complicated() { async fn complicated() {
let blob_service = let blob_service = blobservice::from_addr("memory://").await.unwrap();
Arc::from(blobservice::from_addr("memory://").await.unwrap()) as Arc<dyn BlobService>;
let directory_service = directoryservice::from_addr("memory://").await.unwrap(); let directory_service = directoryservice::from_addr("memory://").await.unwrap();
let tmpdir = TempDir::new().unwrap(); let tmpdir = TempDir::new().unwrap();

View file

@ -236,9 +236,8 @@ mod test_builtins {
fn deserialize_with_extra_builtin() { fn deserialize_with_extra_builtin() {
let code = "builtins.prependHello \"world\""; let code = "builtins.prependHello \"world\"";
let result: String = from_str_with_config(code, |eval| { let result: String =
eval.add_builtins(test_builtins::builtins()) from_str_with_config(code, |eval| eval.add_builtins(test_builtins::builtins()))
})
.expect("should deserialize"); .expect("should deserialize");
assert_eq!(result, "hello world"); assert_eq!(result, "hello world");

View file

@ -200,7 +200,7 @@ fn default_threads() -> usize {
} }
#[instrument(skip_all)] #[instrument(skip_all)]
async fn run_cli(cli: Cli) -> Result<(), Box<dyn std::error::Error>> { async fn run_cli(cli: Cli) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
match cli.command { match cli.command {
Commands::Daemon { Commands::Daemon {
listen_args, listen_args,
@ -538,7 +538,7 @@ async fn run_cli(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
} }
#[tokio::main] #[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> { async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let cli = Cli::parse(); let cli = Cli::parse();
let tracing_handle = { let tracing_handle = {

View file

@ -19,19 +19,19 @@ pub async fn construct_services(
blob_service_addr: impl AsRef<str>, blob_service_addr: impl AsRef<str>,
directory_service_addr: impl AsRef<str>, directory_service_addr: impl AsRef<str>,
path_info_service_addr: impl AsRef<str>, path_info_service_addr: impl AsRef<str>,
) -> std::io::Result<( ) -> Result<
(
Arc<dyn BlobService>, Arc<dyn BlobService>,
Arc<dyn DirectoryService>, Arc<dyn DirectoryService>,
Box<dyn PathInfoService>, Box<dyn PathInfoService>,
Box<dyn NarCalculationService>, Box<dyn NarCalculationService>,
)> { ),
let blob_service: Arc<dyn BlobService> = blobservice::from_addr(blob_service_addr.as_ref()) Box<dyn std::error::Error + Send + Sync>,
.await? > {
.into(); let blob_service: Arc<dyn BlobService> =
blobservice::from_addr(blob_service_addr.as_ref()).await?;
let directory_service: Arc<dyn DirectoryService> = let directory_service: Arc<dyn DirectoryService> =
directoryservice::from_addr(directory_service_addr.as_ref()) directoryservice::from_addr(directory_service_addr.as_ref()).await?;
.await?
.into();
let path_info_service = pathinfoservice::from_addr( let path_info_service = pathinfoservice::from_addr(
path_info_service_addr.as_ref(), path_info_service_addr.as_ref(),