refactor(tvix/castore/tonic): make async, support wait-connect=?

This moves the sync `channel::from_url` to an async
`tonic::channel_from_url`. It now allows connecting non-lazily if
`wait-connect=1` is set in the URL params.

Also, make the pingpong tests for blobsvc and directorysvc use the
`wait-connect=1` codepath.

Change-Id: Ibeea33117c8121814627e7f6aba0e943ae2e92ca
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10030
Tested-by: BuildkiteCI
Reviewed-by: Connor Brewster <cbrewster@hey.com>
This commit is contained in:
Florian Klink 2023-11-13 14:32:24 +02:00 committed by flokli
parent c83841d3a1
commit 30e0c32066
11 changed files with 170 additions and 182 deletions

View file

@ -13,7 +13,7 @@ use super::{BlobService, GRPCBlobService, MemoryBlobService, SledBlobService};
/// - `grpc+*://` ([GRPCBlobService])
///
/// See their `from_url` methods for more details about their syntax.
pub fn from_addr(uri: &str) -> Result<Arc<dyn BlobService>, crate::Error> {
pub async fn from_addr(uri: &str) -> Result<Arc<dyn BlobService>, crate::Error> {
let url = Url::parse(uri)
.map_err(|e| crate::Error::StorageError(format!("unable to parse url: {}", e)))?;
@ -53,7 +53,7 @@ pub fn from_addr(uri: &str) -> Result<Arc<dyn BlobService>, crate::Error> {
// - In the case of unix sockets, there must be a path, but may not be a host.
// - In the case of non-unix sockets, there must be a host, but no path.
// Constructing the channel is handled by tvix_castore::tonic::channel_from_url.
let client = BlobServiceClient::new(crate::channel::from_url(&url)?);
let client = BlobServiceClient::new(crate::tonic::channel_from_url(&url).await?);
Arc::new(GRPCBlobService::from_client(client))
} else {
Err(crate::Error::StorageError(format!(
@ -95,12 +95,6 @@ mod tests {
#[test_case("memory:///", false; "memory invalid root path")]
/// This sets a memory url path to "/foo", which is invalid.
#[test_case("memory:///foo", false; "memory invalid root path foo")]
fn test_from_addr(uri_str: &str, is_ok: bool) {
assert_eq!(from_addr(uri_str).is_ok(), is_ok)
}
// the gRPC tests below don't fail, because we connect lazily.
/// Correct scheme to connect to a unix socket.
#[test_case("grpc+unix:///path/to/somewhere", true; "grpc valid unix socket")]
/// Correct scheme for unix socket, but setting a host too, which is invalid.
@ -115,6 +109,6 @@ mod tests {
#[test_case("grpc+http://localhost/some-path", false; "grpc valid invalid host and path")]
#[tokio::test]
async fn test_from_addr_tokio(uri_str: &str, is_ok: bool) {
assert_eq!(from_addr(uri_str).is_ok(), is_ok)
assert_eq!(from_addr(uri_str).await.is_ok(), is_ok)
}
}

View file

@ -285,10 +285,16 @@ mod tests {
// prepare a client
let grpc_client = {
let url = url::Url::parse(&format!("grpc+unix://{}", socket_path.display()))
.expect("must parse");
let client =
BlobServiceClient::new(crate::channel::from_url(&url).expect("must succeed"));
let url = url::Url::parse(&format!(
"grpc+unix://{}?wait-connect=1",
socket_path.display()
))
.expect("must parse");
let client = BlobServiceClient::new(
crate::tonic::channel_from_url(&url)
.await
.expect("must succeed"),
);
GRPCBlobService::from_client(client)
};

View file

@ -1,128 +0,0 @@
use tokio::net::UnixStream;
use tonic::transport::Channel;
/// Turn a [url::Url] to a [Channel] if it can be parsed successfully.
/// It supports `grpc+unix:/path/to/socket`,
/// as well as the regular schemes supported by tonic, prefixed with grpc+,
/// for example `grpc+http://[::1]:8000`.
pub fn from_url(url: &url::Url) -> Result<Channel, self::Error> {
// Start checking for the scheme to start with grpc+.
// If it doesn't start with that, bail out.
match url.scheme().strip_prefix("grpc+") {
None => Err(Error::MissingGRPCPrefix()),
Some(rest) => {
if rest == "unix" {
if url.host_str().is_some() {
return Err(Error::HostSetForUnixSocket());
}
let url = url.clone();
Ok(
tonic::transport::Endpoint::from_static("http://[::]:50051") // doesn't matter
.connect_with_connector_lazy(tower::service_fn(
move |_: tonic::transport::Uri| {
UnixStream::connect(url.path().to_string().clone())
},
)),
)
} else {
// ensure path is empty, not supported with gRPC.
if !url.path().is_empty() {
return Err(Error::PathMayNotBeSet());
}
// Stringify the URL and remove the grpc+ prefix.
// We can't use `url.set_scheme(rest)`, as it disallows
// setting something http(s) that previously wasn't.
let url = url.to_string().strip_prefix("grpc+").unwrap().to_owned();
// Use the regular tonic transport::Endpoint logic to
Ok(tonic::transport::Endpoint::try_from(url)
.unwrap()
.connect_lazy())
}
}
}
}
/// Errors occurring when trying to connect to a backend.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// The URL scheme does not start with `grpc+`.
#[error("grpc+ prefix is missing from Url")]
MissingGRPCPrefix(),
/// A `grpc+unix` URL specified a host, which is not allowed.
#[error("host may not be set for unix domain sockets")]
HostSetForUnixSocket(),
/// A non-unix URL specified a path, which is not supported with gRPC.
#[error("path may not be set")]
PathMayNotBeSet(),
/// Wraps an error reported by tonic's transport layer.
#[error("transport error: {0}")]
TransportError(tonic::transport::Error),
}
impl From<tonic::transport::Error> for Error {
fn from(value: tonic::transport::Error) -> Self {
Self::TransportError(value)
}
}
#[cfg(test)]
mod tests {
    use super::from_url;

    /// Parses a URL literal used by the test cases in this module.
    fn parse(input: &str) -> url::Url {
        url::Url::parse(input).expect("must parse")
    }

    /// A unix socket URL with the correct scheme is accepted.
    /// The socket at /path/to/somewhere not existing is fine, as the
    /// connection is only established lazily.
    #[tokio::test]
    async fn test_valid_unix_path() {
        let result = from_url(&parse("grpc+unix:///path/to/somewhere"));
        assert!(result.is_ok())
    }

    /// A unix socket URL that also sets a host is rejected.
    #[tokio::test]
    async fn test_invalid_unix_path_with_domain() {
        let result = from_url(&parse("grpc+unix://host.example/path/to/somewhere"));
        assert!(result.is_err())
    }

    /// A URL without the grpc+ scheme prefix is rejected.
    #[test]
    fn test_invalid_scheme() {
        let result = from_url(&parse("http://foo.example/test"));
        assert!(result.is_err());
    }

    /// A plain HTTP server URL is accepted.
    /// Nothing listening there is fine, as the connection is lazy.
    #[tokio::test]
    async fn test_valid_http() {
        let result = from_url(&parse("grpc+http://localhost"));
        assert!(result.is_ok());
    }

    /// A HTTPS server URL is accepted.
    /// Nothing listening there is fine, as the connection is lazy.
    #[tokio::test]
    async fn test_valid_https() {
        let result = from_url(&parse("grpc+https://localhost"));
        assert!(result.is_ok());
    }

    /// A HTTPS server URL with an additional path is rejected,
    /// as paths are not supported for gRPC.
    #[tokio::test]
    async fn test_invalid_http_with_path() {
        let result = from_url(&parse("grpc+https://localhost/some-path"));
        assert!(result.is_err());
    }
}

View file

@ -19,7 +19,7 @@ use super::{DirectoryService, GRPCDirectoryService, MemoryDirectoryService, Sled
/// Connects to a local tvix-store gRPC service via Unix socket.
/// - `grpc+http://host:port`, `grpc+https://host:port`
/// Connects to a (remote) tvix-store gRPC service.
pub fn from_addr(uri: &str) -> Result<Arc<dyn DirectoryService>, crate::Error> {
pub async fn from_addr(uri: &str) -> Result<Arc<dyn DirectoryService>, crate::Error> {
let url = Url::parse(uri)
.map_err(|e| crate::Error::StorageError(format!("unable to parse url: {}", e)))?;
@ -60,7 +60,7 @@ pub fn from_addr(uri: &str) -> Result<Arc<dyn DirectoryService>, crate::Error> {
// - In the case of unix sockets, there must be a path, but may not be a host.
// - In the case of non-unix sockets, there must be a host, but no path.
// Constructing the channel is handled by tvix_castore::tonic::channel_from_url.
let client = DirectoryServiceClient::new(crate::channel::from_url(&url)?);
let client = DirectoryServiceClient::new(crate::tonic::channel_from_url(&url).await?);
Arc::new(GRPCDirectoryService::from_client(client))
} else {
Err(crate::Error::StorageError(format!(
@ -102,12 +102,6 @@ mod tests {
#[test_case("memory:///", false; "memory invalid root path")]
/// This sets a memory url path to "/foo", which is invalid.
#[test_case("memory:///foo", false; "memory invalid root path foo")]
fn test_from_addr(uri_str: &str, is_ok: bool) {
assert_eq!(from_addr(uri_str).is_ok(), is_ok)
}
// the gRPC tests below don't fail, because we connect lazily.
/// Correct scheme to connect to a unix socket.
#[test_case("grpc+unix:///path/to/somewhere", true; "grpc valid unix socket")]
/// Correct scheme for unix socket, but setting a host too, which is invalid.
@ -122,6 +116,6 @@ mod tests {
#[test_case("grpc+http://localhost/some-path", false; "grpc valid invalid host and path")]
#[tokio::test]
async fn test_from_addr_tokio(uri_str: &str, is_ok: bool) {
assert_eq!(from_addr(uri_str).is_ok(), is_ok)
assert_eq!(from_addr(uri_str).await.is_ok(), is_ok)
}
}

View file

@ -462,10 +462,16 @@ mod tests {
// prepare a client
let grpc_client = {
let url = url::Url::parse(&format!("grpc+unix://{}", socket_path.display()))
.expect("must parse");
let client =
DirectoryServiceClient::new(crate::channel::from_url(&url).expect("must succeed"));
let url = url::Url::parse(&format!(
"grpc+unix://{}?wait-connect=1",
socket_path.display()
))
.expect("must parse");
let client = DirectoryServiceClient::new(
crate::tonic::channel_from_url(&url)
.await
.expect("must succeed"),
);
GRPCDirectoryService::from_client(client)
};

View file

@ -34,8 +34,8 @@ impl From<Error> for Status {
}
}
impl From<crate::channel::Error> for Error {
fn from(value: crate::channel::Error) -> Self {
impl From<crate::tonic::Error> for Error {
fn from(value: crate::tonic::Error) -> Self {
Self::StorageError(value.to_string())
}
}

View file

@ -2,11 +2,11 @@ mod digests;
mod errors;
pub mod blobservice;
pub mod channel;
pub mod directoryservice;
pub mod fixtures;
pub mod import;
pub mod proto;
pub mod tonic;
pub mod utils;
pub use digests::{B3Digest, B3_LEN};

115
tvix/castore/src/tonic.rs Normal file
View file

@ -0,0 +1,115 @@
use tokio::net::UnixStream;
use tonic::transport::{Channel, Endpoint};
/// Returns whether the URL's query string contains a `wait-connect=1` pair.
///
/// Any other value for `wait-connect`, or its absence, yields `false`.
/// If the key is given multiple times, a single pair with the value `1`
/// is sufficient.
fn url_wants_wait_connect(url: &url::Url) -> bool {
    // `any` stops at the first match instead of counting all of them.
    url.query_pairs()
        .any(|(k, v)| k == "wait-connect" && v == "1")
}
/// Turn a [url::Url] to a [Channel] if it can be parsed successfully.
/// It supports `grpc+unix:/path/to/socket`, as well as the regular schemes supported
/// by tonic, for example `grpc+http://[::1]:8000`.
/// It supports wait-connect=1 as a URL parameter, in which case we don't connect lazily.
///
/// # Errors
///
/// - [Error::MissingGRPCPrefix] if the URL does not start with `grpc+`.
/// - [Error::HostSetForUnixSocket] if a `grpc+unix` URL carries a host.
/// - [Error::PathMayNotBeSet] if a non-unix URL carries a path.
/// - [Error::TransportError] if tonic rejects the unprefixed URL, or if
///   `wait-connect=1` is set and establishing the connection fails.
pub async fn channel_from_url(url: &url::Url) -> Result<Channel, self::Error> {
    // Stringify the URL and remove the grpc+ prefix.
    // We can't use `url.set_scheme(rest)`, as it disallows
    // setting something http(s) that previously wasn't.
    let url_str = url.to_string();
    let unprefixed_url_str = match url_str.strip_prefix("grpc+") {
        None => return Err(Error::MissingGRPCPrefix()),
        Some(unprefixed) => unprefixed,
    };

    let wait_connect = url_wants_wait_connect(url);

    if url.scheme() == "grpc+unix" {
        if url.host_str().is_some() {
            return Err(Error::HostSetForUnixSocket());
        }

        // Only the socket path is needed by the connector, so capture just
        // that; each (re)connect hands a fresh copy to `UnixStream::connect`.
        let socket_path = url.path().to_owned();
        let connector = tower::service_fn(move |_: tonic::transport::Uri| {
            UnixStream::connect(socket_path.clone())
        });

        // The endpoint URI is a placeholder: the connector above ignores the
        // URI it is called with and always dials the unix socket.
        let endpoint = Endpoint::from_static("http://[::]:50051");
        let channel = if wait_connect {
            endpoint.connect_with_connector(connector).await?
        } else {
            endpoint.connect_with_connector_lazy(connector)
        };

        return Ok(channel);
    }

    // ensure path is empty, not supported with gRPC.
    if !url.path().is_empty() {
        return Err(Error::PathMayNotBeSet());
    }

    // Use the regular tonic transport::Endpoint logic, but unprefixed_url_str,
    // as tonic doesn't know about grpc+http[s].
    let endpoint = Endpoint::try_from(unprefixed_url_str.to_owned())?;
    let channel = if wait_connect {
        endpoint.connect().await?
    } else {
        endpoint.connect_lazy()
    };

    Ok(channel)
}
/// Errors occurring when trying to connect to a backend.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// The URL does not start with `grpc+`.
#[error("grpc+ prefix is missing from URL")]
MissingGRPCPrefix(),
/// A `grpc+unix` URL specified a host, which is not allowed.
#[error("host may not be set for unix domain sockets")]
HostSetForUnixSocket(),
/// A non-unix URL specified a path, which is not supported with gRPC.
#[error("path may not be set")]
PathMayNotBeSet(),
/// Wraps an error reported by tonic's transport layer.
#[error("transport error: {0}")]
TransportError(tonic::transport::Error),
}
impl From<tonic::transport::Error> for Error {
fn from(value: tonic::transport::Error) -> Self {
Self::TransportError(value)
}
}
#[cfg(test)]
mod tests {
    use super::channel_from_url;
    use test_case::test_case;
    use url::Url;

    /// Correct scheme to connect to a unix socket.
    #[test_case("grpc+unix:///path/to/somewhere", true; "grpc valid unix socket")]
    /// Connecting with wait-connect set to 0 succeeds, as that's the default.
    #[test_case("grpc+unix:///path/to/somewhere?wait-connect=0", true; "grpc valid unix wait-connect=0")]
    /// Connecting with wait-connect set to 1 fails, as the path doesn't exist.
    #[test_case("grpc+unix:///path/to/somewhere?wait-connect=1", false; "grpc valid unix wait-connect=1")]
    /// Correct scheme for unix socket, but setting a host too, which is invalid.
    #[test_case("grpc+unix://host.example/path/to/somewhere", false; "grpc invalid unix socket and host")]
    /// Correct scheme to connect to localhost, with port 12345
    #[test_case("grpc+http://[::1]:12345", true; "grpc valid IPv6 localhost port 12345")]
    /// Correct scheme to connect to localhost over http, without specifying a port.
    #[test_case("grpc+http://localhost", true; "grpc valid http host without port")]
    /// Correct scheme to connect to localhost over https, without specifying a port.
    #[test_case("grpc+https://localhost", true; "grpc valid https host without port")]
    /// Correct scheme to connect to localhost over http, but with additional path, which is invalid.
    #[test_case("grpc+http://localhost/some-path", false; "grpc valid invalid host and path")]
    /// Connecting with wait-connect set to 0 succeeds, as that's the default.
    #[test_case("grpc+http://localhost?wait-connect=0", true; "grpc valid host wait-connect=0")]
    /// Connecting with wait-connect set to 1 fails, as the host doesn't exist.
    #[test_case("grpc+http://nonexist.invalid?wait-connect=1", false; "grpc valid host wait-connect=1")]
    #[tokio::test]
    async fn test_from_addr_tokio(uri_str: &str, is_ok: bool) {
        // Every case is expected to be a syntactically valid URL; only the
        // channel construction outcome is under test.
        let parsed = Url::parse(uri_str).expect("must parse");
        let outcome = channel_from_url(&parsed).await;
        assert_eq!(outcome.is_ok(), is_ok)
    }
}

View file

@ -193,13 +193,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
path_info_service_addr,
} => {
// initialize stores
let blob_service = blobservice::from_addr(&blob_service_addr)?;
let directory_service = directoryservice::from_addr(&directory_service_addr)?;
let blob_service = blobservice::from_addr(&blob_service_addr).await?;
let directory_service = directoryservice::from_addr(&directory_service_addr).await?;
let path_info_service = pathinfoservice::from_addr(
&path_info_service_addr,
blob_service.clone(),
directory_service.clone(),
)?;
)
.await?;
let listen_address = listen_address
.unwrap_or_else(|| "[::]:8000".to_string())
@ -247,13 +248,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
path_info_service_addr,
} => {
// FUTUREWORK: allow flat for single files?
let blob_service = blobservice::from_addr(&blob_service_addr)?;
let directory_service = directoryservice::from_addr(&directory_service_addr)?;
let blob_service = blobservice::from_addr(&blob_service_addr).await?;
let directory_service = directoryservice::from_addr(&directory_service_addr).await?;
let path_info_service = pathinfoservice::from_addr(
&path_info_service_addr,
blob_service.clone(),
directory_service.clone(),
)?;
)
.await?;
let tasks = paths
.into_iter()
@ -343,13 +345,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
list_root,
threads,
} => {
let blob_service = blobservice::from_addr(&blob_service_addr)?;
let directory_service = directoryservice::from_addr(&directory_service_addr)?;
let blob_service = blobservice::from_addr(&blob_service_addr).await?;
let directory_service = directoryservice::from_addr(&directory_service_addr).await?;
let path_info_service = pathinfoservice::from_addr(
&path_info_service_addr,
blob_service.clone(),
directory_service.clone(),
)?;
)
.await?;
let mut fuse_daemon = tokio::task::spawn_blocking(move || {
let f = TvixStoreFs::new(
@ -383,13 +386,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
path_info_service_addr,
list_root,
} => {
let blob_service = blobservice::from_addr(&blob_service_addr)?;
let directory_service = directoryservice::from_addr(&directory_service_addr)?;
let blob_service = blobservice::from_addr(&blob_service_addr).await?;
let directory_service = directoryservice::from_addr(&directory_service_addr).await?;
let path_info_service = pathinfoservice::from_addr(
&path_info_service_addr,
blob_service.clone(),
directory_service.clone(),
)?;
)
.await?;
tokio::task::spawn_blocking(move || {
let fs = TvixStoreFs::new(

View file

@ -23,7 +23,7 @@ use url::Url;
///
/// As the [PathInfoService] needs to talk to [BlobService] and [DirectoryService],
/// these also need to be passed in.
pub fn from_addr(
pub async fn from_addr(
uri: &str,
blob_service: Arc<dyn BlobService>,
directory_service: Arc<dyn DirectoryService>,
@ -68,7 +68,7 @@ pub fn from_addr(
// - In the case of unix sockets, there must be a path, but may not be a host.
// - In the case of non-unix sockets, there must be a host, but no path.
// Constructing the channel is handled by tvix_castore::tonic::channel_from_url.
let client = PathInfoServiceClient::new(tvix_castore::channel::from_url(&url)?);
let client = PathInfoServiceClient::new(tvix_castore::tonic::channel_from_url(&url).await?);
Arc::new(GRPCPathInfoService::from_client(client))
} else {
Err(Error::StorageError(format!(
@ -91,6 +91,8 @@ mod tests {
static ref TMPDIR_SLED_2: TempDir = TempDir::new().unwrap();
}
// the gRPC tests below don't fail, because we connect lazily.
/// This uses an unsupported scheme.
#[test_case("http://foo.example/test", false; "unsupported scheme")]
/// This configures sled in temporary mode.
@ -111,15 +113,6 @@ mod tests {
#[test_case("memory:///", false; "memory invalid root path")]
/// This sets a memory url path to "/foo", which is invalid.
#[test_case("memory:///foo", false; "memory invalid root path foo")]
fn test_from_addr(uri_str: &str, is_ok: bool) {
assert_eq!(
from_addr(uri_str, gen_blob_service(), gen_directory_service()).is_ok(),
is_ok
)
}
// the gRPC tests below don't fail, because we connect lazily.
/// Correct scheme to connect to a unix socket.
#[test_case("grpc+unix:///path/to/somewhere", true; "grpc valid unix socket")]
/// Correct scheme for unix socket, but setting a host too, which is invalid.
@ -135,7 +128,9 @@ mod tests {
#[tokio::test]
async fn test_from_addr_tokio(uri_str: &str, is_ok: bool) {
assert_eq!(
from_addr(uri_str, gen_blob_service(), gen_directory_service()).is_ok(),
from_addr(uri_str, gen_blob_service(), gen_directory_service())
.await
.is_ok(),
is_ok
)
}

View file

@ -180,7 +180,9 @@ mod tests {
let url = url::Url::parse(&format!("grpc+unix://{}", socket_path.display()))
.expect("must parse");
let client = PathInfoServiceClient::new(
tvix_castore::channel::from_url(&url).expect("must succeed"),
tvix_castore::tonic::channel_from_url(&url)
.await
.expect("must succeed"),
);
GRPCPathInfoService::from_client(client)