feat(tvix/tracing): gRPC trace context propagation

This introduces optional helper functions in tvix/tracing for trace
propagation and uses these helpers in `tvix-store`.

The GRPCBlobService, GRPCDirectoryService and GRPCPathInfoService now
accept a generic client, meaning the client can be generated with either
`::new` or `::with_interceptor`.

This was tested and validated by starting a `tvix-store daemon` and
`tvix-store import`.

Change-Id: I4b194483bf09266820104b4b56e4a135dca2b77a
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11863
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
Simon Hauser 2024-06-20 11:39:09 +02:00
parent 2b20d8d82d
commit 639a00e2ab
18 changed files with 399 additions and 48 deletions

41
tvix/Cargo.lock generated
View file

@ -1543,6 +1543,12 @@ dependencies = [
"pin-project-lite", "pin-project-lite",
] ]
[[package]]
name = "http-range-header"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "add0ab9360ddbd88cfeb3bd9574a1d85cfdfa14db10b3e21d3700dbc4328758f"
[[package]] [[package]]
name = "httparse" name = "httparse"
version = "1.8.0" version = "1.8.0"
@ -2276,6 +2282,18 @@ dependencies = [
"urlencoding", "urlencoding",
] ]
[[package]]
name = "opentelemetry-http"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7690dc77bf776713848c4faa6501157469017eaf332baccd4eb1cea928743d94"
dependencies = [
"async-trait",
"bytes",
"http",
"opentelemetry",
]
[[package]] [[package]]
name = "opentelemetry-otlp" name = "opentelemetry-otlp"
version = "0.15.0" version = "0.15.0"
@ -4032,6 +4050,25 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "tower-http"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61c5bb1d698276a2443e5ecfabc1008bf15a36c12e6a7176e7bf089ea9131140"
dependencies = [
"bitflags 2.4.2",
"bytes",
"futures-core",
"futures-util",
"http",
"http-body",
"http-range-header",
"pin-project-lite",
"tower-layer",
"tower-service",
"tracing",
]
[[package]] [[package]]
name = "tower-layer" name = "tower-layer"
version = "0.3.2" version = "0.3.2"
@ -4430,6 +4467,7 @@ dependencies = [
"tonic-build", "tonic-build",
"tonic-reflection", "tonic-reflection",
"tower", "tower",
"tower-http",
"tracing", "tracing",
"tracing-indicatif", "tracing-indicatif",
"tvix-castore", "tvix-castore",
@ -4442,13 +4480,16 @@ dependencies = [
name = "tvix-tracing" name = "tvix-tracing"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"http",
"indicatif", "indicatif",
"lazy_static", "lazy_static",
"opentelemetry", "opentelemetry",
"opentelemetry-http",
"opentelemetry-otlp", "opentelemetry-otlp",
"opentelemetry_sdk", "opentelemetry_sdk",
"thiserror", "thiserror",
"tokio", "tokio",
"tonic",
"tracing", "tracing",
"tracing-indicatif", "tracing-indicatif",
"tracing-opentelemetry", "tracing-opentelemetry",

View file

@ -4653,6 +4653,13 @@ rec {
]; ];
}; };
"http-range-header" = rec {
crateName = "http-range-header";
version = "0.3.1";
edition = "2018";
sha256 = "13vm511vq3bhschkw2xi9nhxzkw53m55gn9vxg7qigfxc29spl5d";
features = { };
};
"httparse" = rec { "httparse" = rec {
crateName = "httparse"; crateName = "httparse";
version = "1.8.0"; version = "1.8.0";
@ -6928,6 +6935,39 @@ rec {
}; };
resolvedDefaultFeatures = [ "default" "metrics" "pin-project-lite" "trace" ]; resolvedDefaultFeatures = [ "default" "metrics" "pin-project-lite" "trace" ];
}; };
"opentelemetry-http" = rec {
crateName = "opentelemetry-http";
version = "0.11.1";
edition = "2021";
sha256 = "151xfhlakkmi9v6sqarkmxz02sbl2l0nbajgij216rvppxvxr43n";
dependencies = [
{
name = "async-trait";
packageId = "async-trait";
}
{
name = "bytes";
packageId = "bytes";
}
{
name = "http";
packageId = "http";
usesDefaultFeatures = false;
}
{
name = "opentelemetry";
packageId = "opentelemetry";
features = [ "trace" ];
}
];
features = {
"hyper" = [ "dep:hyper" ];
"isahc" = [ "dep:isahc" ];
"reqwest" = [ "dep:reqwest" ];
"reqwest-rustls" = [ "reqwest" "reqwest/rustls-tls-native-roots" ];
"tokio" = [ "dep:tokio" ];
};
};
"opentelemetry-otlp" = rec { "opentelemetry-otlp" = rec {
crateName = "opentelemetry-otlp"; crateName = "opentelemetry-otlp";
version = "0.15.0"; version = "0.15.0";
@ -12569,6 +12609,106 @@ rec {
}; };
resolvedDefaultFeatures = [ "__common" "balance" "buffer" "default" "discover" "futures-core" "futures-util" "indexmap" "limit" "load" "log" "make" "pin-project" "pin-project-lite" "rand" "ready-cache" "slab" "timeout" "tokio" "tokio-util" "tracing" "util" ]; resolvedDefaultFeatures = [ "__common" "balance" "buffer" "default" "discover" "futures-core" "futures-util" "indexmap" "limit" "load" "log" "make" "pin-project" "pin-project-lite" "rand" "ready-cache" "slab" "timeout" "tokio" "tokio-util" "tracing" "util" ];
}; };
"tower-http" = rec {
crateName = "tower-http";
version = "0.4.4";
edition = "2018";
sha256 = "0h0i2flrw25zwxv72sifq4v5mwcb030spksy7r2a4xl2d4fvpib1";
authors = [
"Tower Maintainers <team@tower-rs.com>"
];
dependencies = [
{
name = "bitflags";
packageId = "bitflags 2.4.2";
}
{
name = "bytes";
packageId = "bytes";
}
{
name = "futures-core";
packageId = "futures-core";
}
{
name = "futures-util";
packageId = "futures-util";
usesDefaultFeatures = false;
}
{
name = "http";
packageId = "http";
}
{
name = "http-body";
packageId = "http-body";
}
{
name = "http-range-header";
packageId = "http-range-header";
}
{
name = "pin-project-lite";
packageId = "pin-project-lite";
}
{
name = "tower-layer";
packageId = "tower-layer";
}
{
name = "tower-service";
packageId = "tower-service";
}
{
name = "tracing";
packageId = "tracing";
optional = true;
usesDefaultFeatures = false;
}
];
devDependencies = [
{
name = "bytes";
packageId = "bytes";
}
];
features = {
"async-compression" = [ "dep:async-compression" ];
"auth" = [ "base64" "validate-request" ];
"base64" = [ "dep:base64" ];
"catch-panic" = [ "tracing" "futures-util/std" ];
"compression-br" = [ "async-compression/brotli" "tokio-util" "tokio" ];
"compression-deflate" = [ "async-compression/zlib" "tokio-util" "tokio" ];
"compression-full" = [ "compression-br" "compression-deflate" "compression-gzip" "compression-zstd" ];
"compression-gzip" = [ "async-compression/gzip" "tokio-util" "tokio" ];
"compression-zstd" = [ "async-compression/zstd" "tokio-util" "tokio" ];
"decompression-br" = [ "async-compression/brotli" "tokio-util" "tokio" ];
"decompression-deflate" = [ "async-compression/zlib" "tokio-util" "tokio" ];
"decompression-full" = [ "decompression-br" "decompression-deflate" "decompression-gzip" "decompression-zstd" ];
"decompression-gzip" = [ "async-compression/gzip" "tokio-util" "tokio" ];
"decompression-zstd" = [ "async-compression/zstd" "tokio-util" "tokio" ];
"follow-redirect" = [ "iri-string" "tower/util" ];
"fs" = [ "tokio/fs" "tokio-util/io" "tokio/io-util" "mime_guess" "mime" "percent-encoding" "httpdate" "set-status" "futures-util/alloc" "tracing" ];
"full" = [ "add-extension" "auth" "catch-panic" "compression-full" "cors" "decompression-full" "follow-redirect" "fs" "limit" "map-request-body" "map-response-body" "metrics" "normalize-path" "propagate-header" "redirect" "request-id" "sensitive-headers" "set-header" "set-status" "timeout" "trace" "util" "validate-request" ];
"httpdate" = [ "dep:httpdate" ];
"iri-string" = [ "dep:iri-string" ];
"metrics" = [ "tokio/time" ];
"mime" = [ "dep:mime" ];
"mime_guess" = [ "dep:mime_guess" ];
"percent-encoding" = [ "dep:percent-encoding" ];
"request-id" = [ "uuid" ];
"timeout" = [ "tokio/time" ];
"tokio" = [ "dep:tokio" ];
"tokio-util" = [ "dep:tokio-util" ];
"tower" = [ "dep:tower" ];
"trace" = [ "tracing" ];
"tracing" = [ "dep:tracing" ];
"util" = [ "tower" ];
"uuid" = [ "dep:uuid" ];
"validate-request" = [ "mime" ];
};
resolvedDefaultFeatures = [ "default" "trace" "tracing" ];
};
"tower-layer" = rec { "tower-layer" = rec {
crateName = "tower-layer"; crateName = "tower-layer";
version = "0.3.2"; version = "0.3.2";
@ -13385,6 +13525,7 @@ rec {
{ {
name = "tvix-tracing"; name = "tvix-tracing";
packageId = "tvix-tracing"; packageId = "tvix-tracing";
features = [ "tonic" ];
} }
{ {
name = "url"; name = "url";
@ -14142,6 +14283,11 @@ rec {
name = "tower"; name = "tower";
packageId = "tower"; packageId = "tower";
} }
{
name = "tower-http";
packageId = "tower-http";
features = [ "trace" ];
}
{ {
name = "tracing"; name = "tracing";
packageId = "tracing"; packageId = "tracing";
@ -14157,6 +14303,7 @@ rec {
{ {
name = "tvix-tracing"; name = "tvix-tracing";
packageId = "tvix-tracing"; packageId = "tvix-tracing";
features = [ "tonic" ];
} }
{ {
name = "url"; name = "url";
@ -14221,6 +14368,11 @@ rec {
then lib.cleanSourceWith { filter = sourceFilter; src = ./tracing; } then lib.cleanSourceWith { filter = sourceFilter; src = ./tracing; }
else ./tracing; else ./tracing;
dependencies = [ dependencies = [
{
name = "http";
packageId = "http";
optional = true;
}
{ {
name = "indicatif"; name = "indicatif";
packageId = "indicatif"; packageId = "indicatif";
@ -14234,6 +14386,11 @@ rec {
packageId = "opentelemetry"; packageId = "opentelemetry";
optional = true; optional = true;
} }
{
name = "opentelemetry-http";
packageId = "opentelemetry-http";
optional = true;
}
{ {
name = "opentelemetry-otlp"; name = "opentelemetry-otlp";
packageId = "opentelemetry-otlp"; packageId = "opentelemetry-otlp";
@ -14254,6 +14411,11 @@ rec {
packageId = "tokio"; packageId = "tokio";
features = [ "sync" "rt" ]; features = [ "sync" "rt" ];
} }
{
name = "tonic";
packageId = "tonic";
optional = true;
}
{ {
name = "tracing"; name = "tracing";
packageId = "tracing"; packageId = "tracing";
@ -14281,10 +14443,11 @@ rec {
} }
]; ];
features = { features = {
"otlp" = [ "dep:tracing-opentelemetry" "dep:opentelemetry" "dep:opentelemetry-otlp" "dep:opentelemetry_sdk" ]; "otlp" = [ "dep:tracing-opentelemetry" "dep:opentelemetry" "dep:opentelemetry-otlp" "dep:opentelemetry_sdk" "dep:opentelemetry-http" ];
"tonic" = [ "dep:tonic" "dep:http" ];
"tracy" = [ "dep:tracing-tracy" ]; "tracy" = [ "dep:tracing-tracy" ];
}; };
resolvedDefaultFeatures = [ "default" "otlp" "tracy" ]; resolvedDefaultFeatures = [ "default" "otlp" "tonic" "tracy" ];
}; };
"typenum" = rec { "typenum" = rec {
crateName = "typenum"; crateName = "typenum";

View file

@ -29,7 +29,7 @@ tonic = "0.11.0"
tower = "0.4.13" tower = "0.4.13"
tracing = "0.1.37" tracing = "0.1.37"
tracing-indicatif = "0.3.6" tracing-indicatif = "0.3.6"
tvix-tracing = { path = "../tracing" } tvix-tracing = { path = "../tracing", features = ["tonic"] }
url = "2.4.0" url = "2.4.0"
walkdir = "2.4.0" walkdir = "2.4.0"
zstd = "0.13.0" zstd = "0.13.0"

View file

@ -30,8 +30,12 @@ pub async fn from_addr(uri: &str) -> Result<Box<dyn BlobService>, crate::Error>
// - In the case of unix sockets, there must be a path, but may not be a host. // - In the case of unix sockets, there must be a path, but may not be a host.
// - In the case of non-unix sockets, there must be a host, but no path. // - In the case of non-unix sockets, there must be a host, but no path.
// Constructing the channel is handled by tvix_castore::channel::from_url. // Constructing the channel is handled by tvix_castore::channel::from_url.
let client = BlobServiceClient::new(crate::tonic::channel_from_url(&url).await?); Box::new(GRPCBlobService::from_client(
Box::new(GRPCBlobService::from_client(client)) BlobServiceClient::with_interceptor(
crate::tonic::channel_from_url(&url).await?,
tvix_tracing::propagate::tonic::send_trace,
),
))
} }
scheme if scheme.starts_with("objectstore+") => { scheme if scheme.starts_with("objectstore+") => {
// We need to convert the URL to string, strip the prefix there, and then // We need to convert the URL to string, strip the prefix there, and then

View file

@ -17,29 +17,39 @@ use tokio_util::{
io::{CopyToBytes, SinkWriter}, io::{CopyToBytes, SinkWriter},
sync::PollSender, sync::PollSender,
}; };
use tonic::{async_trait, transport::Channel, Code, Status}; use tonic::{async_trait, Code, Status};
use tracing::{instrument, Instrument as _}; use tracing::{instrument, Instrument as _};
/// Connects to a (remote) tvix-store BlobService over gRPC. /// Connects to a (remote) tvix-store BlobService over gRPC.
#[derive(Clone)] #[derive(Clone)]
pub struct GRPCBlobService { pub struct GRPCBlobService<T>
where
T: Clone,
{
/// The internal reference to a gRPC client. /// The internal reference to a gRPC client.
/// Cloning it is cheap, and it internally handles concurrent requests. /// Cloning it is cheap, and it internally handles concurrent requests.
grpc_client: proto::blob_service_client::BlobServiceClient<Channel>, grpc_client: proto::blob_service_client::BlobServiceClient<T>,
} }
impl GRPCBlobService { impl<T> GRPCBlobService<T>
where
T: tonic::client::GrpcService<tonic::body::BoxBody> + Clone,
{
/// construct a [GRPCBlobService] from a [proto::blob_service_client::BlobServiceClient]. /// construct a [GRPCBlobService] from a [proto::blob_service_client::BlobServiceClient].
/// panics if called outside the context of a tokio runtime. /// panics if called outside the context of a tokio runtime.
pub fn from_client( pub fn from_client(grpc_client: proto::blob_service_client::BlobServiceClient<T>) -> Self {
grpc_client: proto::blob_service_client::BlobServiceClient<Channel>,
) -> Self {
Self { grpc_client } Self { grpc_client }
} }
} }
#[async_trait] #[async_trait]
impl BlobService for GRPCBlobService { impl<T> BlobService for GRPCBlobService<T>
where
T: tonic::client::GrpcService<tonic::body::BoxBody> + Send + Sync + Clone + 'static,
T::ResponseBody: tonic::codegen::Body<Data = tonic::codegen::Bytes> + Send + 'static,
<T::ResponseBody as tonic::codegen::Body>::Error: Into<tonic::codegen::StdError> + Send,
T::Future: Send,
{
#[instrument(skip(self, digest), fields(blob.digest=%digest))] #[instrument(skip(self, digest), fields(blob.digest=%digest))]
async fn has(&self, digest: &B3Digest) -> io::Result<bool> { async fn has(&self, digest: &B3Digest) -> io::Result<bool> {
let mut grpc_client = self.grpc_client.clone(); let mut grpc_client = self.grpc_client.clone();
@ -337,7 +347,6 @@ mod tests {
.await .await
.expect("must succeed"), .expect("must succeed"),
); );
GRPCBlobService::from_client(client) GRPCBlobService::from_client(client)
}; };

View file

@ -63,8 +63,12 @@ pub async fn from_addr(uri: &str) -> Result<Box<dyn DirectoryService>, crate::Er
// - In the case of unix sockets, there must be a path, but may not be a host. // - In the case of unix sockets, there must be a path, but may not be a host.
// - In the case of non-unix sockets, there must be a host, but no path. // - In the case of non-unix sockets, there must be a host, but no path.
// Constructing the channel is handled by tvix_castore::channel::from_url. // Constructing the channel is handled by tvix_castore::channel::from_url.
let client = DirectoryServiceClient::new(crate::tonic::channel_from_url(&url).await?); Box::new(GRPCDirectoryService::from_client(
Box::new(GRPCDirectoryService::from_client(client)) DirectoryServiceClient::with_interceptor(
crate::tonic::channel_from_url(&url).await?,
tvix_tracing::propagate::tonic::send_trace,
),
))
} }
scheme if scheme.starts_with("objectstore+") => { scheme if scheme.starts_with("objectstore+") => {
// We need to convert the URL to string, strip the prefix there, and then // We need to convert the URL to string, strip the prefix there, and then

View file

@ -9,31 +9,41 @@ use tokio::spawn;
use tokio::sync::mpsc::UnboundedSender; use tokio::sync::mpsc::UnboundedSender;
use tokio::task::JoinHandle; use tokio::task::JoinHandle;
use tokio_stream::wrappers::UnboundedReceiverStream; use tokio_stream::wrappers::UnboundedReceiverStream;
use tonic::async_trait; use tonic::{async_trait, Code, Status};
use tonic::Code;
use tonic::{transport::Channel, Status};
use tracing::{instrument, warn, Instrument as _}; use tracing::{instrument, warn, Instrument as _};
/// Connects to a (remote) tvix-store DirectoryService over gRPC. /// Connects to a (remote) tvix-store DirectoryService over gRPC.
#[derive(Clone)] #[derive(Clone)]
pub struct GRPCDirectoryService { pub struct GRPCDirectoryService<T>
where
T: Clone,
{
/// The internal reference to a gRPC client. /// The internal reference to a gRPC client.
/// Cloning it is cheap, and it internally handles concurrent requests. /// Cloning it is cheap, and it internally handles concurrent requests.
grpc_client: proto::directory_service_client::DirectoryServiceClient<Channel>, grpc_client: proto::directory_service_client::DirectoryServiceClient<T>,
} }
impl GRPCDirectoryService { impl<T> GRPCDirectoryService<T>
where
T: tonic::client::GrpcService<tonic::body::BoxBody> + Clone,
{
/// construct a [GRPCDirectoryService] from a [proto::directory_service_client::DirectoryServiceClient]. /// construct a [GRPCDirectoryService] from a [proto::directory_service_client::DirectoryServiceClient].
/// panics if called outside the context of a tokio runtime. /// panics if called outside the context of a tokio runtime.
pub fn from_client( pub fn from_client(
grpc_client: proto::directory_service_client::DirectoryServiceClient<Channel>, grpc_client: proto::directory_service_client::DirectoryServiceClient<T>,
) -> Self { ) -> Self {
Self { grpc_client } Self { grpc_client }
} }
} }
#[async_trait] #[async_trait]
impl DirectoryService for GRPCDirectoryService { impl<T> DirectoryService for GRPCDirectoryService<T>
where
T: tonic::client::GrpcService<tonic::body::BoxBody> + Send + Sync + Clone + 'static,
T::ResponseBody: tonic::codegen::Body<Data = tonic::codegen::Bytes> + Send + 'static,
<T::ResponseBody as tonic::codegen::Body>::Error: Into<tonic::codegen::StdError> + Send,
T::Future: Send,
{
#[instrument(level = "trace", skip_all, fields(directory.digest = %digest))] #[instrument(level = "trace", skip_all, fields(directory.digest = %digest))]
async fn get( async fn get(
&self, &self,

View file

@ -233,9 +233,6 @@ logs etc, but this is something requiring a lot of designing.
- Maybe drop `--log-level` entirely, and only use `RUST_LOG` env exclusively? - Maybe drop `--log-level` entirely, and only use `RUST_LOG` env exclusively?
`debug`,`trace` level across all crates is a bit useless, and `RUST_LOG` can `debug`,`trace` level across all crates is a bit useless, and `RUST_LOG` can
be much more granular… be much more granular…
- gRPC trace propagation (cl/10532 + @simon)
We need to wire trace propagation into our gRPC clients, so if we collect
traces both for the client and server they will be connected.
- Fix OTLP sending batches on shutdown. - Fix OTLP sending batches on shutdown.
It seems for short-lived CLI invocations we don't end up receiving all spans. It seems for short-lived CLI invocations we don't end up receiving all spans.
Ensure we flush these on ctrl-c, and regular process termination. Ensure we flush these on ctrl-c, and regular process termination.

View file

@ -31,13 +31,14 @@ tokio-stream = { version = "0.1.14", features = ["fs"] }
tokio-util = { version = "0.7.9", features = ["io", "io-util", "compat"] } tokio-util = { version = "0.7.9", features = ["io", "io-util", "compat"] }
tonic = { version = "0.11.0", features = ["tls", "tls-roots"] } tonic = { version = "0.11.0", features = ["tls", "tls-roots"] }
tower = "0.4.13" tower = "0.4.13"
tower-http = { version = "0.4.4", features = ["trace"] }
tvix-castore = { path = "../castore" } tvix-castore = { path = "../castore" }
url = "2.4.0" url = "2.4.0"
walkdir = "2.4.0" walkdir = "2.4.0"
reqwest = { version = "0.11.22", features = ["rustls-tls-native-roots", "stream"], default-features = false } reqwest = { version = "0.11.22", features = ["rustls-tls-native-roots", "stream"], default-features = false }
lru = "0.12.3" lru = "0.12.3"
parking_lot = "0.12.2" parking_lot = "0.12.2"
tvix-tracing = { path = "../tracing" } tvix-tracing = { path = "../tracing", features = ["tonic"] }
tracing = "0.1.40" tracing = "0.1.40"
tracing-indicatif = "0.3.6" tracing-indicatif = "0.3.6"

View file

@ -13,6 +13,8 @@ use tokio_listener::Listener;
use tokio_listener::SystemOptions; use tokio_listener::SystemOptions;
use tokio_listener::UserOptions; use tokio_listener::UserOptions;
use tonic::transport::Server; use tonic::transport::Server;
use tower::ServiceBuilder;
use tower_http::trace::{DefaultMakeSpan, TraceLayer};
use tracing::{info, info_span, instrument, Level, Span}; use tracing::{info, info_span, instrument, Level, Span};
use tracing_indicatif::span_ext::IndicatifSpanExt as _; use tracing_indicatif::span_ext::IndicatifSpanExt as _;
use tvix_castore::import::fs::ingest_path; use tvix_castore::import::fs::ingest_path;
@ -215,7 +217,17 @@ async fn run_cli(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
.parse() .parse()
.unwrap(); .unwrap();
let mut server = Server::builder(); let mut server = Server::builder().layer(
ServiceBuilder::new()
.layer(
TraceLayer::new_for_grpc().make_span_with(
DefaultMakeSpan::new()
.level(Level::INFO)
.include_headers(true),
),
)
.map_request(tvix_tracing::propagate::tonic::accept_trace),
);
#[allow(unused_mut)] #[allow(unused_mut)]
let mut router = server let mut router = server

View file

@ -105,9 +105,12 @@ pub async fn from_addr(
// - In the case of unix sockets, there must be a path, but may not be a host. // - In the case of unix sockets, there must be a path, but may not be a host.
// - In the case of non-unix sockets, there must be a host, but no path. // - In the case of non-unix sockets, there must be a host, but no path.
// Constructing the channel is handled by tvix_castore::channel::from_url. // Constructing the channel is handled by tvix_castore::channel::from_url.
let client = Box::new(GRPCPathInfoService::from_client(
PathInfoServiceClient::new(tvix_castore::tonic::channel_from_url(&url).await?); PathInfoServiceClient::with_interceptor(
Box::new(GRPCPathInfoService::from_client(client)) tvix_castore::tonic::channel_from_url(&url).await?,
tvix_tracing::propagate::tonic::send_trace,
),
))
} }
#[cfg(feature = "cloud")] #[cfg(feature = "cloud")]
"bigtable" => { "bigtable" => {

View file

@ -6,31 +6,43 @@ use crate::{
use async_stream::try_stream; use async_stream::try_stream;
use futures::stream::BoxStream; use futures::stream::BoxStream;
use nix_compat::nixbase32; use nix_compat::nixbase32;
use tonic::{async_trait, transport::Channel, Code}; use tonic::{async_trait, Code};
use tracing::{instrument, Span}; use tracing::{instrument, Span};
use tracing_indicatif::span_ext::IndicatifSpanExt; use tracing_indicatif::span_ext::IndicatifSpanExt;
use tvix_castore::{proto as castorepb, Error}; use tvix_castore::{proto as castorepb, Error};
/// Connects to a (remote) tvix-store PathInfoService over gRPC. /// Connects to a (remote) tvix-store PathInfoService over gRPC.
#[derive(Clone)] #[derive(Clone)]
pub struct GRPCPathInfoService { pub struct GRPCPathInfoService<T>
where
T: Clone,
{
/// The internal reference to a gRPC client. /// The internal reference to a gRPC client.
/// Cloning it is cheap, and it internally handles concurrent requests. /// Cloning it is cheap, and it internally handles concurrent requests.
grpc_client: proto::path_info_service_client::PathInfoServiceClient<Channel>, grpc_client: proto::path_info_service_client::PathInfoServiceClient<T>,
} }
impl GRPCPathInfoService { impl<T> GRPCPathInfoService<T>
where
T: tonic::client::GrpcService<tonic::body::BoxBody> + Clone,
{
/// construct a [GRPCPathInfoService] from a [proto::path_info_service_client::PathInfoServiceClient]. /// construct a [GRPCPathInfoService] from a [proto::path_info_service_client::PathInfoServiceClient].
/// panics if called outside the context of a tokio runtime. /// panics if called outside the context of a tokio runtime.
pub fn from_client( pub fn from_client(
grpc_client: proto::path_info_service_client::PathInfoServiceClient<Channel>, grpc_client: proto::path_info_service_client::PathInfoServiceClient<T>,
) -> Self { ) -> Self {
Self { grpc_client } Self { grpc_client }
} }
} }
#[async_trait] #[async_trait]
impl PathInfoService for GRPCPathInfoService { impl<T> PathInfoService for GRPCPathInfoService<T>
where
T: tonic::client::GrpcService<tonic::body::BoxBody> + Send + Sync + Clone + 'static,
T::ResponseBody: tonic::codegen::Body<Data = tonic::codegen::Bytes> + Send + 'static,
<T::ResponseBody as tonic::codegen::Body>::Error: Into<tonic::codegen::StdError> + Send,
T::Future: Send,
{
#[instrument(level = "trace", skip_all, fields(path_info.digest = nixbase32::encode(&digest)))] #[instrument(level = "trace", skip_all, fields(path_info.digest = nixbase32::encode(&digest)))]
async fn get(&self, digest: [u8; 20]) -> Result<Option<PathInfo>, Error> { async fn get(&self, digest: [u8; 20]) -> Result<Option<PathInfo>, Error> {
let path_info = self let path_info = self
@ -107,7 +119,13 @@ impl PathInfoService for GRPCPathInfoService {
} }
#[async_trait] #[async_trait]
impl NarCalculationService for GRPCPathInfoService { impl<T> NarCalculationService for GRPCPathInfoService<T>
where
T: tonic::client::GrpcService<tonic::body::BoxBody> + Send + Sync + Clone + 'static,
T::ResponseBody: tonic::codegen::Body<Data = tonic::codegen::Bytes> + Send + 'static,
<T::ResponseBody as tonic::codegen::Body>::Error: Into<tonic::codegen::StdError> + Send,
T::Future: Send,
{
#[instrument(level = "trace", skip_all, fields(root_node = ?root_node, indicatif.pb_show=1))] #[instrument(level = "trace", skip_all, fields(root_node = ?root_node, indicatif.pb_show=1))]
async fn calculate_nar( async fn calculate_nar(
&self, &self,

View file

@ -16,8 +16,11 @@ use crate::{
/// Constructs and returns a gRPC PathInfoService. /// Constructs and returns a gRPC PathInfoService.
/// We also return memory-based {Blob,Directory}Service, /// We also return memory-based {Blob,Directory}Service,
/// as the consumer of this function accepts a 3-tuple. /// as the consumer of this function accepts a 3-tuple.
pub async fn make_grpc_path_info_service_client( pub async fn make_grpc_path_info_service_client() -> (
) -> (impl BlobService, impl DirectoryService, GRPCPathInfoService) { impl BlobService,
impl DirectoryService,
GRPCPathInfoService<tonic::transport::Channel>,
) {
let (left, right) = tokio::io::duplex(64); let (left, right) = tokio::io::duplex(64);
let blob_service = blob_service(); let blob_service = blob_service();

View file

@ -51,12 +51,14 @@ pub async fn construct_services(
.map_err(|e| io::Error::other(e.to_string()))?; .map_err(|e| io::Error::other(e.to_string()))?;
if url.scheme().starts_with("grpc+") { if url.scheme().starts_with("grpc+") {
let client = PathInfoServiceClient::new( Box::new(GRPCPathInfoService::from_client(
PathInfoServiceClient::with_interceptor(
tvix_castore::tonic::channel_from_url(&url) tvix_castore::tonic::channel_from_url(&url)
.await .await
.map_err(|e| io::Error::other(e.to_string()))?, .map_err(|e| io::Error::other(e.to_string()))?,
); tvix_tracing::propagate::tonic::send_trace,
Box::new(GRPCPathInfoService::from_client(client)) ),
))
} else { } else {
Box::new(SimpleRenderer::new( Box::new(SimpleRenderer::new(
blob_service.clone(), blob_service.clone(),

View file

@ -17,6 +17,10 @@ opentelemetry = { version = "0.22.0", optional = true }
opentelemetry-otlp = { version = "0.15.0", optional = true } opentelemetry-otlp = { version = "0.15.0", optional = true }
opentelemetry_sdk = { version = "0.22.1", features = ["rt-tokio"], optional = true } opentelemetry_sdk = { version = "0.22.1", features = ["rt-tokio"], optional = true }
tracing-tracy = { version = "0.11.0", features = ["flush-on-exit"], optional = true } tracing-tracy = { version = "0.11.0", features = ["flush-on-exit"], optional = true }
opentelemetry-http = { version = "0.11.0", optional = true }
tonic = { version = "0.11.0", optional = true }
http = { version = "0.2.11", optional = true }
[features] [features]
default = [] default = []
@ -24,11 +28,16 @@ otlp = [
"dep:tracing-opentelemetry", "dep:tracing-opentelemetry",
"dep:opentelemetry", "dep:opentelemetry",
"dep:opentelemetry-otlp", "dep:opentelemetry-otlp",
"dep:opentelemetry_sdk" "dep:opentelemetry_sdk",
"dep:opentelemetry-http"
] ]
tracy = [ tracy = [
"dep:tracing-tracy" "dep:tracing-tracy"
] ]
tonic = [
"dep:tonic",
"dep:http",
]
[lints] [lints]
workspace = true workspace = true

View file

@ -9,6 +9,7 @@ use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilte
use opentelemetry::{trace::Tracer, KeyValue}; use opentelemetry::{trace::Tracer, KeyValue};
#[cfg(feature = "otlp")] #[cfg(feature = "otlp")]
use opentelemetry_sdk::{ use opentelemetry_sdk::{
propagation::TraceContextPropagator,
resource::{ResourceDetector, SdkProvidedResourceDetector}, resource::{ResourceDetector, SdkProvidedResourceDetector},
trace::BatchConfigBuilder, trace::BatchConfigBuilder,
Resource, Resource,
@ -16,6 +17,9 @@ use opentelemetry_sdk::{
#[cfg(feature = "tracy")] #[cfg(feature = "tracy")]
use tracing_tracy::TracyLayer; use tracing_tracy::TracyLayer;
#[cfg(feature = "tonic")] // TODO or http
pub mod propagate;
lazy_static! { lazy_static! {
pub static ref PB_PROGRESS_STYLE: ProgressStyle = ProgressStyle::with_template( pub static ref PB_PROGRESS_STYLE: ProgressStyle = ProgressStyle::with_template(
"{span_child_prefix} {wide_msg} {bar:10} ({elapsed}) {pos:>7}/{len:7}" "{span_child_prefix} {wide_msg} {bar:10} ({elapsed}) {pos:>7}/{len:7}"
@ -186,6 +190,9 @@ impl TracingBuilder {
#[cfg(feature = "otlp")] #[cfg(feature = "otlp")]
{ {
if let Some(service_name) = self.service_name { if let Some(service_name) = self.service_name {
// register a text map propagator for trace propagation
opentelemetry::global::set_text_map_propagator(TraceContextPropagator::new());
let (tracer, tx) = gen_otlp_tracer(service_name.to_string()); let (tracer, tx) = gen_otlp_tracer(service_name.to_string());
// Create a tracing layer with the configured tracer // Create a tracing layer with the configured tracer
let layer = tracing_opentelemetry::layer().with_tracer(tracer); let layer = tracing_opentelemetry::layer().with_tracer(tracer);

View file

@ -0,0 +1,9 @@
#[cfg(feature = "tonic")]
pub mod tonic;
// TODO: Helper library for reqwest. We could use
// https://github.com/TrueLayer/reqwest-middleware/tree/main/reqwest-tracing to realise this
// TODO: Helper library for axum or another http server. See
// https://github.com/hseeberger/hello-tracing-rs/blob/main/hello-tracing-common/src/otel/http.rs
// for an example; we can reuse the tonic::accept_trace function, at least for a tower::ServiceBuilder

View file

@ -0,0 +1,59 @@
use tonic::{
metadata::{MetadataKey, MetadataMap, MetadataValue},
Status,
};
use tracing::{warn, Span};
#[cfg(feature = "otlp")]
use opentelemetry::{global, propagation::Injector};
#[cfg(feature = "otlp")]
use opentelemetry_http::HeaderExtractor;
#[cfg(feature = "otlp")]
use tracing_opentelemetry::OpenTelemetrySpanExt;
/// Server-side half of trace context propagation.
///
/// Reads the trace context carried in the headers of `request` (using the globally registered
/// text map propagator) and sets it as the parent of the current [`Span`]. The request itself is
/// handed back untouched, so this function can be plugged in as a request-mapping step.
///
/// The parent is only set when the `otlp` feature is enabled; without it this function is a
/// no-op that returns the request as is.
pub fn accept_trace<B>(request: http::Request<B>) -> http::Request<B> {
    #[cfg(feature = "otlp")]
    {
        // View the HTTP headers as a propagation carrier.
        let extractor = HeaderExtractor(request.headers());
        // Yields the current context if no or invalid data is received.
        let remote_context =
            global::get_text_map_propagator(|propagator| propagator.extract(&extractor));
        Span::current().set_parent(remote_context);
    }
    request
}
/// Newtype around a mutable borrow of a tonic [`MetadataMap`].
///
/// It exists so that [`Injector`] can be implemented for it, which lets a text map propagator
/// write its key/value pairs into the metadata of an outgoing request.
#[cfg(feature = "otlp")]
struct MetadataInjector<'a>(&'a mut MetadataMap);
#[cfg(feature = "otlp")]
impl Injector for MetadataInjector<'_> {
    /// Stores `value` under `key` in the wrapped metadata map.
    ///
    /// A key or value that cannot be converted into its metadata representation is not inserted;
    /// a warning is logged instead and the entry is dropped.
    fn set(&mut self, key: &str, value: String) {
        // Validate the key first; the value is only looked at once the key is known to be usable.
        let key = match MetadataKey::from_bytes(key.as_bytes()) {
            Ok(parsed) => parsed,
            Err(error) => {
                warn!(key, error = format!("{error:#}"), "parse metadata key");
                return;
            }
        };

        match MetadataValue::try_from(&value) {
            Ok(value) => {
                self.0.insert(key, value);
            }
            Err(error) => warn!(value, error = format!("{error:#}"), "parse metadata value"),
        }
    }
}
/// Client-side half of trace context propagation.
///
/// Injects the context of the current [`Span`] into the metadata of the given `request`, using
/// the globally registered text map propagator. Its signature allows it to be passed to a
/// generated client's `with_interceptor` constructor.
///
/// The injection only happens when the `otlp` feature is enabled; without it the request is
/// returned unchanged. This function always returns `Ok`.
pub fn send_trace<T>(mut request: tonic::Request<T>) -> Result<tonic::Request<T>, Status> {
    #[cfg(feature = "otlp")]
    {
        let context = Span::current().context();
        // Expose the request metadata as the carrier the propagator writes into.
        let mut injector = MetadataInjector(request.metadata_mut());
        global::get_text_map_propagator(|propagator| {
            propagator.inject_context(&context, &mut injector)
        });
    }
    Ok(request)
}