refactor(tvix/glue/register_in_path_info_service): return only PathInfo

The store path is already contained in the PathInfo, and the ca bits is
already passed into the function, so known to the caller - there's no
need to duplicate this.

We can also avoid having two separate block_on in our import builtin -
we already know the content hash before constructing, as we pass it in
via ca_hash.

There's still some room to unclutter some more of the code around
importing - we still do NAR calculation twice in some cases, and some of
the code might be share-able from other places producing PathInfo too.
Log a TODO for this cleanup.

Change-Id: I6a5fc427d15bc9293a396310143c7694dd2996c0
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12592
Reviewed-by: Marijan Petričević <marijan.petricevic94@gmail.com>
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
Florian Klink 2024-10-11 01:23:00 +03:00 committed by flokli
parent 48fa320cf4
commit 6a116d5057
3 changed files with 42 additions and 33 deletions

View file

@ -77,6 +77,20 @@ correctness:
"some amount of outgoing bytes" in memory.
This is somewhat blocked until the {Chunk/Blob}Service split is done, as then
prefetching would only be a matter of adding it into the one `BlobReader`.
- The import builtins (`builtins.path` and `builtins.filterSource`) use(d) some
helper functions in TvixStoreIO that deals with constructing the `PathInfo`
structs and inserting them, but some of the abstractions where probably done
at the wrong layer:
- Some ways of importing calculate the NAR representation twice
- The code isn't very usable from other consumers that also create structs
`PathInfo`.
- `node_to_path_info` is ony called by `register_in_path_info_service` (due
to this marked as private for now).
Instead of fighting these abstractions, maybe it's best to explicitly add the
code back to the two builtins, remove redundant calls and calculations. A
later phase could then see how/if some of this can be reasonably factored out in
a way it's also usable for other places having a node and wanting to persist
it (if at all).
### Error cleanup
- Currently, all services use tvix_castore::Error, which only has two kinds

View file

@ -287,12 +287,6 @@ mod import_builtins {
}
};
let (path_info, _hash, output_path) = state.tokio_handle.block_on(async {
state
.node_to_path_info(name.as_ref(), path.as_ref(), &ca_hash, root_node)
.await
})?;
if let Some(expected_sha256) = expected_sha256 {
if *ca_hash.hash() != expected_sha256 {
Err(ImportError::HashMismatch(
@ -303,16 +297,20 @@ mod import_builtins {
}
}
state
let path_info = state
.tokio_handle
.block_on(async { state.path_info_service.as_ref().put(path_info).await })
.block_on(async {
state
.register_in_path_info_service(name.as_ref(), path.as_ref(), ca_hash, root_node)
.await
})
.map_err(|e| tvix_eval::ErrorKind::IO {
path: Some(path.to_path_buf()),
error: Rc::new(e.into()),
error: Rc::new(e),
})?;
// We need to attach context to the final output path.
let outpath = output_path.to_absolute_path();
let outpath = path_info.store_path.to_absolute_path();
Ok(
NixString::new_context_from(NixContextElement::Plain(outpath.clone()).into(), outpath)
@ -331,7 +329,7 @@ mod import_builtins {
let root_node = filtered_ingest(Rc::clone(&state), co, &p, Some(&filter)).await?;
let name = tvix_store::import::path_to_name(&p)?;
let outpath = state
let path_info = state
.tokio_handle
.block_on(async {
let (_, nar_sha256) = state
@ -341,10 +339,10 @@ mod import_builtins {
.await?;
state
.register_node_in_path_info_service(
.register_in_path_info_service(
name,
&p,
&CAHash::Nar(NixHash::Sha256(nar_sha256)),
CAHash::Nar(NixHash::Sha256(nar_sha256)),
root_node,
)
.await
@ -352,8 +350,10 @@ mod import_builtins {
.map_err(|err| ErrorKind::IO {
path: Some(p.to_path_buf()),
error: err.into(),
})?
.to_absolute_path();
})?;
// We need to attach context to the final output path.
let outpath = path_info.store_path.to_absolute_path();
Ok(
NixString::new_context_from(NixContextElement::Plain(outpath.clone()).into(), outpath)

View file

@ -1,7 +1,6 @@
//! This module provides an implementation of EvalIO talking to tvix-store.
use bytes::Bytes;
use futures::{StreamExt, TryStreamExt};
use nix_compat::nixhash::NixHash;
use nix_compat::store_path::StorePathRef;
use nix_compat::{nixhash::CAHash, store_path::StorePath};
use std::collections::BTreeMap;
@ -387,13 +386,13 @@ impl TvixStoreIO {
.map_err(|e| std::io::Error::new(io::ErrorKind::Other, e))
}
pub(crate) async fn node_to_path_info<'a>(
async fn node_to_path_info<'a>(
&self,
name: &'a str,
path: &Path,
ca: &CAHash,
ca: CAHash,
root_node: Node,
) -> io::Result<(PathInfo, NixHash, StorePathRef<'a>)> {
) -> io::Result<PathInfo> {
// Ask the PathInfoService for the NAR size and sha256
// We always need it no matter what is the actual hash mode
// because the [PathInfo] needs to contain nar_{sha256,size}.
@ -405,7 +404,7 @@ impl TvixStoreIO {
// Calculate the output path. This might still fail, as some names are illegal.
let output_path =
nix_compat::store_path::build_ca_path(name, ca, Vec::<&str>::new(), false).map_err(
nix_compat::store_path::build_ca_path(name, &ca, Vec::<&str>::new(), false).map_err(
|_| {
std::io::Error::new(
std::io::ErrorKind::InvalidData,
@ -417,28 +416,24 @@ impl TvixStoreIO {
tvix_store::import::log_node(name.as_bytes(), &root_node, path);
// construct a PathInfo
let path_info = tvix_store::import::derive_nar_ca_path_info(
Ok(tvix_store::import::derive_nar_ca_path_info(
nar_size,
nar_sha256,
Some(ca.clone()),
output_path.to_owned(),
Some(ca),
output_path,
root_node,
);
Ok((path_info, NixHash::Sha256(nar_sha256), output_path))
))
}
pub(crate) async fn register_node_in_path_info_service<'a>(
pub(crate) async fn register_in_path_info_service<'a>(
&self,
name: &'a str,
path: &Path,
ca: &CAHash,
ca: CAHash,
root_node: Node,
) -> io::Result<StorePathRef<'a>> {
let (path_info, _, output_path) = self.node_to_path_info(name, path, ca, root_node).await?;
let _path_info = self.path_info_service.as_ref().put(path_info).await?;
Ok(output_path)
) -> io::Result<PathInfo> {
let path_info = self.node_to_path_info(name, path, ca, root_node).await?;
Ok(self.path_info_service.as_ref().put(path_info).await?)
}
pub async fn store_path_exists<'a>(&'a self, store_path: StorePathRef<'a>) -> io::Result<bool> {