refactor(tvix/store): `import_path` → `import_path_as_nar_ca`

Add multiple additional helpers such as:

- `path_to_name`: derive the basename of a given path
- `derive_nar_ca_path_info`: derive the `PathInfo` for a content
  addressed NAR

which further isolates the tree-walking feature from the ingestion feature.

Additionally, we no longer `expect` and instead properly propagate ingestion errors up.

Change-Id: I60edb5b633911c58ade7e19f5002e6f75f90e262
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10574
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Autosubmit: raitobezarius <tvl@lahfa.xyz>
This commit is contained in:
Ryan Lahfa 2024-01-08 09:50:13 +01:00 committed by clbot
parent 7275288f0e
commit 4c3ba46ba3
5 changed files with 161 additions and 118 deletions

View file

@ -274,7 +274,7 @@ impl EvalIO for TvixStoreIO {
#[instrument(skip(self), ret, err)]
fn import_path(&self, path: &Path) -> io::Result<PathBuf> {
let output_path = self.tokio_handle.block_on(async {
tvix_store::utils::import_path(
tvix_store::import::import_path_as_nar_ca(
path,
&self.blob_service,
&self.directory_service,

View file

@ -305,7 +305,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let path_info_service = path_info_service.clone();
async move {
let resp = tvix_store::utils::import_path(
let resp = tvix_store::import::import_path_as_nar_ca(
path,
blob_service,
directory_service,

156
tvix/store/src/import.rs Normal file
View file

@ -0,0 +1,156 @@
use std::path::Path;
use data_encoding::BASE64;
use tracing::{debug, instrument};
use tvix_castore::{
blobservice::BlobService, directoryservice::DirectoryService, proto::node::Node,
};
use nix_compat::store_path::{self, StorePath};
use crate::{
pathinfoservice::PathInfoService,
proto::{nar_info, NarInfo, PathInfo},
};
/// Emits a `debug!` event reporting that `path` was imported as `node`.
///
/// Directory and file nodes are logged with their name and base64-encoded
/// digest; symlink nodes are logged with their name and target.
fn log_node(node: &Node, path: &Path) {
    match node {
        Node::Directory(dir) => debug!(
            path = ?path,
            name = ?dir.name,
            digest = BASE64.encode(&dir.digest),
            "import successful",
        ),
        Node::File(file) => debug!(
            path = ?path,
            name = ?file.name,
            digest = BASE64.encode(&file.digest),
            "import successful"
        ),
        Node::Symlink(symlink) => debug!(
            path = ?path,
            name = ?symlink.name,
            target = ?symlink.target,
            "import successful"
        ),
    }
}
/// Returns the final component (basename) of `path` as a `&str`.
///
/// # Errors
///
/// Yields an [`std::io::Error`] of kind [`std::io::ErrorKind::InvalidInput`]
/// when [`Path::file_name`] has nothing to return (for instance because the
/// path terminates in `..`), or when the basename is not valid unicode.
#[inline]
pub fn path_to_name(path: &Path) -> std::io::Result<&str> {
    let basename = path.file_name().and_then(|os_name| os_name.to_str());

    match basename {
        Some(name) => Ok(name),
        None => Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "path must not be .. and the basename valid unicode",
        )),
    }
}
/// Builds the [`PathInfo`] for a content-addressed, NAR-style (recursive)
/// object.
///
/// * `nar_size` - size of the NAR representation.
/// * `nar_sha256` - SHA-256 of the NAR representation; it is stored both as
///   the NAR hash and as the digest of the content address.
/// * `root_node` - the root node, which becomes the `node` of the result.
///
/// The returned [`PathInfo`] carries no references, signatures or deriver;
/// callers can fill those in afterwards or verify the expected hashes.
#[inline]
pub fn derive_nar_ca_path_info(nar_size: u64, nar_sha256: [u8; 32], root_node: Node) -> PathInfo {
    // The content address is the NAR hash itself.
    let content_address = nar_info::Ca {
        r#type: nar_info::ca::Hash::NarSha256.into(),
        digest: nar_sha256.to_vec().into(),
    };

    let narinfo = NarInfo {
        nar_size,
        nar_sha256: nar_sha256.to_vec().into(),
        signatures: Vec::new(),
        reference_names: Vec::new(),
        deriver: None,
        ca: Some(content_address),
    };

    // assemble the [crate::proto::PathInfo] object.
    PathInfo {
        node: Some(tvix_castore::proto::Node {
            node: Some(root_node),
        }),
        // There's no reference scanning on path contents ingested like this.
        references: Vec::new(),
        narinfo: Some(narinfo),
    }
}
/// Ingest the given `path` and register the resulting output path in the
/// [`PathInfoService`] as a recursive fixed output NAR.
///
/// The store path name is derived from the basename of `path` (see
/// [`path_to_name`]). On success, the [`StorePath`] under which the contents
/// were registered is returned.
///
/// # Errors
///
/// Returns an [`std::io::Error`] if:
/// - the basename of `path` cannot be derived (checked before anything is
///   ingested),
/// - ingesting the path fails,
/// - the [`PathInfoService`] fails to calculate the NAR or to store the
///   [`PathInfo`],
/// - the basename is not a valid store path name.
#[instrument(skip_all, fields(path=?path), err)]
pub async fn import_path_as_nar_ca<BS, DS, PS, P>(
    path: P,
    blob_service: BS,
    directory_service: DS,
    path_info_service: PS,
) -> Result<StorePath, std::io::Error>
where
    P: AsRef<Path> + std::fmt::Debug,
    BS: AsRef<dyn BlobService> + Clone,
    DS: AsRef<dyn DirectoryService>,
    PS: AsRef<dyn PathInfoService>,
{
    // Derive the name up front: it only depends on `path`, and failing here
    // avoids ingesting contents that could never be registered anyway.
    // FUTUREWORK: take `name` as a parameter here and enforce the validity of the name
    // at the type level.
    let name = path_to_name(path.as_ref())?;

    // Ingest the path into blob and directory service.
    let root_node =
        tvix_castore::import::ingest_path(blob_service, directory_service, &path).await?;

    // Ask the PathInfoService for the NAR size and sha256
    let (nar_size, nar_sha256) = path_info_service.as_ref().calculate_nar(&root_node).await?;

    // Calculate the output path. This might still fail, as some names are illegal.
    let output_path = store_path::build_nar_based_store_path(&nar_sha256, name).map_err(|_| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            format!("invalid name: {}", name),
        )
    })?;

    // assemble a new root_node with a name that is derived from the nar hash.
    let root_node = root_node.rename(output_path.to_string().into_bytes().into());
    log_node(&root_node, path.as_ref());

    let path_info = derive_nar_ca_path_info(nar_size, nar_sha256, root_node);

    // The [`PathInfo`] that we get back from the service might contain additional
    // signatures or information set by the service itself. In this function, we
    // silently swallow it because callers don't really need it.
    let _path_info = path_info_service.as_ref().put(path_info).await?;

    Ok(output_path.to_owned())
}
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use crate::import::path_to_name;
    use test_case::test_case;

    /// Paths with a usable, unicode basename yield that basename.
    #[test_case("a/b/c", "c"; "simple path")]
    #[test_case("a/b/../c", "c"; "simple path containing ..")]
    #[test_case("a/b/../c/d/../e", "e"; "path containing multiple ..")]
    fn test_path_to_name(path: &str, expected_name: &str) {
        let path: PathBuf = path.into();
        assert_eq!(path_to_name(&path).expect("must succeed"), expected_name);
    }

    /// Paths without a basename, or with a non-unicode basename, are rejected.
    ///
    /// Unix-only: arbitrary bytes can only be turned into an `OsStr` safely
    /// through the unix-specific `OsStrExt::from_bytes`. The previously used
    /// `OsStr::from_encoded_bytes_unchecked` requires its input to be valid
    /// UTF-8 or to originate from `OsStr::as_encoded_bytes`, which the
    /// non-unicode case deliberately is not.
    #[cfg(unix)]
    #[test_case(b"a/b/.."; "path ending in ..")]
    #[test_case(b"\xf8\xa1\xa1\xa1\xa1"; "non unicode path")]
    fn test_invalid_path_to_name(invalid_path: &[u8]) {
        use std::{ffi::OsStr, os::unix::ffi::OsStrExt};

        let path: PathBuf = OsStr::from_bytes(invalid_path).into();
        path_to_name(&path).expect_err("must fail");
    }
}

View file

@ -1,3 +1,4 @@
pub mod import;
pub mod nar;
pub mod pathinfoservice;
pub mod proto;

View file

@ -1,18 +1,11 @@
use std::{path::Path, sync::Arc};
use std::sync::Arc;
use data_encoding::BASE64;
use nix_compat::store_path::{self, StorePath};
use tracing::{debug, instrument};
use tvix_castore::{
blobservice::{self, BlobService},
directoryservice::{self, DirectoryService},
proto::node::Node,
};
use crate::{
pathinfoservice::{self, PathInfoService},
proto::{nar_info, NarInfo, PathInfo},
};
use crate::pathinfoservice::{self, PathInfoService};
/// Construct the three store handles from their addrs.
pub async fn construct_services(
@ -40,110 +33,3 @@ pub async fn construct_services(
Ok((blob_service, directory_service, path_info_service))
}
/// Imports a given path on the filesystem into the store, registers the
/// resulting [PathInfo] in the [PathInfoService], and returns the
/// [StorePath] that was calculated for it.
///
/// Returns an [std::io::Error] if the basename of `path` cannot be derived,
/// if the [PathInfoService] fails to calculate the NAR or to store the
/// [PathInfo], or if the basename is not a valid store path name.
///
/// NOTE: a failure to ingest the path is not returned as an error; it panics
/// through the `expect` below.
#[instrument(skip_all, fields(path=?path), err)]
pub async fn import_path<BS, DS, PS, P>(
path: P,
blob_service: BS,
directory_service: DS,
path_info_service: PS,
) -> Result<StorePath, std::io::Error>
where
P: AsRef<Path> + std::fmt::Debug,
BS: AsRef<dyn BlobService> + Clone,
DS: AsRef<dyn DirectoryService>,
PS: AsRef<dyn PathInfoService>,
{
// calculate the name from the basename of the path; this fails if there is
// no basename or if it is not valid unicode.
// TODO: make a path_to_name helper function?
let name = path
.as_ref()
.file_name()
.and_then(|file_name| file_name.to_str())
.ok_or_else(|| {
std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"path must not be .. and the basename valid unicode",
)
})?;
// Ingest the path into blob and directory service.
// NOTE: ingestion errors are not propagated, they panic here.
let root_node = tvix_castore::import::ingest_path(blob_service, &directory_service, &path)
.await
.expect("failed to ingest path");
debug!(root_node =?root_node, "import successful");
// Ask the PathInfoService for the NAR size and sha256
let (nar_size, nar_sha256) = path_info_service.as_ref().calculate_nar(&root_node).await?;
// Calculate the output path. This might still fail, as some names are illegal.
let output_path = store_path::build_nar_based_store_path(&nar_sha256, name).map_err(|_| {
std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!("invalid name: {}", name),
)
})?;
// assemble a new root_node with a name that is derived from the nar hash.
let root_node = root_node.rename(output_path.to_string().into_bytes().into());
log_node(&root_node, path.as_ref());
// assemble the [crate::proto::PathInfo] object.
let path_info = PathInfo {
node: Some(tvix_castore::proto::Node {
node: Some(root_node),
}),
// There's no reference scanning on path contents ingested like this.
references: vec![],
narinfo: Some(NarInfo {
nar_size,
// The NAR sha256 is used both as the NAR hash and as the CA digest below.
nar_sha256: nar_sha256.to_vec().into(),
signatures: vec![],
reference_names: vec![],
deriver: None,
ca: Some(nar_info::Ca {
r#type: nar_info::ca::Hash::NarSha256.into(),
digest: nar_sha256.to_vec().into(),
}),
}),
};
// put into [PathInfoService]. The PathInfo that we get back from there
// (it might contain additional signatures) is discarded; only the output
// path is returned to the caller.
let _path_info = path_info_service.as_ref().put(path_info).await?;
Ok(output_path.to_owned())
}
/// Emits a `debug!` event reporting that `path` was imported as `node`.
///
/// Directory and file nodes are logged with their name and base64-encoded
/// digest; symlink nodes are logged with their name and target.
fn log_node(node: &Node, path: &Path) {
match node {
Node::Directory(directory_node) => {
debug!(
path = ?path,
name = ?directory_node.name,
digest = BASE64.encode(&directory_node.digest),
"import successful",
)
}
Node::File(file_node) => {
debug!(
path = ?path,
name = ?file_node.name,
digest = BASE64.encode(&file_node.digest),
"import successful"
)
}
// Symlinks are logged with their target instead of a digest.
Node::Symlink(symlink_node) => {
debug!(
path = ?path,
name = ?symlink_node.name,
target = ?symlink_node.target,
"import successful"
)
}
}
}