refactor(tvix/store): import_path
→ import_path_as_nar_ca
Add multiple additional helpers such as:

- `path_to_name`: derive the basename of a given path
- `derive_nar_ca_path_info`: derive the `PathInfo` for a content addressed NAR

which further isolates the tree walking feature and the ingestion feature.

Additionally, we don't `expect` anymore and properly propagate ingestion errors up.

Change-Id: I60edb5b633911c58ade7e19f5002e6f75f90e262
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10574
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Autosubmit: raitobezarius <tvl@lahfa.xyz>
This commit is contained in:
parent
7275288f0e
commit
4c3ba46ba3
5 changed files with 161 additions and 118 deletions
|
@ -274,7 +274,7 @@ impl EvalIO for TvixStoreIO {
|
|||
#[instrument(skip(self), ret, err)]
|
||||
fn import_path(&self, path: &Path) -> io::Result<PathBuf> {
|
||||
let output_path = self.tokio_handle.block_on(async {
|
||||
tvix_store::utils::import_path(
|
||||
tvix_store::import::import_path_as_nar_ca(
|
||||
path,
|
||||
&self.blob_service,
|
||||
&self.directory_service,
|
||||
|
|
|
@ -305,7 +305,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
let path_info_service = path_info_service.clone();
|
||||
|
||||
async move {
|
||||
let resp = tvix_store::utils::import_path(
|
||||
let resp = tvix_store::import::import_path_as_nar_ca(
|
||||
path,
|
||||
blob_service,
|
||||
directory_service,
|
||||
|
|
156
tvix/store/src/import.rs
Normal file
156
tvix/store/src/import.rs
Normal file
|
@ -0,0 +1,156 @@
|
|||
use std::path::Path;
|
||||
|
||||
use data_encoding::BASE64;
|
||||
use tracing::{debug, instrument};
|
||||
use tvix_castore::{
|
||||
blobservice::BlobService, directoryservice::DirectoryService, proto::node::Node,
|
||||
};
|
||||
|
||||
use nix_compat::store_path::{self, StorePath};
|
||||
|
||||
use crate::{
|
||||
pathinfoservice::PathInfoService,
|
||||
proto::{nar_info, NarInfo, PathInfo},
|
||||
};
|
||||
|
||||
fn log_node(node: &Node, path: &Path) {
|
||||
match node {
|
||||
Node::Directory(directory_node) => {
|
||||
debug!(
|
||||
path = ?path,
|
||||
name = ?directory_node.name,
|
||||
digest = BASE64.encode(&directory_node.digest),
|
||||
"import successful",
|
||||
)
|
||||
}
|
||||
Node::File(file_node) => {
|
||||
debug!(
|
||||
path = ?path,
|
||||
name = ?file_node.name,
|
||||
digest = BASE64.encode(&file_node.digest),
|
||||
"import successful"
|
||||
)
|
||||
}
|
||||
Node::Symlink(symlink_node) => {
|
||||
debug!(
|
||||
path = ?path,
|
||||
name = ?symlink_node.name,
|
||||
target = ?symlink_node.target,
|
||||
"import successful"
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the final component of `path` as a string slice, for use as a name.
///
/// # Errors
///
/// Returns an [`std::io::Error`] of kind [`std::io::ErrorKind::InvalidInput`] if the path has no
/// final component (for example because it ends in `..`), or if that component is not valid
/// unicode.
#[inline]
pub fn path_to_name(path: &Path) -> std::io::Result<&str> {
    match path.file_name().and_then(|basename| basename.to_str()) {
        Some(name) => Ok(name),
        None => Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "path must not be .. and the basename valid unicode",
        )),
    }
}
|
||||
|
||||
/// Takes the NAR size, SHA-256 of the NAR representation and the root node.
|
||||
/// Returns the path information object for a content addressed NAR-style (recursive) object.
|
||||
///
|
||||
/// This [`PathInfo`] can be further filled for signatures, deriver or verified for the expected
|
||||
/// hashes.
|
||||
#[inline]
|
||||
pub fn derive_nar_ca_path_info(nar_size: u64, nar_sha256: [u8; 32], root_node: Node) -> PathInfo {
|
||||
// assemble the [crate::proto::PathInfo] object.
|
||||
PathInfo {
|
||||
node: Some(tvix_castore::proto::Node {
|
||||
node: Some(root_node),
|
||||
}),
|
||||
// There's no reference scanning on path contents ingested like this.
|
||||
references: vec![],
|
||||
narinfo: Some(NarInfo {
|
||||
nar_size,
|
||||
nar_sha256: nar_sha256.to_vec().into(),
|
||||
signatures: vec![],
|
||||
reference_names: vec![],
|
||||
deriver: None,
|
||||
ca: Some(nar_info::Ca {
|
||||
r#type: nar_info::ca::Hash::NarSha256.into(),
|
||||
digest: nar_sha256.to_vec().into(),
|
||||
}),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
/// Ingest the given path [`path`] and register the resulting output path in the
|
||||
/// [`PathInfoService`] as a recursive fixed output NAR.
|
||||
#[instrument(skip_all, fields(path=?path), err)]
|
||||
pub async fn import_path_as_nar_ca<BS, DS, PS, P>(
|
||||
path: P,
|
||||
blob_service: BS,
|
||||
directory_service: DS,
|
||||
path_info_service: PS,
|
||||
) -> Result<StorePath, std::io::Error>
|
||||
where
|
||||
P: AsRef<Path> + std::fmt::Debug,
|
||||
BS: AsRef<dyn BlobService> + Clone,
|
||||
DS: AsRef<dyn DirectoryService>,
|
||||
PS: AsRef<dyn PathInfoService>,
|
||||
{
|
||||
let root_node =
|
||||
tvix_castore::import::ingest_path(blob_service, directory_service, &path).await?;
|
||||
|
||||
// Ask the PathInfoService for the NAR size and sha256
|
||||
let (nar_size, nar_sha256) = path_info_service.as_ref().calculate_nar(&root_node).await?;
|
||||
|
||||
// Calculate the output path. This might still fail, as some names are illegal.
|
||||
// FUTUREWORK: take `name` as a parameter here and enforce the validity of the name
|
||||
// at the type level.
|
||||
let name = path_to_name(path.as_ref())?;
|
||||
let output_path = store_path::build_nar_based_store_path(&nar_sha256, name).map_err(|_| {
|
||||
std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
format!("invalid name: {}", name),
|
||||
)
|
||||
})?;
|
||||
|
||||
// assemble a new root_node with a name that is derived from the nar hash.
|
||||
let root_node = root_node.rename(output_path.to_string().into_bytes().into());
|
||||
log_node(&root_node, path.as_ref());
|
||||
|
||||
let path_info = derive_nar_ca_path_info(nar_size, nar_sha256, root_node);
|
||||
|
||||
// This new [`PathInfo`] that we get back from there might contain additional signatures or
|
||||
// information set by the service itself. In this function, we silently swallow it because
|
||||
// callers doesn't really need it.
|
||||
let _path_info = path_info_service.as_ref().put(path_info).await?;
|
||||
|
||||
Ok(output_path.to_owned())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use crate::import::path_to_name;
    use test_case::test_case;

    // Valid paths resolve to their last component, even with `..` in the middle.
    #[test_case("a/b/c", "c"; "simple path")]
    #[test_case("a/b/../c", "c"; "simple path containing ..")]
    #[test_case("a/b/../c/d/../e", "e"; "path containing multiple ..")]
    fn test_path_to_name(path: &str, expected_name: &str) {
        let path: PathBuf = path.into();
        assert_eq!(path_to_name(&path).expect("must succeed"), expected_name);
    }

    // Paths without a basename, or whose basename is not valid unicode, must be rejected.
    //
    // Gated on unix, where an `OsStr` can be built safely from arbitrary bytes. Feeding
    // arbitrary non-UTF-8 bytes to `OsStr::from_encoded_bytes_unchecked` would violate its
    // safety contract.
    #[cfg(unix)]
    #[test_case(b"a/b/.."; "path ending in ..")]
    #[test_case(b"\xf8\xa1\xa1\xa1\xa1"; "non unicode path")]
    fn test_invalid_path_to_name(invalid_path: &[u8]) {
        use std::{ffi::OsStr, os::unix::ffi::OsStrExt};

        let path: PathBuf = OsStr::from_bytes(invalid_path).into();
        path_to_name(&path).expect_err("must fail");
    }
}
|
|
@ -1,3 +1,4 @@
|
|||
pub mod import;
|
||||
pub mod nar;
|
||||
pub mod pathinfoservice;
|
||||
pub mod proto;
|
||||
|
|
|
@ -1,18 +1,11 @@
|
|||
use std::{path::Path, sync::Arc};
|
||||
use std::sync::Arc;
|
||||
|
||||
use data_encoding::BASE64;
|
||||
use nix_compat::store_path::{self, StorePath};
|
||||
use tracing::{debug, instrument};
|
||||
use tvix_castore::{
|
||||
blobservice::{self, BlobService},
|
||||
directoryservice::{self, DirectoryService},
|
||||
proto::node::Node,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
pathinfoservice::{self, PathInfoService},
|
||||
proto::{nar_info, NarInfo, PathInfo},
|
||||
};
|
||||
use crate::pathinfoservice::{self, PathInfoService};
|
||||
|
||||
/// Construct the three store handles from their addrs.
|
||||
pub async fn construct_services(
|
||||
|
@ -40,110 +33,3 @@ pub async fn construct_services(
|
|||
|
||||
Ok((blob_service, directory_service, path_info_service))
|
||||
}
|
||||
|
||||
/// Imports a given path on the filesystem into the store, and returns the
|
||||
/// [PathInfo] describing the path, that was sent to
|
||||
/// [PathInfoService].
|
||||
#[instrument(skip_all, fields(path=?path), err)]
|
||||
pub async fn import_path<BS, DS, PS, P>(
|
||||
path: P,
|
||||
blob_service: BS,
|
||||
directory_service: DS,
|
||||
path_info_service: PS,
|
||||
) -> Result<StorePath, std::io::Error>
|
||||
where
|
||||
P: AsRef<Path> + std::fmt::Debug,
|
||||
BS: AsRef<dyn BlobService> + Clone,
|
||||
DS: AsRef<dyn DirectoryService>,
|
||||
PS: AsRef<dyn PathInfoService>,
|
||||
{
|
||||
// calculate the name
|
||||
// TODO: make a path_to_name helper function?
|
||||
let name = path
|
||||
.as_ref()
|
||||
.file_name()
|
||||
.and_then(|file_name| file_name.to_str())
|
||||
.ok_or_else(|| {
|
||||
std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
"path must not be .. and the basename valid unicode",
|
||||
)
|
||||
})?;
|
||||
|
||||
// Ingest the path into blob and directory service.
|
||||
let root_node = tvix_castore::import::ingest_path(blob_service, &directory_service, &path)
|
||||
.await
|
||||
.expect("failed to ingest path");
|
||||
|
||||
debug!(root_node =?root_node, "import successful");
|
||||
|
||||
// Ask the PathInfoService for the NAR size and sha256
|
||||
let (nar_size, nar_sha256) = path_info_service.as_ref().calculate_nar(&root_node).await?;
|
||||
|
||||
// Calculate the output path. This might still fail, as some names are illegal.
|
||||
let output_path = store_path::build_nar_based_store_path(&nar_sha256, name).map_err(|_| {
|
||||
std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidData,
|
||||
format!("invalid name: {}", name),
|
||||
)
|
||||
})?;
|
||||
|
||||
// assemble a new root_node with a name that is derived from the nar hash.
|
||||
let root_node = root_node.rename(output_path.to_string().into_bytes().into());
|
||||
log_node(&root_node, path.as_ref());
|
||||
|
||||
// assemble the [crate::proto::PathInfo] object.
|
||||
let path_info = PathInfo {
|
||||
node: Some(tvix_castore::proto::Node {
|
||||
node: Some(root_node),
|
||||
}),
|
||||
// There's no reference scanning on path contents ingested like this.
|
||||
references: vec![],
|
||||
narinfo: Some(NarInfo {
|
||||
nar_size,
|
||||
nar_sha256: nar_sha256.to_vec().into(),
|
||||
signatures: vec![],
|
||||
reference_names: vec![],
|
||||
deriver: None,
|
||||
ca: Some(nar_info::Ca {
|
||||
r#type: nar_info::ca::Hash::NarSha256.into(),
|
||||
digest: nar_sha256.to_vec().into(),
|
||||
}),
|
||||
}),
|
||||
};
|
||||
|
||||
// put into [PathInfoService], and return the PathInfo that we get back
|
||||
// from there (it might contain additional signatures).
|
||||
let _path_info = path_info_service.as_ref().put(path_info).await?;
|
||||
|
||||
Ok(output_path.to_owned())
|
||||
}
|
||||
|
||||
fn log_node(node: &Node, path: &Path) {
|
||||
match node {
|
||||
Node::Directory(directory_node) => {
|
||||
debug!(
|
||||
path = ?path,
|
||||
name = ?directory_node.name,
|
||||
digest = BASE64.encode(&directory_node.digest),
|
||||
"import successful",
|
||||
)
|
||||
}
|
||||
Node::File(file_node) => {
|
||||
debug!(
|
||||
path = ?path,
|
||||
name = ?file_node.name,
|
||||
digest = BASE64.encode(&file_node.digest),
|
||||
"import successful"
|
||||
)
|
||||
}
|
||||
Node::Symlink(symlink_node) => {
|
||||
debug!(
|
||||
path = ?path,
|
||||
name = ?symlink_node.name,
|
||||
target = ?symlink_node.target,
|
||||
"import successful"
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue