refactor(tvix/castore): remove `name` from Nodes
Nodes only have names if they're contained inside a Directory, or if
they're a root node and have something else possibly giving them a name
externally.
This removes all `name` fields in the three different Nodes, and instead
maintains it inside a BTreeMap inside the Directory.
It also removes the NamedNode trait (they don't have a get_name()), as
well as Node::rename(self, name), and all [Partial]Ord implementations
for Node (as they don't have names to use for sorting).
The `nodes()`, `directories()`, `files()` iterators inside a `Directory`
now return a tuple of Name and Node, as does the RootNodesProvider.
The different {Directory,File,Symlink}Node struct constructors got
simpler, and the {Directory,File}Node ones became infallible - as
there's no more possibility to represent invalid state.
The proto structs stayed the same - there's now from_name_and_node and
into_name_and_node to convert back and forth between the two `Node`
structs.
Some further cleanups:
The error types for Node validation were renamed. Everything related to
names is now in the DirectoryError (not yet happy about the naming)
There's some leftover cleanups to do:
- There should be a from_(sorted_)iter and into_iter in Directory, so
we can construct and deconstruct in one go.
That should also enable us to implement conversions from and to the
proto representation that moves, rather than clones.
- The BuildRequest and PathInfo structs are still proto-based, so we
still do a bunch of conversions back and forth there (and have some
ugly expect there). There's not much point for error handling here,
this will be moved to stricter types in a followup CL.
Change-Id: I7369a8e3a426f44419c349077cb4fcab2044ebb6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12205
Tested-by: BuildkiteCI
Reviewed-by: yuka <yuka@yuka.dev>
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: benjaminedwardwebb <benjaminedwardwebb@gmail.com>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
2024-08-14 21:00:12 +02:00
|
|
|
use bstr::ByteSlice;
|
2024-01-08 09:50:13 +01:00
|
|
|
use std::path::Path;
|
|
|
|
use tracing::{debug, instrument};
|
|
|
|
use tvix_castore::{
|
refactor(tvix/castore): remove `name` from Nodes
Nodes only have names if they're contained inside a Directory, or if
they're a root node and have something else possibly giving them a name
externally.
This removes all `name` fields in the three different Nodes, and instead
maintains it inside a BTreeMap inside the Directory.
It also removes the NamedNode trait (they don't have a get_name()), as
well as Node::rename(self, name), and all [Partial]Ord implementations
for Node (as they don't have names to use for sorting).
The `nodes()`, `directories()`, `files()` iterators inside a `Directory`
now return a tuple of Name and Node, as does the RootNodesProvider.
The different {Directory,File,Symlink}Node struct constructors got
simpler, and the {Directory,File}Node ones became infallible - as
there's no more possibility to represent invalid state.
The proto structs stayed the same - there's now from_name_and_node and
into_name_and_node to convert back and forth between the two `Node`
structs.
Some further cleanups:
The error types for Node validation were renamed. Everything related to
names is now in the DirectoryError (not yet happy about the naming)
There's some leftover cleanups to do:
- There should be a from_(sorted_)iter and into_iter in Directory, so
we can construct and deconstruct in one go.
That should also enable us to implement conversions from and to the
proto representation that moves, rather than clones.
- The BuildRequest and PathInfo structs are still proto-based, so we
still do a bunch of conversions back and forth there (and have some
ugly expect there). There's not much point for error handling here,
this will be moved to stricter types in a followup CL.
Change-Id: I7369a8e3a426f44419c349077cb4fcab2044ebb6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12205
Tested-by: BuildkiteCI
Reviewed-by: yuka <yuka@yuka.dev>
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: benjaminedwardwebb <benjaminedwardwebb@gmail.com>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
2024-08-14 21:00:12 +02:00
|
|
|
blobservice::BlobService, directoryservice::DirectoryService, import::fs::ingest_path, Node,
|
2024-01-08 09:50:13 +01:00
|
|
|
};
|
|
|
|
|
2024-03-29 00:43:56 +01:00
|
|
|
use nix_compat::{
|
|
|
|
nixhash::{CAHash, NixHash},
|
2024-10-15 15:11:42 +02:00
|
|
|
store_path::{self, StorePath},
|
2024-03-29 00:43:56 +01:00
|
|
|
};
|
2024-01-08 09:50:13 +01:00
|
|
|
|
|
|
|
use crate::{
|
2024-05-10 07:59:25 +02:00
|
|
|
nar::NarCalculationService,
|
2024-10-10 16:11:17 +02:00
|
|
|
pathinfoservice::{PathInfo, PathInfoService},
|
|
|
|
proto::nar_info,
|
2024-01-08 09:50:13 +01:00
|
|
|
};
|
|
|
|
|
2024-03-29 00:43:56 +01:00
|
|
|
impl From<CAHash> for nar_info::Ca {
    /// Builds the protobuf `Ca` message from a [`CAHash`].
    ///
    /// The `type` field is derived from the hash variant, and `digest` carries
    /// the bytes of the string rendering of the contained hash.
    fn from(value: CAHash) -> Self {
        // Determine the protobuf hash kind while `value` is only borrowed.
        let kind: nar_info::ca::Hash = (&value).into();

        Self {
            r#type: kind.into(),
            digest: value.hash().to_string().into(),
        }
    }
}
|
|
|
|
|
refactor(tvix/castore): remove `name` from Nodes
Nodes only have names if they're contained inside a Directory, or if
they're a root node and have something else possibly giving them a name
externally.
This removes all `name` fields in the three different Nodes, and instead
maintains it inside a BTreeMap inside the Directory.
It also removes the NamedNode trait (they don't have a get_name()), as
well as Node::rename(self, name), and all [Partial]Ord implementations
for Node (as they don't have names to use for sorting).
The `nodes()`, `directories()`, `files()` iterators inside a `Directory`
now return a tuple of Name and Node, as does the RootNodesProvider.
The different {Directory,File,Symlink}Node struct constructors got
simpler, and the {Directory,File}Node ones became infallible - as
there's no more possibility to represent invalid state.
The proto structs stayed the same - there's now from_name_and_node and
into_name_and_node to convert back and forth between the two `Node`
structs.
Some further cleanups:
The error types for Node validation were renamed. Everything related to
names is now in the DirectoryError (not yet happy about the naming)
There's some leftover cleanups to do:
- There should be a from_(sorted_)iter and into_iter in Directory, so
we can construct and deconstruct in one go.
That should also enable us to implement conversions from and to the
proto representation that moves, rather than clones.
- The BuildRequest and PathInfo structs are still proto-based, so we
still do a bunch of conversions back and forth there (and have some
ugly expect there). There's not much point for error handling here,
this will be moved to stricter types in a followup CL.
Change-Id: I7369a8e3a426f44419c349077cb4fcab2044ebb6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12205
Tested-by: BuildkiteCI
Reviewed-by: yuka <yuka@yuka.dev>
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: benjaminedwardwebb <benjaminedwardwebb@gmail.com>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
2024-08-14 21:00:12 +02:00
|
|
|
pub fn log_node(name: &[u8], node: &Node, path: &Path) {
|
2024-01-08 09:50:13 +01:00
|
|
|
match node {
|
2024-08-16 01:24:12 +02:00
|
|
|
Node::Directory { digest, .. } => {
|
2024-01-08 09:50:13 +01:00
|
|
|
debug!(
|
|
|
|
path = ?path,
|
refactor(tvix/castore): remove `name` from Nodes
Nodes only have names if they're contained inside a Directory, or if
they're a root node and have something else possibly giving them a name
externally.
This removes all `name` fields in the three different Nodes, and instead
maintains it inside a BTreeMap inside the Directory.
It also removes the NamedNode trait (they don't have a get_name()), as
well as Node::rename(self, name), and all [Partial]Ord implementations
for Node (as they don't have names to use for sorting).
The `nodes()`, `directories()`, `files()` iterators inside a `Directory`
now return a tuple of Name and Node, as does the RootNodesProvider.
The different {Directory,File,Symlink}Node struct constructors got
simpler, and the {Directory,File}Node ones became infallible - as
there's no more possibility to represent invalid state.
The proto structs stayed the same - there's now from_name_and_node and
into_name_and_node to convert back and forth between the two `Node`
structs.
Some further cleanups:
The error types for Node validation were renamed. Everything related to
names is now in the DirectoryError (not yet happy about the naming)
There's some leftover cleanups to do:
- There should be a from_(sorted_)iter and into_iter in Directory, so
we can construct and deconstruct in one go.
That should also enable us to implement conversions from and to the
proto representation that moves, rather than clones.
- The BuildRequest and PathInfo structs are still proto-based, so we
still do a bunch of conversions back and forth there (and have some
ugly expect there). There's not much point for error handling here,
this will be moved to stricter types in a followup CL.
Change-Id: I7369a8e3a426f44419c349077cb4fcab2044ebb6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12205
Tested-by: BuildkiteCI
Reviewed-by: yuka <yuka@yuka.dev>
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: benjaminedwardwebb <benjaminedwardwebb@gmail.com>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
2024-08-14 21:00:12 +02:00
|
|
|
name = %name.as_bstr(),
|
2024-08-16 01:24:12 +02:00
|
|
|
digest = %digest,
|
2024-01-08 09:50:13 +01:00
|
|
|
"import successful",
|
|
|
|
)
|
|
|
|
}
|
2024-08-16 01:24:12 +02:00
|
|
|
Node::File { digest, .. } => {
|
2024-01-08 09:50:13 +01:00
|
|
|
debug!(
|
|
|
|
path = ?path,
|
refactor(tvix/castore): remove `name` from Nodes
Nodes only have names if they're contained inside a Directory, or if
they're a root node and have something else possibly giving them a name
externally.
This removes all `name` fields in the three different Nodes, and instead
maintains it inside a BTreeMap inside the Directory.
It also removes the NamedNode trait (they don't have a get_name()), as
well as Node::rename(self, name), and all [Partial]Ord implementations
for Node (as they don't have names to use for sorting).
The `nodes()`, `directories()`, `files()` iterators inside a `Directory`
now return a tuple of Name and Node, as does the RootNodesProvider.
The different {Directory,File,Symlink}Node struct constructors got
simpler, and the {Directory,File}Node ones became infallible - as
there's no more possibility to represent invalid state.
The proto structs stayed the same - there's now from_name_and_node and
into_name_and_node to convert back and forth between the two `Node`
structs.
Some further cleanups:
The error types for Node validation were renamed. Everything related to
names is now in the DirectoryError (not yet happy about the naming)
There's some leftover cleanups to do:
- There should be a from_(sorted_)iter and into_iter in Directory, so
we can construct and deconstruct in one go.
That should also enable us to implement conversions from and to the
proto representation that moves, rather than clones.
- The BuildRequest and PathInfo structs are still proto-based, so we
still do a bunch of conversions back and forth there (and have some
ugly expect there). There's not much point for error handling here,
this will be moved to stricter types in a followup CL.
Change-Id: I7369a8e3a426f44419c349077cb4fcab2044ebb6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12205
Tested-by: BuildkiteCI
Reviewed-by: yuka <yuka@yuka.dev>
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: benjaminedwardwebb <benjaminedwardwebb@gmail.com>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
2024-08-14 21:00:12 +02:00
|
|
|
name = %name.as_bstr(),
|
2024-08-16 01:24:12 +02:00
|
|
|
digest = %digest,
|
2024-01-08 09:50:13 +01:00
|
|
|
"import successful"
|
|
|
|
)
|
|
|
|
}
|
2024-08-16 01:24:12 +02:00
|
|
|
Node::Symlink { target } => {
|
2024-01-08 09:50:13 +01:00
|
|
|
debug!(
|
|
|
|
path = ?path,
|
refactor(tvix/castore): remove `name` from Nodes
Nodes only have names if they're contained inside a Directory, or if
they're a root node and have something else possibly giving them a name
externally.
This removes all `name` fields in the three different Nodes, and instead
maintains it inside a BTreeMap inside the Directory.
It also removes the NamedNode trait (they don't have a get_name()), as
well as Node::rename(self, name), and all [Partial]Ord implementations
for Node (as they don't have names to use for sorting).
The `nodes()`, `directories()`, `files()` iterators inside a `Directory`
now return a tuple of Name and Node, as does the RootNodesProvider.
The different {Directory,File,Symlink}Node struct constructors got
simpler, and the {Directory,File}Node ones became infallible - as
there's no more possibility to represent invalid state.
The proto structs stayed the same - there's now from_name_and_node and
into_name_and_node to convert back and forth between the two `Node`
structs.
Some further cleanups:
The error types for Node validation were renamed. Everything related to
names is now in the DirectoryError (not yet happy about the naming)
There's some leftover cleanups to do:
- There should be a from_(sorted_)iter and into_iter in Directory, so
we can construct and deconstruct in one go.
That should also enable us to implement conversions from and to the
proto representation that moves, rather than clones.
- The BuildRequest and PathInfo structs are still proto-based, so we
still do a bunch of conversions back and forth there (and have some
ugly expect there). There's not much point for error handling here,
this will be moved to stricter types in a followup CL.
Change-Id: I7369a8e3a426f44419c349077cb4fcab2044ebb6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12205
Tested-by: BuildkiteCI
Reviewed-by: yuka <yuka@yuka.dev>
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: benjaminedwardwebb <benjaminedwardwebb@gmail.com>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
2024-08-14 21:00:12 +02:00
|
|
|
name = %name.as_bstr(),
|
2024-08-16 01:24:12 +02:00
|
|
|
target = ?target,
|
2024-01-08 09:50:13 +01:00
|
|
|
"import successful"
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Extracts the base name (final component) of `path` as a `&str`.
///
/// Returns an [`std::io::Error`] of kind [`std::io::ErrorKind::InvalidInput`]
/// if the path has no final component (for example because it ends in `..`),
/// or if that component is not valid unicode.
#[inline]
pub fn path_to_name(path: &Path) -> std::io::Result<&str> {
    match path.file_name().and_then(std::ffi::OsStr::to_str) {
        Some(name) => Ok(name),
        None => Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "path must not be .. and the basename valid unicode",
        )),
    }
}
|
|
|
|
|
2024-06-16 15:00:09 +02:00
|
|
|
/// Ingest the contents at the given path `path` into castore, and registers the
|
|
|
|
/// resulting root node in the passed PathInfoService, using the "NAR sha256
|
|
|
|
/// digest" and the passed name for output path calculation.
|
2024-10-15 15:11:42 +02:00
|
|
|
/// Inserts the PathInfo into the PathInfoService and returns it back to the caller.
|
2024-01-17 08:06:30 +01:00
|
|
|
#[instrument(skip_all, fields(store_name=name, path=?path), err)]
|
2024-05-10 07:59:25 +02:00
|
|
|
pub async fn import_path_as_nar_ca<BS, DS, PS, NS, P>(
|
2024-01-08 09:50:13 +01:00
|
|
|
path: P,
|
2024-01-17 08:06:30 +01:00
|
|
|
name: &str,
|
2024-01-08 09:50:13 +01:00
|
|
|
blob_service: BS,
|
|
|
|
directory_service: DS,
|
|
|
|
path_info_service: PS,
|
2024-05-10 07:59:25 +02:00
|
|
|
nar_calculation_service: NS,
|
2024-10-15 15:11:42 +02:00
|
|
|
) -> Result<PathInfo, std::io::Error>
|
2024-01-08 09:50:13 +01:00
|
|
|
where
|
|
|
|
P: AsRef<Path> + std::fmt::Debug,
|
2024-04-18 20:51:28 +02:00
|
|
|
BS: BlobService + Clone,
|
2024-05-04 21:23:26 +02:00
|
|
|
DS: DirectoryService,
|
2024-01-08 09:50:13 +01:00
|
|
|
PS: AsRef<dyn PathInfoService>,
|
2024-05-10 07:59:25 +02:00
|
|
|
NS: NarCalculationService,
|
2024-01-08 09:50:13 +01:00
|
|
|
{
|
2024-10-15 15:11:42 +02:00
|
|
|
// Ingest the contents at the given path `path` into castore.
|
2024-09-27 21:32:17 +02:00
|
|
|
let root_node =
|
|
|
|
ingest_path::<_, _, _, &[u8]>(blob_service, directory_service, path.as_ref(), None)
|
|
|
|
.await
|
|
|
|
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
|
2024-01-08 09:50:13 +01:00
|
|
|
|
2024-05-10 07:59:25 +02:00
|
|
|
// Ask for the NAR size and sha256
|
|
|
|
let (nar_size, nar_sha256) = nar_calculation_service.calculate_nar(&root_node).await?;
|
2024-01-08 09:50:13 +01:00
|
|
|
|
2024-10-16 01:41:05 +02:00
|
|
|
let ca = CAHash::Nar(NixHash::Sha256(nar_sha256));
|
|
|
|
|
2024-01-08 09:50:13 +01:00
|
|
|
// Calculate the output path. This might still fail, as some names are illegal.
|
2024-01-17 08:06:30 +01:00
|
|
|
// FUTUREWORK: express the `name` at the type level to be valid and move the conversion
|
|
|
|
// at the caller level.
|
2024-10-16 01:41:05 +02:00
|
|
|
let output_path: StorePath<String> =
|
|
|
|
store_path::build_ca_path(name, &ca, std::iter::empty::<&str>(), false).map_err(|_| {
|
2024-10-15 15:11:42 +02:00
|
|
|
std::io::Error::new(
|
|
|
|
std::io::ErrorKind::InvalidData,
|
|
|
|
format!("invalid name: {}", name),
|
|
|
|
)
|
|
|
|
})?;
|
|
|
|
|
|
|
|
// Insert a PathInfo. On success, return it back to the caller.
|
|
|
|
Ok(path_info_service
|
|
|
|
.as_ref()
|
|
|
|
.put(PathInfo {
|
|
|
|
store_path: output_path.to_owned(),
|
|
|
|
node: root_node,
|
|
|
|
// There's no reference scanning on imported paths
|
|
|
|
references: vec![],
|
|
|
|
nar_size,
|
|
|
|
nar_sha256,
|
|
|
|
signatures: vec![],
|
|
|
|
deriver: None,
|
2024-10-16 01:41:05 +02:00
|
|
|
ca: Some(ca),
|
2024-10-15 15:11:42 +02:00
|
|
|
})
|
|
|
|
.await?)
|
2024-01-08 09:50:13 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use std::{ffi::OsStr, path::PathBuf};

    use crate::import::path_to_name;
    use rstest::rstest;

    /// Paths with a unicode final component yield that component, even when
    /// `..` appears earlier in the path.
    #[rstest]
    #[case::simple_path("a/b/c", "c")]
    #[case::simple_path_containing_dotdot("a/b/../c", "c")]
    #[case::path_containing_multiple_dotdot("a/b/../c/d/../e", "e")]
    fn test_path_to_name(#[case] path: &str, #[case] expected_name: &str) {
        let path = PathBuf::from(path);
        let name = path_to_name(&path).expect("must succeed");
        assert_eq!(name, expected_name);
    }

    /// Paths ending in `..` or whose base name is not valid unicode are rejected.
    #[rstest]
    #[case::path_ending_in_dotdot(b"a/b/..")]
    #[case::non_unicode_path(b"\xf8\xa1\xa1\xa1\xa1")]
    fn test_invalid_path_to_name(#[case] invalid_path: &[u8]) {
        // NOTE(review): the non-unicode case passes bytes that are neither valid
        // UTF-8 nor obtained from `OsStr::as_encoded_bytes`, which the documented
        // safety contract of `from_encoded_bytes_unchecked` asks for. Presumably
        // this relies on the platform's `OsStr` accepting arbitrary bytes — confirm
        // the targets this test is expected to run on.
        let raw = unsafe { OsStr::from_encoded_bytes_unchecked(invalid_path) };
        let path = PathBuf::from(raw);
        path_to_name(&path).expect_err("must fail");
    }
}
|