refactor(nix-compat): Properly encapsulate store path construction

Before there was code scattered about (e.g. text hashing module and
derivation output computation) constructing store paths from low level
building blocks --- there was some duplication and it was easy to make
nonsense store paths.

Now, we have roughly the same "safe-ish" ways of constructing them as
C++ Nix, and only those are exposed:

- Make text hashed content-addressed store paths

- Make other content-addressed store paths

- Make input-addressed fixed output hashes

Change-Id: I122a3ee0802b4f45ae386306b95b698991be89c8
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8411
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
John Ericson 2023-03-31 10:20:04 -04:00 committed by John Ericson
parent b4670bfbd1
commit 26c68f8e89
9 changed files with 268 additions and 125 deletions

View file

@ -412,16 +412,14 @@ mod derivation_builtins {
// TODO: fail on derivation references (only "plain" is allowed here)
let path = nix_compat::store_path::build_store_path_from_references(
name.as_str(),
content.as_str(),
refs,
)
.map_err(|_e| {
nix_compat::derivation::DerivationError::InvalidOutputName(name.as_str().to_string())
})
.map_err(Error::InvalidDerivation)?
.to_absolute_path();
let path = nix_compat::store_path::build_text_path(name.as_str(), content.as_str(), refs)
.map_err(|_e| {
nix_compat::derivation::DerivationError::InvalidOutputName(
name.as_str().to_string(),
)
})
.map_err(Error::InvalidDerivation)?
.to_absolute_path();
state.borrow_mut().plain(&path);

View file

@ -15,6 +15,8 @@ pub enum DerivationError {
InvalidOutputNameForFixed(String),
#[error("unable to validate output {0}: {1}")]
InvalidOutput(String, OutputError),
#[error("unable to validate output {0}: {1}")]
InvalidOutputDerivationPath(String, store_path::BuildStorePathError),
// input derivation
#[error("unable to parse input derivation path {0}: {1}")]
InvalidInputDerivationPath(String, store_path::Error),

View file

@ -1,8 +1,5 @@
use crate::{
nixhash::HashAlgo,
store_path::{
self, build_store_path_from_fingerprint, build_store_path_from_references, StorePath,
},
use crate::store_path::{
self, build_output_path, build_regular_ca_path, build_text_path, StorePath,
};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
@ -78,7 +75,7 @@ impl Derivation {
/// Returns the drv path of a [Derivation] struct.
///
/// The drv path is calculated by invoking [build_store_path_from_references], using
/// The drv path is calculated by invoking [build_text_path], using
/// the `name` with a `.drv` suffix as name, all [Derivation::input_sources] and
/// keys of [Derivation::input_derivations] as references, and the ATerm string of
/// the [Derivation] as content.
@ -96,7 +93,7 @@ impl Derivation {
inputs
};
build_store_path_from_references(name, self.to_aterm_string(), references)
build_text_path(name, self.to_aterm_string(), references)
.map_err(|_e| DerivationError::InvalidOutputName(name.to_string()))
}
@ -196,7 +193,6 @@ impl Derivation {
name: &str,
derivation_or_fod_hash: &NixHash,
) -> Result<(), DerivationError> {
let num_outputs = self.outputs.len();
// The fingerprint and hash differs per output
for (output_name, output) in self.outputs.iter_mut() {
// Assert that outputs are not yet populated, to avoid using this function wrongly.
@ -204,59 +200,43 @@ impl Derivation {
// footgun prevention mechanism.
assert!(output.path.is_empty());
// calculate the output_name_path, which is the part of the NixPath after the digest.
// It's the name, and (if it's the non-out output), the output name after a `-`.
let output_path_name = {
let mut output_path_name = name.to_string();
if output_name != "out" {
output_path_name.push('-');
output_path_name.push_str(output_name);
}
output_path_name
let path_name = output_path_name(name, output_name);
// For fixed output derivation we use the per-output info, otherwise we use the
// derivation hash.
let abs_store_path = if let Some(ref hwm) = output.hash_with_mode {
build_regular_ca_path(&path_name, hwm, Vec::<String>::new(), false).map_err(
|e| DerivationError::InvalidOutputDerivationPath(output_name.to_string(), e),
)?
} else {
build_output_path(derivation_or_fod_hash, &output_name, &path_name).map_err(
|e| {
DerivationError::InvalidOutputDerivationPath(
output_name.to_string(),
store_path::BuildStorePathError::InvalidName(e),
)
},
)?
};
// In the case this is a fixed-output derivation AND the
// hash mode is recursive AND the hash algo is sha256, a
// custom fingerprint is used to calculate the output path.
//
// In all other cases, the fingerprint is derived from the
// derivation_or_fod_hash.
let custom_fp: Option<String> =
if num_outputs == 1 && output_name == "out" && output.hash_with_mode.is_some() {
match &output.hash_with_mode {
Some(NixHashWithMode::Recursive(NixHash {
digest,
algo: HashAlgo::Sha256,
})) => Some(format!(
"source:sha256:{}:{}:{}",
data_encoding::HEXLOWER.encode(digest),
store_path::STORE_DIR,
output_path_name
)),
_ => None,
}
} else {
None
};
// If no custom_fp has been determined, use the default one.
let fp = custom_fp.unwrap_or(format!(
"output:{}:{}:{}:{}",
output_name,
derivation_or_fod_hash.to_nix_hash_string(),
store_path::STORE_DIR,
output_path_name,
));
let abs_store_path = build_store_path_from_fingerprint(&output_path_name, &fp)
.map_err(|_e| DerivationError::InvalidOutputName(output_path_name.to_string()))?
.to_absolute_path();
output.path = abs_store_path.clone();
output.path = abs_store_path.to_absolute_path();
self.environment
.insert(output_name.to_string(), abs_store_path);
.insert(output_name.to_string(), abs_store_path.to_absolute_path());
}
Ok(())
}
}
/// Calculate the name part of the store path of a derivation [Output].
///
/// It's the name, and (if it's the non-out output), the output name
/// after a `-`.
fn output_path_name(derivation_name: &str, output_name: &str) -> String {
let mut output_path_name = derivation_name.to_string();
if output_name != "out" {
output_path_name.push('-');
output_path_name.push_str(output_name);
}
output_path_name
}

View file

@ -1,7 +1,7 @@
use crate::derivation::output::Output;
use crate::derivation::Derivation;
use crate::nixhash::NixHash;
use crate::store_path::{build_store_path_from_references, StorePath};
use crate::store_path::StorePath;
use std::collections::BTreeSet;
use std::fs::File;
use std::io::Read;

View file

@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize};
use crate::nixhash::Error;
/// This are the hash algorithms supported by cppnix.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub enum HashAlgo {
Md5,
Sha1,

View file

@ -3,6 +3,20 @@ use crate::nixhash::{HashAlgo, NixHash};
use serde::ser::SerializeMap;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
pub enum NixHashMode {
Flat,
Recursive,
}
impl NixHashMode {
pub fn prefix(self) -> &'static str {
match self {
Self::Flat => "",
Self::Recursive => "r:",
}
}
}
/// A Nix Hash can either be flat or recursive.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum NixHashWithMode {
@ -11,14 +25,25 @@ pub enum NixHashWithMode {
}
impl NixHashWithMode {
pub fn mode(&self) -> NixHashMode {
match self {
Self::Flat(_) => NixHashMode::Flat,
Self::Recursive(_) => NixHashMode::Recursive,
}
}
pub fn digest(&self) -> &NixHash {
match self {
Self::Flat(ref h) => h,
Self::Recursive(ref h) => h,
}
}
/// Formats a [NixHashWithMode] in the Nix default hash format,
/// which is the algo, followed by a colon, then the lower hex encoded digest.
/// In case the hash itself is recursive, a `r:` is added as prefix
pub fn to_nix_hash_string(&self) -> String {
match self {
NixHashWithMode::Flat(h) => h.to_nix_hash_string(),
NixHashWithMode::Recursive(h) => format!("r:{}", h.to_nix_hash_string()),
}
String::from(self.mode().prefix()) + &self.digest().to_nix_hash_string()
}
}

View file

@ -4,10 +4,7 @@ use thiserror::Error;
mod utils;
pub use utils::{
build_store_path_from_fingerprint, build_store_path_from_references, compress_hash,
hash_placeholder,
};
pub use utils::*;
pub const DIGEST_SIZE: usize = 20;
// lazy_static doesn't allow us to call NIXBASE32.encode_len(), so we ran it
@ -19,19 +16,32 @@ pub const ENCODED_DIGEST_SIZE: usize = 32;
pub const STORE_DIR: &str = "/nix/store";
pub const STORE_DIR_WITH_SLASH: &str = "/nix/store/";
/// Errors that can occur during the validation of name characters.
/// Errors that can occur when parsing a literal store path
#[derive(Debug, PartialEq, Eq, Error)]
pub enum Error {
#[error("Dash is missing between hash and name")]
MissingDash(),
#[error("Hash encoding is invalid: {0}")]
InvalidHashEncoding(Nixbase32DecodeError),
#[error("Invalid name: {0}")]
InvalidName(String),
#[error("{0}")]
InvalidName(NameError),
#[error("Tried to parse an absolute path which was missing the store dir prefix.")]
MissingStoreDir(),
}
/// Errors that can occur during the validation of name characters.
#[derive(Debug, PartialEq, Eq, Error)]
pub enum NameError {
#[error("Invalid name: {0}")]
InvalidName(String),
}
impl From<NameError> for Error {
fn from(e: NameError) -> Self {
Self::InvalidName(e)
}
}
/// Represents a path in the Nix store (a direct child of [STORE_DIR]).
///
/// It starts with a digest (20 bytes), [crate::nixbase32]-encoded,
@ -56,7 +66,7 @@ impl StorePath {
// - 1 dash
// - 1 character for the name
if s.len() < ENCODED_DIGEST_SIZE + 2 {
return Err(Error::InvalidName("".to_string()));
Err(NameError::InvalidName("".to_string()))?;
}
let digest = match nixbase32::decode(s[..ENCODED_DIGEST_SIZE].as_bytes()) {
@ -92,7 +102,7 @@ impl StorePath {
}
/// Checks a given &str to match the restrictions for store path names.
pub fn validate_name(s: &str) -> Result<(), Error> {
pub fn validate_name(s: &str) -> Result<(), NameError> {
for c in s.chars() {
if c.is_ascii_alphanumeric()
|| c == '-'
@ -105,7 +115,7 @@ impl StorePath {
continue;
}
return Err(Error::InvalidName(s.to_string()));
return Err(NameError::InvalidName(s.to_string()));
}
Ok(())

View file

@ -1,9 +1,24 @@
use crate::nixbase32;
use crate::nixhash::NixHash;
use crate::nixhash::{HashAlgo, NixHash, NixHashWithMode};
use crate::store_path::StorePath;
use sha2::{Digest, Sha256};
use thiserror::Error;
use super::Error;
use super::{NameError, STORE_DIR};
/// Errors that can occur when creating a content-addressed store path.
///
/// This wraps the main [Error] which is just about invalid store path names.
#[derive(Debug, PartialEq, Eq, Error)]
pub enum BuildStorePathError {
#[error("{0}")]
InvalidName(NameError),
/// This error occurs when we have references outside the SHA-256 +
/// Recursive case. The restriction comes from upstream Nix. It may be
/// lifted at some point but there isn't a pressing need to anticipate that.
#[error("References were not supported as much as requested")]
InvalidReference(),
}
/// compress_hash takes an arbitrarily long sequence of bytes (usually
/// a hash digest), and returns a sequence of bytes of length
@ -27,33 +42,107 @@ pub fn compress_hash<const OUTPUT_SIZE: usize>(input: &[u8]) -> [u8; OUTPUT_SIZE
/// This builds a store path, by calculating the text_hash_string of either a
/// derivation or a literal text file that may contain references.
pub fn build_store_path_from_references<
S: AsRef<str>,
I: IntoIterator<Item = S>,
C: AsRef<[u8]>,
>(
pub fn build_text_path<S: AsRef<str>, I: IntoIterator<Item = S>, C: AsRef<[u8]>>(
name: &str,
content: C,
references: I,
) -> Result<StorePath, Error> {
let text_hash_str = text_hash_string(name, content, references);
build_store_path_from_fingerprint(name, &text_hash_str)
) -> Result<StorePath, NameError> {
build_store_path_from_fingerprint_parts(
&make_type("text", references, false),
// the nix_hash_string representation of the sha256 digest of some contents
&{
let content_digest = {
let hasher = Sha256::new_with_prefix(content);
hasher.finalize()
};
NixHash::new(crate::nixhash::HashAlgo::Sha256, content_digest.to_vec())
},
name,
)
}
/// This builds a store path from a fingerprint.
/// Usually, that function is used from [build_store_path_from_references] and
/// This builds a more "regular" content-addressed store path
pub fn build_regular_ca_path<S: AsRef<str>, I: IntoIterator<Item = S>>(
name: &str,
hash_with_mode: &NixHashWithMode,
references: I,
self_reference: bool,
) -> Result<StorePath, BuildStorePathError> {
match &hash_with_mode {
NixHashWithMode::Recursive(
ref hash @ NixHash {
algo: HashAlgo::Sha256,
..
},
) => build_store_path_from_fingerprint_parts(
&make_type("source", references, self_reference),
hash,
name,
)
.map_err(BuildStorePathError::InvalidName),
_ => {
if references.into_iter().next().is_some() {
return Err(BuildStorePathError::InvalidReference());
}
if self_reference {
return Err(BuildStorePathError::InvalidReference());
}
build_store_path_from_fingerprint_parts(
"output:out",
&{
let content_digest = {
let mut hasher = Sha256::new_with_prefix("fixed:out:");
hasher.update(hash_with_mode.mode().prefix());
hasher.update(hash_with_mode.digest().algo.to_string());
hasher.update(":");
hasher.update(
&data_encoding::HEXLOWER.encode(&hash_with_mode.digest().digest),
);
hasher.update(":");
hasher.finalize()
};
NixHash::new(crate::nixhash::HashAlgo::Sha256, content_digest.to_vec())
},
name,
)
.map_err(BuildStorePathError::InvalidName)
}
}
}
/// This builds an input-addressed store path
///
/// Input-addresed store paths are always derivation outputs, the "input" in question is the
/// derivation and its closure.
pub fn build_output_path(
drv_hash: &NixHash,
output_name: &str,
output_path_name: &str,
) -> Result<StorePath, NameError> {
build_store_path_from_fingerprint_parts(
&(String::from("output:") + output_name),
drv_hash,
&output_path_name,
)
}
/// This builds a store path from fingerprint parts.
/// Usually, that function is used from [build_text_path] and
/// passed a "text hash string" (starting with "text:" as fingerprint),
/// but other fingerprints starting with "output:" are also used in Derivation
/// output path calculation.
///
/// The fingerprint is hashed with sha256, its digest is compressed to 20 bytes,
/// and nixbase32-encoded (32 characters).
pub fn build_store_path_from_fingerprint(
fn build_store_path_from_fingerprint_parts(
ty: &str,
hash: &NixHash,
name: &str,
fingerprint: &str,
) -> Result<StorePath, Error> {
) -> Result<StorePath, NameError> {
let fingerprint =
String::from(ty) + ":" + &hash.to_nix_hash_string() + ":" + STORE_DIR + ":" + name;
let digest = {
let hasher = Sha256::new_with_prefix(fingerprint);
let hasher = Sha256::new_with_prefix(&fingerprint);
hasher.finalize()
};
let compressed = compress_hash::<20>(&digest);
@ -74,31 +163,21 @@ pub fn build_store_path_from_fingerprint(
/// - the nix_hash_string representation of the sha256 digest of some contents
/// - the value of `storeDir`
/// - the name
pub fn text_hash_string<S: AsRef<str>, I: IntoIterator<Item = S>, C: AsRef<[u8]>>(
name: &str,
content: C,
fn make_type<S: AsRef<str>, I: IntoIterator<Item = S>>(
ty: &str,
references: I,
self_ref: bool,
) -> String {
let mut s = String::from("text:");
let mut s = String::from(ty);
for reference in references {
s.push_str(reference.as_ref());
s.push(':');
s.push_str(reference.as_ref());
}
// the nix_hash_string representation of the sha256 digest of some contents
s.push_str(
&{
let content_digest = {
let hasher = Sha256::new_with_prefix(content);
hasher.finalize()
};
NixHash::new(crate::nixhash::HashAlgo::Sha256, content_digest.to_vec())
}
.to_nix_hash_string(),
);
s.push_str(&format!(":{}:{}", crate::store_path::STORE_DIR, name));
if self_ref {
s.push_str(":self");
}
s
}
@ -121,16 +200,17 @@ pub fn hash_placeholder(name: &str) -> String {
#[cfg(test)]
mod test {
use crate::store_path::build_store_path_from_references;
use super::*;
use crate::nixhash::{NixHash, NixHashWithMode};
#[test]
fn build_store_path_with_zero_references() {
fn build_text_path_with_zero_references() {
// This hash should match `builtins.toFile`, e.g.:
//
// nix-repl> builtins.toFile "foo" "bar"
// "/nix/store/vxjiwkjkn7x4079qvh1jkl5pn05j2aw0-foo"
let store_path = build_store_path_from_references("foo", "bar", Vec::<String>::new())
let store_path = build_text_path("foo", "bar", Vec::<String>::new())
.expect("build_store_path() should succeed");
assert_eq!(
@ -140,17 +220,17 @@ mod test {
}
#[test]
fn build_store_path_with_non_zero_references() {
fn build_text_path_with_non_zero_references() {
// This hash should match:
//
// nix-repl> builtins.toFile "baz" "${builtins.toFile "foo" "bar"}"
// "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz"
let inner = build_store_path_from_references("foo", "bar", Vec::<String>::new())
let inner = build_text_path("foo", "bar", Vec::<String>::new())
.expect("path_with_references() should succeed");
let inner_path = inner.to_absolute_path();
let outer = build_store_path_from_references("baz", &inner_path, vec![inner_path.as_str()])
let outer = build_text_path("baz", &inner_path, vec![inner_path.as_str()])
.expect("path_with_references() should succeed");
assert_eq!(
@ -158,4 +238,52 @@ mod test {
"/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz"
);
}
#[test]
fn build_sha1_path() {
let outer = build_regular_ca_path(
"bar",
&NixHashWithMode::Recursive(NixHash {
algo: HashAlgo::Sha1,
digest: data_encoding::HEXLOWER
.decode(b"0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33")
.expect("hex should decode"),
}),
Vec::<String>::new(),
false,
)
.expect("path_with_references() should succeed");
assert_eq!(
outer.to_absolute_path().as_str(),
"/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar"
);
}
#[test]
fn build_store_path_with_non_zero_references() {
// This hash should match:
//
// nix-repl> builtins.toFile "baz" "${builtins.toFile "foo" "bar"}"
// "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz"
//
// $ nix store make-content-addressed /nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz
// rewrote '/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz' to '/nix/store/s89y431zzhmdn3k8r96rvakryddkpv2v-baz'
let outer = build_regular_ca_path(
"baz",
&NixHashWithMode::Recursive(NixHash {
algo: HashAlgo::Sha256,
digest: nixbase32::decode(b"1xqkzcb3909fp07qngljr4wcdnrh1gdam1m2n29i6hhrxlmkgkv1")
.expect("hex should decode"),
}),
vec!["/nix/store/dxwkwjzdaq7ka55pkk252gh32bgpmql4-foo"],
false,
)
.expect("path_with_references() should succeed");
assert_eq!(
outer.to_absolute_path().as_str(),
"/nix/store/s89y431zzhmdn3k8r96rvakryddkpv2v-baz"
);
}
}

View file

@ -66,7 +66,7 @@ fn validate_no_node(
},
Err(ValidatePathInfoError::InvalidNodeName(
"invalid".to_string(),
store_path::Error::InvalidName("".to_string())
store_path::Error::InvalidName(store_path::NameError::InvalidName("".to_string()))
));
"invalid node name"
)]
@ -111,7 +111,7 @@ fn validate_directory(
},
Err(ValidatePathInfoError::InvalidNodeName(
"invalid".to_string(),
store_path::Error::InvalidName("".to_string())
store_path::Error::InvalidName(store_path::NameError::InvalidName("".to_string()))
));
"invalid node name"
)]
@ -141,7 +141,7 @@ fn validate_file(t_file_node: proto::FileNode, t_result: Result<StorePath, Valid
},
Err(ValidatePathInfoError::InvalidNodeName(
"invalid".to_string(),
store_path::Error::InvalidName("".to_string())
store_path::Error::InvalidName(store_path::NameError::InvalidName("".to_string()))
));
"invalid node name"
)]