refactor(nix-compat/store_path): take [u8;32] for outer fingerprint

The outer fingerprint used for store path calculation is always a sha256
digest. This includes both input and output-addressed store paths.

We used a NixHash here, which can also represent other hash types, and
that had a bunch of annoyances:

 - Whenever we had the bytes, we had to wrap them in a NixHash::Sha256().
 - Things like AtermWriteable had to be implemented on NixHash,
   even though we then had an assertion it was only called in the
   NixHash::Sha256 case.

Change-Id: Ic895503d9b071800d2e52ae057666f44bd0ab9d6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11142
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: John Ericson <git@johnericson.me>
Reviewed-by: picnoir picnoir <picnoir@alternativebit.fr>
This commit is contained in:
Florian Klink 2024-03-14 13:50:56 +02:00 committed by clbot
parent 35f636b684
commit 43c851bc84
5 changed files with 46 additions and 51 deletions

View file

@ -8,7 +8,7 @@
//! This data is required to find the derivation needed to actually trigger the
//! build, if necessary.
use nix_compat::{derivation::Derivation, nixhash::NixHash, store_path::StorePath};
use nix_compat::{derivation::Derivation, store_path::StorePath};
use std::collections::HashMap;
/// Struct keeping track of all known Derivations in the current evaluation.
@ -20,7 +20,7 @@ pub struct KnownPaths {
///
/// Keys are derivation paths, values are a tuple of the "hash derivation
/// modulo" and the Derivation struct itself.
derivations: HashMap<StorePath, (NixHash, Derivation)>,
derivations: HashMap<StorePath, ([u8; 32], Derivation)>,
/// A map from output path to (one) drv path.
/// Note that in the case of FODs, multiple drvs can produce the same output
@ -30,7 +30,7 @@ pub struct KnownPaths {
impl KnownPaths {
/// Fetch the opaque "hash derivation modulo" for a given derivation path.
pub fn get_hash_derivation_modulo(&self, drv_path: &StorePath) -> Option<&NixHash> {
pub fn get_hash_derivation_modulo(&self, drv_path: &StorePath) -> Option<&[u8; 32]> {
self.derivations
.get(drv_path)
.map(|(hash_derivation_modulo, _derivation)| hash_derivation_modulo)
@ -83,7 +83,7 @@ impl KnownPaths {
#[allow(unused_variables)] // assertions on this only compiled in debug builds
let old = self
.derivations
.insert(drv_path.to_owned(), (hash_derivation_modulo.clone(), drv));
.insert(drv_path.to_owned(), (hash_derivation_modulo, drv));
#[cfg(debug_assertions)]
{
@ -99,7 +99,7 @@ impl KnownPaths {
#[cfg(test)]
mod tests {
use nix_compat::{derivation::Derivation, nixhash::NixHash, store_path::StorePath};
use nix_compat::{derivation::Derivation, store_path::StorePath};
use super::KnownPaths;
use hex_literal::hex;
@ -165,9 +165,9 @@ mod tests {
// It should be possible to get the hash derivation modulo.
assert_eq!(
Some(&NixHash::Sha256(hex!(
Some(&hex!(
"c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df"
))),
)),
known_paths.get_hash_derivation_modulo(&BAR_DRV_PATH.clone())
);
@ -180,9 +180,9 @@ mod tests {
known_paths.get_drv_by_drvpath(&FOO_DRV_PATH)
);
assert_eq!(
Some(&NixHash::Sha256(hex!(
Some(&hex!(
"af030d36d63d3d7f56a71adaba26b36f5fa1f9847da5eed953ed62e18192762f"
))),
)),
known_paths.get_hash_derivation_modulo(&FOO_DRV_PATH.clone())
);

View file

@ -178,8 +178,8 @@ impl Derivation {
///
/// This is called `hashDerivationModulo` in nixcpp.
///
/// It returns a [NixHash], created by calculating the sha256 digest of
/// the derivation ATerm representation, except that:
/// It returns the sha256 digest of the derivation ATerm representation,
/// except that:
/// - any input derivation paths have beed replaced "by the result of a
/// recursive call to this function" and that
/// - for fixed-output derivations the special
@ -190,16 +190,16 @@ impl Derivation {
/// this function to provide a lookup function to lookup these calculation
/// results of parent derivations at `fn_get_derivation_or_fod_hash` (by
/// drv path).
pub fn derivation_or_fod_hash<F>(&self, fn_get_derivation_or_fod_hash: F) -> NixHash
pub fn derivation_or_fod_hash<F>(&self, fn_get_derivation_or_fod_hash: F) -> [u8; 32]
where
F: Fn(&StorePathRef) -> NixHash,
F: Fn(&StorePathRef) -> [u8; 32],
{
// Fixed-output derivations return a fixed hash.
// Non-Fixed-output derivations return the sha256 digest of the ATerm
// notation, but with all input_derivation paths replaced by a recursive
// call to this function.
// We use fn_get_derivation_or_fod_hash here, so callers can precompute this.
NixHash::Sha256(self.fod_digest().unwrap_or({
self.fod_digest().unwrap_or({
// For each input_derivation, look up the
// derivation_or_fod_hash, and replace the derivation path with
// it's HEXLOWER digest.
@ -216,7 +216,7 @@ impl Derivation {
hasher.update(self.to_aterm_bytes_with_replacements(&input_derivations));
hasher.finalize().into()
}))
})
}
/// This calculates all output paths of a Derivation and updates the struct.
@ -238,7 +238,7 @@ impl Derivation {
pub fn calculate_output_paths(
&mut self,
name: &str,
derivation_or_fod_hash: &NixHash,
derivation_or_fod_hash: &[u8; 32],
) -> Result<(), DerivationError> {
// The fingerprint and hash differs per output
for (output_name, output) in self.outputs.iter_mut() {

View file

@ -5,6 +5,7 @@ use crate::derivation::parser::Error;
use crate::derivation::Derivation;
use crate::store_path::StorePath;
use bstr::{BStr, BString};
use hex_literal::hex;
use std::collections::BTreeSet;
use std::fs::File;
use std::io::Read;
@ -184,16 +185,15 @@ fn derivation_with_trimmed_output_paths(derivation: &Derivation) -> Derivation {
}
}
#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", "sha256:724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"; "fixed_sha256")]
#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", "sha256:c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df";"fixed-sha1")]
fn derivation_or_fod_hash(drv_path: &str, expected_nix_hash_string: &str) {
#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", hex!("724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"); "fixed_sha256")]
#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", hex!("c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df");"fixed-sha1")]
fn derivation_or_fod_hash(drv_path: &str, expected_digest: [u8; 32]) {
// read in the fixture
let json_bytes = read_file(&format!("{}/ok/{}.json", RESOURCES_PATHS, drv_path));
let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize");
let actual = drv.derivation_or_fod_hash(|_| panic!("must not be called"));
assert_eq!(expected_nix_hash_string, actual.to_nix_hex_string());
assert_eq!(expected_digest, actual);
}
/// This reads a Derivation (in A-Term), trims out all fields containing
@ -401,7 +401,7 @@ fn output_path_construction() {
if drv_path.to_string() != "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" {
panic!("lookup called with unexpected drv_path: {}", drv_path);
}
bar_drv_derivation_or_fod_hash.clone()
bar_drv_derivation_or_fod_hash
}),
);
assert!(foo_calc_result.is_ok());

View file

@ -8,7 +8,8 @@ use crate::derivation::{ca_kind_prefix, output::Output};
use crate::nixbase32;
use crate::store_path::{StorePath, StorePathRef, STORE_DIR_WITH_SLASH};
use bstr::BString;
use std::fmt::Display;
use data_encoding::HEXLOWER;
use std::{
collections::{BTreeMap, BTreeSet},
io,
@ -16,8 +17,6 @@ use std::{
io::Write,
};
use super::NixHash;
pub const DERIVATION_PREFIX: &str = "Derive";
pub const PAREN_OPEN: char = '(';
pub const PAREN_CLOSE: char = ')';
@ -31,7 +30,7 @@ pub const QUOTE: char = '"';
/// Note that we mostly use explicit `write_*` calls
/// instead since the serialization of the items depends on
/// the context a lot.
pub(crate) trait AtermWriteable: Display {
pub(crate) trait AtermWriteable {
fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()>;
fn aterm_bytes(&self) -> Vec<u8> {
@ -67,12 +66,9 @@ impl AtermWriteable for String {
}
}
impl AtermWriteable for NixHash {
impl AtermWriteable for [u8; 32] {
fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()> {
// When we serialize the placeholder hashes,
// they need to be SHA256.
debug_assert!(matches!(self, NixHash::Sha256(_)));
write_field(writer, self.to_plain_hex_string(), false)
write_field(writer, HEXLOWER.encode(self), false)
}
}

View file

@ -1,8 +1,8 @@
use crate::nixbase32;
use crate::nixhash::{CAHash, NixHash};
use crate::store_path::{Error, StorePathRef, DIGEST_SIZE, STORE_DIR};
use data_encoding::HEXLOWER;
use sha2::{Digest, Sha256};
use std::io::Write;
use thiserror;
/// Errors that can occur when creating a content-addressed store path.
@ -66,14 +66,11 @@ pub fn build_ca_path<'a, S: AsRef<str>, I: IntoIterator<Item = S>>(
return Err(BuildStorePathError::InvalidReference());
}
let (ty, hash) = match &ca_hash {
CAHash::Text(ref digest) => (
make_references_string("text", references, false),
NixHash::Sha256(*digest),
),
let (ty, inner_digest) = match &ca_hash {
CAHash::Text(ref digest) => (make_references_string("text", references, false), *digest),
CAHash::Nar(NixHash::Sha256(ref digest)) => (
make_references_string("source", references, self_reference),
NixHash::Sha256(*digest),
*digest,
),
// for all other CAHash::Nar, another custom scheme is used.
@ -84,7 +81,7 @@ pub fn build_ca_path<'a, S: AsRef<str>, I: IntoIterator<Item = S>>(
(
"output:out".to_string(),
NixHash::Sha256(fixed_out_digest("fixed:out:r", hash)),
fixed_out_digest("fixed:out:r", hash),
)
}
// CaHash::Flat is using something very similar, except the `r:` prefix.
@ -95,12 +92,12 @@ pub fn build_ca_path<'a, S: AsRef<str>, I: IntoIterator<Item = S>>(
(
"output:out".to_string(),
NixHash::Sha256(fixed_out_digest("fixed:out", hash)),
fixed_out_digest("fixed:out", hash),
)
}
};
build_store_path_from_fingerprint_parts(&ty, &hash, name)
build_store_path_from_fingerprint_parts(&ty, &inner_digest, name)
.map_err(BuildStorePathError::InvalidStorePath)
}
@ -128,13 +125,13 @@ pub fn build_nar_based_store_path<'a>(
/// Input-addresed store paths are always derivation outputs, the "input" in question is the
/// derivation and its closure.
pub fn build_output_path<'a>(
drv_hash: &NixHash,
drv_sha256: &[u8; 32],
output_name: &str,
output_path_name: &'a str,
) -> Result<StorePathRef<'a>, Error> {
build_store_path_from_fingerprint_parts(
&(String::from("output:") + output_name),
drv_hash,
drv_sha256,
output_path_name,
)
}
@ -145,18 +142,20 @@ pub fn build_output_path<'a>(
/// but other fingerprints starting with "output:" are also used in Derivation
/// output path calculation.
///
/// The fingerprint is hashed with sha256, its digest is compressed to 20 bytes,
/// and nixbase32-encoded (32 characters).
/// The fingerprint is hashed with sha256, and its digest is compressed to 20
/// bytes.
/// Inside a StorePath, that digest is printed nixbase32-encoded
/// (32 characters).
fn build_store_path_from_fingerprint_parts<'a>(
ty: &str,
hash: &NixHash,
inner_digest: &[u8; 32],
name: &'a str,
) -> Result<StorePathRef<'a>, Error> {
let digest: [u8; DIGEST_SIZE] = compress_hash(&{
let mut h = Sha256::new();
write!(h, "{ty}:{}:{STORE_DIR}:{name}", hash.to_nix_hex_string()).unwrap();
h.finalize()
});
let fingerprint = format!(
"{ty}:sha256:{}:{STORE_DIR}:{name}",
HEXLOWER.encode(inner_digest)
);
let digest: [u8; DIGEST_SIZE] = compress_hash(&Sha256::new_with_prefix(fingerprint).finalize());
// name validation happens in here.
StorePathRef::from_name_and_digest(name, &digest)