refactor(nix-compat/store_path): take [u8;32] for outer fingerprint

The outer fingerprint used for store path calculation is always a sha256
digest. This holds for both input-addressed and output-addressed store paths.

We used a NixHash here, which can also represent other hash types, and
that had a bunch of annoyances:

 - Whenever we had the bytes, we had to wrap them in a NixHash::Sha256().
 - Things like AtermWriteable had to be implemented on NixHash,
   even though we then had an assertion it was only called in the
   NixHash::Sha256 case.

Change-Id: Ic895503d9b071800d2e52ae057666f44bd0ab9d6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11142
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: John Ericson <git@johnericson.me>
Reviewed-by: picnoir picnoir <picnoir@alternativebit.fr>
This commit is contained in:
Florian Klink 2024-03-14 13:50:56 +02:00 committed by clbot
parent 35f636b684
commit 43c851bc84
5 changed files with 46 additions and 51 deletions

View file

@ -8,7 +8,7 @@
//! This data is required to find the derivation needed to actually trigger the //! This data is required to find the derivation needed to actually trigger the
//! build, if necessary. //! build, if necessary.
use nix_compat::{derivation::Derivation, nixhash::NixHash, store_path::StorePath}; use nix_compat::{derivation::Derivation, store_path::StorePath};
use std::collections::HashMap; use std::collections::HashMap;
/// Struct keeping track of all known Derivations in the current evaluation. /// Struct keeping track of all known Derivations in the current evaluation.
@ -20,7 +20,7 @@ pub struct KnownPaths {
/// ///
/// Keys are derivation paths, values are a tuple of the "hash derivation /// Keys are derivation paths, values are a tuple of the "hash derivation
/// modulo" and the Derivation struct itself. /// modulo" and the Derivation struct itself.
derivations: HashMap<StorePath, (NixHash, Derivation)>, derivations: HashMap<StorePath, ([u8; 32], Derivation)>,
/// A map from output path to (one) drv path. /// A map from output path to (one) drv path.
/// Note that in the case of FODs, multiple drvs can produce the same output /// Note that in the case of FODs, multiple drvs can produce the same output
@ -30,7 +30,7 @@ pub struct KnownPaths {
impl KnownPaths { impl KnownPaths {
/// Fetch the opaque "hash derivation modulo" for a given derivation path. /// Fetch the opaque "hash derivation modulo" for a given derivation path.
pub fn get_hash_derivation_modulo(&self, drv_path: &StorePath) -> Option<&NixHash> { pub fn get_hash_derivation_modulo(&self, drv_path: &StorePath) -> Option<&[u8; 32]> {
self.derivations self.derivations
.get(drv_path) .get(drv_path)
.map(|(hash_derivation_modulo, _derivation)| hash_derivation_modulo) .map(|(hash_derivation_modulo, _derivation)| hash_derivation_modulo)
@ -83,7 +83,7 @@ impl KnownPaths {
#[allow(unused_variables)] // assertions on this only compiled in debug builds #[allow(unused_variables)] // assertions on this only compiled in debug builds
let old = self let old = self
.derivations .derivations
.insert(drv_path.to_owned(), (hash_derivation_modulo.clone(), drv)); .insert(drv_path.to_owned(), (hash_derivation_modulo, drv));
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
{ {
@ -99,7 +99,7 @@ impl KnownPaths {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use nix_compat::{derivation::Derivation, nixhash::NixHash, store_path::StorePath}; use nix_compat::{derivation::Derivation, store_path::StorePath};
use super::KnownPaths; use super::KnownPaths;
use hex_literal::hex; use hex_literal::hex;
@ -165,9 +165,9 @@ mod tests {
// It should be possible to get the hash derivation modulo. // It should be possible to get the hash derivation modulo.
assert_eq!( assert_eq!(
Some(&NixHash::Sha256(hex!( Some(&hex!(
"c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df" "c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df"
))), )),
known_paths.get_hash_derivation_modulo(&BAR_DRV_PATH.clone()) known_paths.get_hash_derivation_modulo(&BAR_DRV_PATH.clone())
); );
@ -180,9 +180,9 @@ mod tests {
known_paths.get_drv_by_drvpath(&FOO_DRV_PATH) known_paths.get_drv_by_drvpath(&FOO_DRV_PATH)
); );
assert_eq!( assert_eq!(
Some(&NixHash::Sha256(hex!( Some(&hex!(
"af030d36d63d3d7f56a71adaba26b36f5fa1f9847da5eed953ed62e18192762f" "af030d36d63d3d7f56a71adaba26b36f5fa1f9847da5eed953ed62e18192762f"
))), )),
known_paths.get_hash_derivation_modulo(&FOO_DRV_PATH.clone()) known_paths.get_hash_derivation_modulo(&FOO_DRV_PATH.clone())
); );

View file

@ -178,8 +178,8 @@ impl Derivation {
/// ///
/// This is called `hashDerivationModulo` in nixcpp. /// This is called `hashDerivationModulo` in nixcpp.
/// ///
/// It returns a [NixHash], created by calculating the sha256 digest of /// It returns the sha256 digest of the derivation ATerm representation,
/// the derivation ATerm representation, except that: /// except that:
/// - any input derivation paths have been replaced "by the result of a /// - any input derivation paths have been replaced "by the result of a
/// recursive call to this function" and that /// recursive call to this function" and that
/// - for fixed-output derivations the special /// - for fixed-output derivations the special
@ -190,16 +190,16 @@ impl Derivation {
/// this function to provide a lookup function to lookup these calculation /// this function to provide a lookup function to lookup these calculation
/// results of parent derivations at `fn_get_derivation_or_fod_hash` (by /// results of parent derivations at `fn_get_derivation_or_fod_hash` (by
/// drv path). /// drv path).
pub fn derivation_or_fod_hash<F>(&self, fn_get_derivation_or_fod_hash: F) -> NixHash pub fn derivation_or_fod_hash<F>(&self, fn_get_derivation_or_fod_hash: F) -> [u8; 32]
where where
F: Fn(&StorePathRef) -> NixHash, F: Fn(&StorePathRef) -> [u8; 32],
{ {
// Fixed-output derivations return a fixed hash. // Fixed-output derivations return a fixed hash.
// Non-Fixed-output derivations return the sha256 digest of the ATerm // Non-Fixed-output derivations return the sha256 digest of the ATerm
// notation, but with all input_derivation paths replaced by a recursive // notation, but with all input_derivation paths replaced by a recursive
// call to this function. // call to this function.
// We use fn_get_derivation_or_fod_hash here, so callers can precompute this. // We use fn_get_derivation_or_fod_hash here, so callers can precompute this.
NixHash::Sha256(self.fod_digest().unwrap_or({ self.fod_digest().unwrap_or({
// For each input_derivation, look up the // For each input_derivation, look up the
// derivation_or_fod_hash, and replace the derivation path with // derivation_or_fod_hash, and replace the derivation path with
// its HEXLOWER digest. // its HEXLOWER digest.
@ -216,7 +216,7 @@ impl Derivation {
hasher.update(self.to_aterm_bytes_with_replacements(&input_derivations)); hasher.update(self.to_aterm_bytes_with_replacements(&input_derivations));
hasher.finalize().into() hasher.finalize().into()
})) })
} }
/// This calculates all output paths of a Derivation and updates the struct. /// This calculates all output paths of a Derivation and updates the struct.
@ -238,7 +238,7 @@ impl Derivation {
pub fn calculate_output_paths( pub fn calculate_output_paths(
&mut self, &mut self,
name: &str, name: &str,
derivation_or_fod_hash: &NixHash, derivation_or_fod_hash: &[u8; 32],
) -> Result<(), DerivationError> { ) -> Result<(), DerivationError> {
// The fingerprint and hash differs per output // The fingerprint and hash differs per output
for (output_name, output) in self.outputs.iter_mut() { for (output_name, output) in self.outputs.iter_mut() {

View file

@ -5,6 +5,7 @@ use crate::derivation::parser::Error;
use crate::derivation::Derivation; use crate::derivation::Derivation;
use crate::store_path::StorePath; use crate::store_path::StorePath;
use bstr::{BStr, BString}; use bstr::{BStr, BString};
use hex_literal::hex;
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::fs::File; use std::fs::File;
use std::io::Read; use std::io::Read;
@ -184,16 +185,15 @@ fn derivation_with_trimmed_output_paths(derivation: &Derivation) -> Derivation {
} }
} }
#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", "sha256:724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"; "fixed_sha256")] #[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", hex!("724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"); "fixed_sha256")]
#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", "sha256:c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df";"fixed-sha1")] #[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", hex!("c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df");"fixed-sha1")]
fn derivation_or_fod_hash(drv_path: &str, expected_nix_hash_string: &str) { fn derivation_or_fod_hash(drv_path: &str, expected_digest: [u8; 32]) {
// read in the fixture // read in the fixture
let json_bytes = read_file(&format!("{}/ok/{}.json", RESOURCES_PATHS, drv_path)); let json_bytes = read_file(&format!("{}/ok/{}.json", RESOURCES_PATHS, drv_path));
let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize"); let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize");
let actual = drv.derivation_or_fod_hash(|_| panic!("must not be called")); let actual = drv.derivation_or_fod_hash(|_| panic!("must not be called"));
assert_eq!(expected_digest, actual);
assert_eq!(expected_nix_hash_string, actual.to_nix_hex_string());
} }
/// This reads a Derivation (in A-Term), trims out all fields containing /// This reads a Derivation (in A-Term), trims out all fields containing
@ -401,7 +401,7 @@ fn output_path_construction() {
if drv_path.to_string() != "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" { if drv_path.to_string() != "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" {
panic!("lookup called with unexpected drv_path: {}", drv_path); panic!("lookup called with unexpected drv_path: {}", drv_path);
} }
bar_drv_derivation_or_fod_hash.clone() bar_drv_derivation_or_fod_hash
}), }),
); );
assert!(foo_calc_result.is_ok()); assert!(foo_calc_result.is_ok());

View file

@ -8,7 +8,8 @@ use crate::derivation::{ca_kind_prefix, output::Output};
use crate::nixbase32; use crate::nixbase32;
use crate::store_path::{StorePath, StorePathRef, STORE_DIR_WITH_SLASH}; use crate::store_path::{StorePath, StorePathRef, STORE_DIR_WITH_SLASH};
use bstr::BString; use bstr::BString;
use std::fmt::Display; use data_encoding::HEXLOWER;
use std::{ use std::{
collections::{BTreeMap, BTreeSet}, collections::{BTreeMap, BTreeSet},
io, io,
@ -16,8 +17,6 @@ use std::{
io::Write, io::Write,
}; };
use super::NixHash;
pub const DERIVATION_PREFIX: &str = "Derive"; pub const DERIVATION_PREFIX: &str = "Derive";
pub const PAREN_OPEN: char = '('; pub const PAREN_OPEN: char = '(';
pub const PAREN_CLOSE: char = ')'; pub const PAREN_CLOSE: char = ')';
@ -31,7 +30,7 @@ pub const QUOTE: char = '"';
/// Note that we mostly use explicit `write_*` calls /// Note that we mostly use explicit `write_*` calls
/// instead since the serialization of the items depends on /// instead since the serialization of the items depends on
/// the context a lot. /// the context a lot.
pub(crate) trait AtermWriteable: Display { pub(crate) trait AtermWriteable {
fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()>; fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()>;
fn aterm_bytes(&self) -> Vec<u8> { fn aterm_bytes(&self) -> Vec<u8> {
@ -67,12 +66,9 @@ impl AtermWriteable for String {
} }
} }
impl AtermWriteable for NixHash { impl AtermWriteable for [u8; 32] {
fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()> { fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()> {
// When we serialize the placeholder hashes, write_field(writer, HEXLOWER.encode(self), false)
// they need to be SHA256.
debug_assert!(matches!(self, NixHash::Sha256(_)));
write_field(writer, self.to_plain_hex_string(), false)
} }
} }

View file

@ -1,8 +1,8 @@
use crate::nixbase32; use crate::nixbase32;
use crate::nixhash::{CAHash, NixHash}; use crate::nixhash::{CAHash, NixHash};
use crate::store_path::{Error, StorePathRef, DIGEST_SIZE, STORE_DIR}; use crate::store_path::{Error, StorePathRef, DIGEST_SIZE, STORE_DIR};
use data_encoding::HEXLOWER;
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use std::io::Write;
use thiserror; use thiserror;
/// Errors that can occur when creating a content-addressed store path. /// Errors that can occur when creating a content-addressed store path.
@ -66,14 +66,11 @@ pub fn build_ca_path<'a, S: AsRef<str>, I: IntoIterator<Item = S>>(
return Err(BuildStorePathError::InvalidReference()); return Err(BuildStorePathError::InvalidReference());
} }
let (ty, hash) = match &ca_hash { let (ty, inner_digest) = match &ca_hash {
CAHash::Text(ref digest) => ( CAHash::Text(ref digest) => (make_references_string("text", references, false), *digest),
make_references_string("text", references, false),
NixHash::Sha256(*digest),
),
CAHash::Nar(NixHash::Sha256(ref digest)) => ( CAHash::Nar(NixHash::Sha256(ref digest)) => (
make_references_string("source", references, self_reference), make_references_string("source", references, self_reference),
NixHash::Sha256(*digest), *digest,
), ),
// for all other CAHash::Nar, another custom scheme is used. // for all other CAHash::Nar, another custom scheme is used.
@ -84,7 +81,7 @@ pub fn build_ca_path<'a, S: AsRef<str>, I: IntoIterator<Item = S>>(
( (
"output:out".to_string(), "output:out".to_string(),
NixHash::Sha256(fixed_out_digest("fixed:out:r", hash)), fixed_out_digest("fixed:out:r", hash),
) )
} }
// CaHash::Flat is using something very similar, except the `r:` prefix. // CaHash::Flat is using something very similar, except the `r:` prefix.
@ -95,12 +92,12 @@ pub fn build_ca_path<'a, S: AsRef<str>, I: IntoIterator<Item = S>>(
( (
"output:out".to_string(), "output:out".to_string(),
NixHash::Sha256(fixed_out_digest("fixed:out", hash)), fixed_out_digest("fixed:out", hash),
) )
} }
}; };
build_store_path_from_fingerprint_parts(&ty, &hash, name) build_store_path_from_fingerprint_parts(&ty, &inner_digest, name)
.map_err(BuildStorePathError::InvalidStorePath) .map_err(BuildStorePathError::InvalidStorePath)
} }
@ -128,13 +125,13 @@ pub fn build_nar_based_store_path<'a>(
/// Input-addressed store paths are always derivation outputs, the "input" in question is the /// Input-addressed store paths are always derivation outputs, the "input" in question is the
/// derivation and its closure. /// derivation and its closure.
pub fn build_output_path<'a>( pub fn build_output_path<'a>(
drv_hash: &NixHash, drv_sha256: &[u8; 32],
output_name: &str, output_name: &str,
output_path_name: &'a str, output_path_name: &'a str,
) -> Result<StorePathRef<'a>, Error> { ) -> Result<StorePathRef<'a>, Error> {
build_store_path_from_fingerprint_parts( build_store_path_from_fingerprint_parts(
&(String::from("output:") + output_name), &(String::from("output:") + output_name),
drv_hash, drv_sha256,
output_path_name, output_path_name,
) )
} }
@ -145,18 +142,20 @@ pub fn build_output_path<'a>(
/// but other fingerprints starting with "output:" are also used in Derivation /// but other fingerprints starting with "output:" are also used in Derivation
/// output path calculation. /// output path calculation.
/// ///
/// The fingerprint is hashed with sha256, its digest is compressed to 20 bytes, /// The fingerprint is hashed with sha256, and its digest is compressed to 20
/// and nixbase32-encoded (32 characters). /// bytes.
/// Inside a StorePath, that digest is printed nixbase32-encoded
/// (32 characters).
fn build_store_path_from_fingerprint_parts<'a>( fn build_store_path_from_fingerprint_parts<'a>(
ty: &str, ty: &str,
hash: &NixHash, inner_digest: &[u8; 32],
name: &'a str, name: &'a str,
) -> Result<StorePathRef<'a>, Error> { ) -> Result<StorePathRef<'a>, Error> {
let digest: [u8; DIGEST_SIZE] = compress_hash(&{ let fingerprint = format!(
let mut h = Sha256::new(); "{ty}:sha256:{}:{STORE_DIR}:{name}",
write!(h, "{ty}:{}:{STORE_DIR}:{name}", hash.to_nix_hex_string()).unwrap(); HEXLOWER.encode(inner_digest)
h.finalize() );
}); let digest: [u8; DIGEST_SIZE] = compress_hash(&Sha256::new_with_prefix(fingerprint).finalize());
// name validation happens in here. // name validation happens in here.
StorePathRef::from_name_and_digest(name, &digest) StorePathRef::from_name_and_digest(name, &digest)