refactor(tvix/nix-compat): replace calculate_drv_replacement_str

Call this function derivation_or_fod_hash, and return a NixHash.

This is more in line with how cppnix calls this, and allows using
to_nix_hash_string() in some places.

Change-Id: Iebf5355f08ed5c9a044844739350f829f874f0ce
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8293
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
This commit is contained in:
Florian Klink 2023-03-13 23:52:23 +01:00 committed by flokli
parent 32999cb6f6
commit 5f260edf7f
4 changed files with 98 additions and 71 deletions

View file

@ -339,21 +339,27 @@ mod derivation_builtins {
// eval data structures. // eval data structures.
drv.validate(false).map_err(Error::InvalidDerivation)?; drv.validate(false).map_err(Error::InvalidDerivation)?;
let tmp_replacement_str = // Calculate the derivation_or_fod_hash for the current derivation.
drv.calculate_drv_replacement_str(|drv| known_paths.get_replacement_string(drv)); // This one is still intermediate (so not added to known_paths)
let derivation_or_fod_hash_tmp =
drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv));
drv.calculate_output_paths(&name, &tmp_replacement_str) // Mutate the Derivation struct and set output paths
drv.calculate_output_paths(&name, &derivation_or_fod_hash_tmp)
.map_err(Error::InvalidDerivation)?; .map_err(Error::InvalidDerivation)?;
let actual_replacement_str =
drv.calculate_drv_replacement_str(|drv| known_paths.get_replacement_string(drv));
let derivation_path = drv let derivation_path = drv
.calculate_derivation_path(&name) .calculate_derivation_path(&name)
.map_err(Error::InvalidDerivation)?; .map_err(Error::InvalidDerivation)?;
known_paths // recompute the hash derivation modulo and add to known_paths
.add_replacement_string(derivation_path.to_absolute_path(), &actual_replacement_str); let derivation_or_fod_hash_final =
drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv));
known_paths.add_hash_derivation_modulo(
derivation_path.to_absolute_path(),
&derivation_or_fod_hash_final,
);
// mark all the new paths as known // mark all the new paths as known
let output_names: Vec<String> = drv.outputs.keys().map(Clone::clone).collect(); let output_names: Vec<String> = drv.outputs.keys().map(Clone::clone).collect();

View file

@ -12,6 +12,7 @@
//! information. //! information.
use crate::refscan::{ReferenceScanner, STORE_PATH_LEN}; use crate::refscan::{ReferenceScanner, STORE_PATH_LEN};
use nix_compat::nixhash::NixHash;
use std::{ use std::{
collections::{hash_map, BTreeSet, HashMap}, collections::{hash_map, BTreeSet, HashMap},
ops::Index, ops::Index,
@ -67,11 +68,10 @@ pub struct KnownPaths {
/// path used for reference scanning. /// path used for reference scanning.
paths: HashMap<PathName, KnownPath>, paths: HashMap<PathName, KnownPath>,
/// All known replacement strings for derivations. /// All known derivation or FOD hashes.
/// ///
/// Keys are derivation paths, values are the opaque replacement /// Keys are derivation paths, values is the NixHash.
/// strings. derivation_or_fod_hashes: HashMap<String, NixHash>,
replacements: HashMap<String, String>,
} }
impl Index<&PathName> for KnownPaths { impl Index<&PathName> for KnownPaths {
@ -156,25 +156,29 @@ impl KnownPaths {
ReferenceScanner::new(candidates) ReferenceScanner::new(candidates)
} }
/// Fetch the opaque "replacement string" for a given derivation path. /// Fetch the opaque "hash derivation modulo" for a given derivation path.
pub fn get_replacement_string(&self, drv: &str) -> String { pub fn get_hash_derivation_modulo(&self, drv_path: &str) -> NixHash {
// TODO: we rely on an invariant that things *should* have // TODO: we rely on an invariant that things *should* have
// been calculated if we get this far. // been calculated if we get this far.
self.replacements[drv].clone() self.derivation_or_fod_hashes[drv_path].clone()
} }
pub fn add_replacement_string<D: ToString>(&mut self, drv: D, replacement_str: &str) { pub fn add_hash_derivation_modulo<D: ToString>(
&mut self,
drv: D,
hash_derivation_modulo: &NixHash,
) {
#[allow(unused_variables)] // assertions on this only compiled in debug builds #[allow(unused_variables)] // assertions on this only compiled in debug builds
let old = self let old = self
.replacements .derivation_or_fod_hashes
.insert(drv.to_string(), replacement_str.to_owned()); .insert(drv.to_string(), hash_derivation_modulo.to_owned());
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
{ {
if let Some(old) = old { if let Some(old) = old {
debug_assert!( debug_assert!(
old == replacement_str, old == *hash_derivation_modulo,
"replacement string for a given derivation should always match" "hash derivation modulo for a given derivation should always be calculated the same"
); );
} }
} }

View file

@ -150,21 +150,31 @@ impl Derivation {
utils::build_store_path(true, &s, name) utils::build_store_path(true, &s, name)
} }
/// Calculate the drv replacement string for a given derivation. /// Calculates the hash of a derivation modulo fixed-output subderivations.
/// ///
/// This is either called on a struct without output paths populated, /// This is called `hashDerivationModulo` in nixcpp.
/// to provide the `drv_replacement_str` value for the `calculate_output_paths`
/// function call, or called on a struct with output paths populated, to
/// calculate / cache lookups for calls to fn_get_drv_replacement.
/// ///
/// `fn_get_drv_replacement` is used to look up the drv replacement strings /// It returns a [NixHash], created by calculating the sha256 digest of
/// for input_derivations the Derivation refers to. /// the derivation ATerm representation, except that:
pub fn calculate_drv_replacement_str<F>(&self, fn_get_drv_replacement: F) -> String /// - any input derivation paths have beed replaced "by the result of a
/// recursive call to this function" and that
/// - for fixed-output derivations the special
/// `fixed:out:${algo}:${digest}:${fodPath}` string is hashed instead of
/// the A-Term.
///
/// TODO: what's the representation of ${digest}?
///
/// If the derivation is not a fixed derivation, it's up to the caller of
/// this function to provide a lookup function to lookup these calculation
/// results of parent derivations at `fn_get_hash_derivation_modulo` (by
/// drv path).
pub fn derivation_or_fod_hash<F>(&self, fn_get_derivation_or_fod_hash: F) -> NixHash
where where
F: Fn(&str) -> String, F: Fn(&str) -> NixHash,
{ {
let mut hasher = Sha256::new(); let mut hasher = Sha256::new();
let digest = match self.get_fixed_output() { let digest = match self.get_fixed_output() {
// Fixed-output derivations return a fixed hash
Some((fixed_output_path, fixed_output_hash)) => { Some((fixed_output_path, fixed_output_hash)) => {
hasher.update(format!( hasher.update(format!(
"fixed:out:{}:{}:{}", "fixed:out:{}:{}:{}",
@ -172,15 +182,24 @@ impl Derivation {
)); ));
hasher.finalize() hasher.finalize()
} }
// Non-Fixed-output derivations return a hash of the ATerm notation, but with all
// input_derivation paths replaced by a recursive call to this function.
// We use fn_get_derivation_or_fod_hash here, so callers can precompute this.
None => { None => {
// This is a new map from derivation_or_fod_hash.digest (as lowerhex)
// to list of output names
let mut replaced_input_derivations: BTreeMap<String, BTreeSet<String>> = let mut replaced_input_derivations: BTreeMap<String, BTreeSet<String>> =
BTreeMap::new(); BTreeMap::new();
// For each input_derivation, look up the replacement. // For each input_derivation, look up the
for (drv_path, input_derivation) in &self.input_derivations { // derivation_or_fod_hash, and replace the derivation path with it's HEXLOWER
// digest.
// This is not the [NixHash::to_nix_hash_string], but without the sha256: prefix).
for (drv_path, output_names) in &self.input_derivations {
replaced_input_derivations.insert( replaced_input_derivations.insert(
fn_get_drv_replacement(drv_path).to_string(), data_encoding::HEXLOWER
input_derivation.clone(), .encode(&fn_get_derivation_or_fod_hash(&drv_path).digest),
output_names.clone(),
); );
} }
@ -196,8 +215,7 @@ impl Derivation {
hasher.finalize() hasher.finalize()
} }
}; };
NixHash::new(crate::nixhash::HashAlgo::Sha256, digest.to_vec())
format!("{:x}", digest)
} }
/// This calculates all output paths of a Derivation and updates the struct. /// This calculates all output paths of a Derivation and updates the struct.
@ -205,17 +223,13 @@ impl Derivation {
/// This means, self.outputs[$outputName].path needs to be an empty string, /// This means, self.outputs[$outputName].path needs to be an empty string,
/// and self.environment[$outputName] needs to be an empty string. /// and self.environment[$outputName] needs to be an empty string.
/// ///
/// Output path calculation requires knowledge of "drv replacement /// Output path calculation requires knowledge of the
/// strings", and in case of non-fixed-output derivations, also knowledge /// derivation_or_fod_hash [NixHash], which (in case of non-fixed-output
/// of "drv replacement" strings (recursively) of all input derivations. /// derivations) also requires knowledge of other hash_derivation_modulo
/// [NixHash]es.
/// ///
/// We solve this by asking the caller of this function to provide /// We solve this by asking the caller of this function to provide the
/// the drv replacement string of the current derivation itself, /// hash_derivation_modulo of the current Derivation.
/// which is ran on the struct without output paths.
///
/// This sound terribly ugly, but won't be too much of a concern later on, as
/// naming fixed-output paths once uploaded will be a tvix-store concern,
/// so there's no need to calculate them here anymore.
/// ///
/// On completion, self.environment[$outputName] and /// On completion, self.environment[$outputName] and
/// self.outputs[$outputName].path are set to the calculated output path for all /// self.outputs[$outputName].path are set to the calculated output path for all
@ -223,7 +237,7 @@ impl Derivation {
pub fn calculate_output_paths( pub fn calculate_output_paths(
&mut self, &mut self,
name: &str, name: &str,
drv_replacement_str: &str, derivation_or_fod_hash: &NixHash,
) -> Result<(), DerivationError> { ) -> Result<(), DerivationError> {
// Check if the Derivation is fixed output, because they cause // Check if the Derivation is fixed output, because they cause
// different fingerprints to be hashed. // different fingerprints to be hashed.
@ -244,9 +258,9 @@ impl Derivation {
} }
let s = &format!( let s = &format!(
"output:{}:sha256:{}:{}:{}", "output:{}:{}:{}:{}",
output_name, output_name,
drv_replacement_str, derivation_or_fod_hash.to_nix_hash_string(),
store_path::STORE_DIR, store_path::STORE_DIR,
output_path_name, output_path_name,
); );
@ -277,9 +291,9 @@ impl Derivation {
fixed_output_hash.digest, // nixbase32 fixed_output_hash.digest, // nixbase32
)); ));
} else { } else {
s.push_str("output:out:sha256:"); s.push_str("output:out:");
// This is drv_replacement for FOD, with an empty fixed_output_path. // This is drv_replacement for FOD, with an empty fixed_output_path.
s.push_str(drv_replacement_str); s.push_str(&derivation_or_fod_hash.to_nix_hash_string());
} }
s.push_str(&format!(":{}:{}", store_path::STORE_DIR, name)); s.push_str(&format!(":{}:{}", store_path::STORE_DIR, name));
s s

View file

@ -96,16 +96,16 @@ fn derivation_with_trimmed_outputs(derivation: &Derivation) -> Derivation {
} }
} }
#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", "724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"; "fixed_sha256")] #[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", "sha256:724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"; "fixed_sha256")]
#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", "c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df";"fixed-sha1")] #[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", "sha256:c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df";"fixed-sha1")]
fn replacement_drv_path(drv_path: &str, expected_replacement_str: &str) { fn hash_derivation_modulo(drv_path: &str, expected_nix_hash_string: &str) {
// read in the fixture // read in the fixture
let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, drv_path)); let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, drv_path));
let drv: Derivation = serde_json::from_str(&data).expect("must deserialize"); let drv: Derivation = serde_json::from_str(&data).expect("must deserialize");
let drv_replacement_str = drv.calculate_drv_replacement_str(|_| panic!("must not be called")); let actual = drv.derivation_or_fod_hash(|_| panic!("must not be called"));
assert_eq!(expected_replacement_str, drv_replacement_str); assert_eq!(expected_nix_hash_string, actual.to_nix_hash_string());
} }
#[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")] #[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")]
@ -122,16 +122,16 @@ fn output_paths(name: &str, drv_path: &str) {
let mut derivation = derivation_with_trimmed_outputs(&expected_derivation); let mut derivation = derivation_with_trimmed_outputs(&expected_derivation);
// calculate the drv replacement string. // calculate the derivation_or_fod_hash of derivation
// We don't expect the lookup function to be called for most derivations. // We don't expect the lookup function to be called for most derivations.
let replacement_str = derivation.calculate_drv_replacement_str(|drv_name| { let calculated_derivation_or_fod_hash = derivation.derivation_or_fod_hash(|parent_drv_path| {
// 4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv may lookup /nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv // 4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv may lookup /nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv
// ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv may lookup /nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv // ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv may lookup /nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv
if name == "foo" if name == "foo"
&& ((drv_path == "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv" && ((drv_path == "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv"
&& drv_name == "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv") && parent_drv_path == "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv")
|| (drv_path == "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv" || (drv_path == "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv"
&& drv_name == "/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv")) && parent_drv_path == "/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv"))
{ {
// do the lookup, by reading in the fixture of the requested // do the lookup, by reading in the fixture of the requested
// drv_name, and calculating its drv replacement (on the non-stripped version) // drv_name, and calculating its drv replacement (on the non-stripped version)
@ -140,22 +140,25 @@ fn output_paths(name: &str, drv_path: &str) {
let data = read_file(&format!( let data = read_file(&format!(
"{}/{}.json", "{}/{}.json",
RESOURCES_PATHS, RESOURCES_PATHS,
Path::new(drv_name).file_name().unwrap().to_string_lossy() Path::new(parent_drv_path)
.file_name()
.unwrap()
.to_string_lossy()
)); ));
let drv: Derivation = serde_json::from_str(&data).expect("must deserialize"); let drv: Derivation = serde_json::from_str(&data).expect("must deserialize");
// calculate replacement string. These don't trigger any subsequent requests, as they're both FOD. // calculate derivation_or_fod_hash for each parent.
drv.calculate_drv_replacement_str(|_| panic!("must not lookup")) // This may not trigger subsequent requests, as both parents are FOD.
drv.derivation_or_fod_hash(|_| panic!("must not lookup"))
} else { } else {
// we only expect this to be called in the "foo" testcase, for the "bar derivations" // we only expect this to be called in the "foo" testcase, for the "bar derivations"
panic!("may only be called for foo testcase on bar derivations"); panic!("may only be called for foo testcase on bar derivations");
} }
}); });
// We need to calculate the replacement_str, as fixed-sha1 does use it.
derivation derivation
.calculate_output_paths(name, &replacement_str) .calculate_output_paths(name, &calculated_derivation_or_fod_hash)
.unwrap(); .unwrap();
// The derivation should now look like it was before // The derivation should now look like it was before
@ -226,7 +229,7 @@ fn output_path_construction() {
// calculate bar output paths // calculate bar output paths
let bar_calc_result = bar_drv.calculate_output_paths( let bar_calc_result = bar_drv.calculate_output_paths(
"bar", "bar",
&bar_drv.calculate_drv_replacement_str(|_| panic!("is FOD, should not lookup")), &bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup")),
); );
assert!(bar_calc_result.is_ok()); assert!(bar_calc_result.is_ok());
@ -241,8 +244,8 @@ fn output_path_construction() {
// now construct foo, which requires bar_drv // now construct foo, which requires bar_drv
// Note how we refer to the output path, drv name and replacement_str (with calculated output paths) of bar. // Note how we refer to the output path, drv name and replacement_str (with calculated output paths) of bar.
let bar_output_path = &bar_drv.outputs.get("out").expect("must exist").path; let bar_output_path = &bar_drv.outputs.get("out").expect("must exist").path;
let bar_drv_replacement_str = let bar_drv_derivation_or_fod_hash =
&bar_drv.calculate_drv_replacement_str(|_| panic!("is FOD, should not lookup")); bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup"));
let bar_drv_path = bar_drv let bar_drv_path = bar_drv
.calculate_derivation_path("bar") .calculate_derivation_path("bar")
@ -281,11 +284,11 @@ fn output_path_construction() {
// calculate foo output paths // calculate foo output paths
let foo_calc_result = foo_drv.calculate_output_paths( let foo_calc_result = foo_drv.calculate_output_paths(
"foo", "foo",
&foo_drv.calculate_drv_replacement_str(|drv_name| { &foo_drv.derivation_or_fod_hash(|drv_path| {
if drv_name != "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" { if drv_path != "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" {
panic!("lookup called with unexpected drv_name: {}", drv_name); panic!("lookup called with unexpected drv_path: {}", drv_path);
} }
bar_drv_replacement_str.clone() bar_drv_derivation_or_fod_hash.clone()
}), }),
); );
assert!(foo_calc_result.is_ok()); assert!(foo_calc_result.is_ok());