diff --git a/tvix/cli/src/derivation.rs b/tvix/cli/src/derivation.rs index 15c4c6f85..61c448995 100644 --- a/tvix/cli/src/derivation.rs +++ b/tvix/cli/src/derivation.rs @@ -339,21 +339,27 @@ mod derivation_builtins { // eval data structures. drv.validate(false).map_err(Error::InvalidDerivation)?; - let tmp_replacement_str = - drv.calculate_drv_replacement_str(|drv| known_paths.get_replacement_string(drv)); + // Calculate the derivation_or_fod_hash for the current derivation. + // This one is still intermediate (so not added to known_paths) + let derivation_or_fod_hash_tmp = + drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv)); - drv.calculate_output_paths(&name, &tmp_replacement_str) + // Mutate the Derivation struct and set output paths + drv.calculate_output_paths(&name, &derivation_or_fod_hash_tmp) .map_err(Error::InvalidDerivation)?; - let actual_replacement_str = - drv.calculate_drv_replacement_str(|drv| known_paths.get_replacement_string(drv)); - let derivation_path = drv .calculate_derivation_path(&name) .map_err(Error::InvalidDerivation)?; - known_paths - .add_replacement_string(derivation_path.to_absolute_path(), &actual_replacement_str); + // recompute the hash derivation modulo and add to known_paths + let derivation_or_fod_hash_final = + drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv)); + + known_paths.add_hash_derivation_modulo( + derivation_path.to_absolute_path(), + &derivation_or_fod_hash_final, + ); // mark all the new paths as known let output_names: Vec = drv.outputs.keys().map(Clone::clone).collect(); diff --git a/tvix/cli/src/known_paths.rs b/tvix/cli/src/known_paths.rs index f16fad902..07373ef0d 100644 --- a/tvix/cli/src/known_paths.rs +++ b/tvix/cli/src/known_paths.rs @@ -12,6 +12,7 @@ //! information. use crate::refscan::{ReferenceScanner, STORE_PATH_LEN}; +use nix_compat::nixhash::NixHash; use std::{ collections::{hash_map, BTreeSet, HashMap}, ops::Index, @@ -67,11 +68,10 @@ pub struct KnownPaths { /// path used for reference scanning. paths: HashMap, - /// All known replacement strings for derivations. + /// All known derivation or FOD hashes. /// - /// Keys are derivation paths, values are the opaque replacement - /// strings. - replacements: HashMap, + /// Keys are derivation paths, values is the NixHash. + derivation_or_fod_hashes: HashMap, } impl Index<&PathName> for KnownPaths { @@ -156,25 +156,29 @@ impl KnownPaths { ReferenceScanner::new(candidates) } - /// Fetch the opaque "replacement string" for a given derivation path. - pub fn get_replacement_string(&self, drv: &str) -> String { + /// Fetch the opaque "hash derivation modulo" for a given derivation path. + pub fn get_hash_derivation_modulo(&self, drv_path: &str) -> NixHash { // TODO: we rely on an invariant that things *should* have // been calculated if we get this far. - self.replacements[drv].clone() + self.derivation_or_fod_hashes[drv_path].clone() } - pub fn add_replacement_string(&mut self, drv: D, replacement_str: &str) { + pub fn add_hash_derivation_modulo( + &mut self, + drv: D, + hash_derivation_modulo: &NixHash, + ) { #[allow(unused_variables)] // assertions on this only compiled in debug builds let old = self - .replacements - .insert(drv.to_string(), replacement_str.to_owned()); + .derivation_or_fod_hashes + .insert(drv.to_string(), hash_derivation_modulo.to_owned()); #[cfg(debug_assertions)] { if let Some(old) = old { debug_assert!( - old == replacement_str, - "replacement string for a given derivation should always match" + old == *hash_derivation_modulo, + "hash derivation modulo for a given derivation should always be calculated the same" ); } } diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs index d348b0d71..e0e18823c 100644 --- a/tvix/nix-compat/src/derivation/mod.rs +++ b/tvix/nix-compat/src/derivation/mod.rs @@ -150,21 +150,31 @@ impl Derivation { utils::build_store_path(true, &s, name) } - /// Calculate the drv replacement string for a given derivation. + /// Calculates the hash of a derivation modulo fixed-output subderivations. /// - /// This is either called on a struct without output paths populated, - /// to provide the `drv_replacement_str` value for the `calculate_output_paths` - /// function call, or called on a struct with output paths populated, to - /// calculate / cache lookups for calls to fn_get_drv_replacement. + /// This is called `hashDerivationModulo` in nixcpp. /// - /// `fn_get_drv_replacement` is used to look up the drv replacement strings - /// for input_derivations the Derivation refers to. - pub fn calculate_drv_replacement_str(&self, fn_get_drv_replacement: F) -> String + /// It returns a [NixHash], created by calculating the sha256 digest of + /// the derivation ATerm representation, except that: + /// - any input derivation paths have beed replaced "by the result of a + /// recursive call to this function" and that + /// - for fixed-output derivations the special + /// `fixed:out:${algo}:${digest}:${fodPath}` string is hashed instead of + /// the A-Term. + /// + /// TODO: what's the representation of ${digest}? + /// + /// If the derivation is not a fixed derivation, it's up to the caller of + /// this function to provide a lookup function to lookup these calculation + /// results of parent derivations at `fn_get_hash_derivation_modulo` (by + /// drv path). + pub fn derivation_or_fod_hash(&self, fn_get_derivation_or_fod_hash: F) -> NixHash where - F: Fn(&str) -> String, + F: Fn(&str) -> NixHash, { let mut hasher = Sha256::new(); let digest = match self.get_fixed_output() { + // Fixed-output derivations return a fixed hash Some((fixed_output_path, fixed_output_hash)) => { hasher.update(format!( "fixed:out:{}:{}:{}", @@ -172,15 +182,24 @@ impl Derivation { )); hasher.finalize() } + // Non-Fixed-output derivations return a hash of the ATerm notation, but with all + // input_derivation paths replaced by a recursive call to this function. + // We use fn_get_derivation_or_fod_hash here, so callers can precompute this. None => { + // This is a new map from derivation_or_fod_hash.digest (as lowerhex) + // to list of output names let mut replaced_input_derivations: BTreeMap> = BTreeMap::new(); - // For each input_derivation, look up the replacement. - for (drv_path, input_derivation) in &self.input_derivations { + // For each input_derivation, look up the + // derivation_or_fod_hash, and replace the derivation path with it's HEXLOWER + // digest. + // This is not the [NixHash::to_nix_hash_string], but without the sha256: prefix). + for (drv_path, output_names) in &self.input_derivations { replaced_input_derivations.insert( - fn_get_drv_replacement(drv_path).to_string(), - input_derivation.clone(), + data_encoding::HEXLOWER + .encode(&fn_get_derivation_or_fod_hash(&drv_path).digest), + output_names.clone(), ); } @@ -196,8 +215,7 @@ impl Derivation { hasher.finalize() } }; - - format!("{:x}", digest) + NixHash::new(crate::nixhash::HashAlgo::Sha256, digest.to_vec()) } /// This calculates all output paths of a Derivation and updates the struct. @@ -205,17 +223,13 @@ impl Derivation { /// This means, self.outputs[$outputName].path needs to be an empty string, /// and self.environment[$outputName] needs to be an empty string. /// - /// Output path calculation requires knowledge of "drv replacement - /// strings", and in case of non-fixed-output derivations, also knowledge - /// of "drv replacement" strings (recursively) of all input derivations. + /// Output path calculation requires knowledge of the + /// derivation_or_fod_hash [NixHash], which (in case of non-fixed-output + /// derivations) also requires knowledge of other hash_derivation_modulo + /// [NixHash]es. /// - /// We solve this by asking the caller of this function to provide - /// the drv replacement string of the current derivation itself, - /// which is ran on the struct without output paths. - /// - /// This sound terribly ugly, but won't be too much of a concern later on, as - /// naming fixed-output paths once uploaded will be a tvix-store concern, - /// so there's no need to calculate them here anymore. + /// We solve this by asking the caller of this function to provide the + /// hash_derivation_modulo of the current Derivation. /// /// On completion, self.environment[$outputName] and /// self.outputs[$outputName].path are set to the calculated output path for all @@ -223,7 +237,7 @@ impl Derivation { pub fn calculate_output_paths( &mut self, name: &str, - drv_replacement_str: &str, + derivation_or_fod_hash: &NixHash, ) -> Result<(), DerivationError> { // Check if the Derivation is fixed output, because they cause // different fingerprints to be hashed. @@ -244,9 +258,9 @@ impl Derivation { } let s = &format!( - "output:{}:sha256:{}:{}:{}", + "output:{}:{}:{}:{}", output_name, - drv_replacement_str, + derivation_or_fod_hash.to_nix_hash_string(), store_path::STORE_DIR, output_path_name, ); @@ -277,9 +291,9 @@ impl Derivation { fixed_output_hash.digest, // nixbase32 )); } else { - s.push_str("output:out:sha256:"); + s.push_str("output:out:"); // This is drv_replacement for FOD, with an empty fixed_output_path. - s.push_str(drv_replacement_str); + s.push_str(&derivation_or_fod_hash.to_nix_hash_string()); } s.push_str(&format!(":{}:{}", store_path::STORE_DIR, name)); s diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs index 34f6cae67..1a4f8ce23 100644 --- a/tvix/nix-compat/src/derivation/tests/mod.rs +++ b/tvix/nix-compat/src/derivation/tests/mod.rs @@ -96,16 +96,16 @@ fn derivation_with_trimmed_outputs(derivation: &Derivation) -> Derivation { } } -#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", "724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"; "fixed_sha256")] -#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", "c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df";"fixed-sha1")] -fn replacement_drv_path(drv_path: &str, expected_replacement_str: &str) { +#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", "sha256:724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"; "fixed_sha256")] +#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", "sha256:c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df";"fixed-sha1")] +fn hash_derivation_modulo(drv_path: &str, expected_nix_hash_string: &str) { // read in the fixture let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, drv_path)); let drv: Derivation = serde_json::from_str(&data).expect("must deserialize"); - let drv_replacement_str = drv.calculate_drv_replacement_str(|_| panic!("must not be called")); + let actual = drv.derivation_or_fod_hash(|_| panic!("must not be called")); - assert_eq!(expected_replacement_str, drv_replacement_str); + assert_eq!(expected_nix_hash_string, actual.to_nix_hash_string()); } #[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")] @@ -122,16 +122,16 @@ fn output_paths(name: &str, drv_path: &str) { let mut derivation = derivation_with_trimmed_outputs(&expected_derivation); - // calculate the drv replacement string. + // calculate the derivation_or_fod_hash of derivation // We don't expect the lookup function to be called for most derivations. - let replacement_str = derivation.calculate_drv_replacement_str(|drv_name| { + let calculated_derivation_or_fod_hash = derivation.derivation_or_fod_hash(|parent_drv_path| { // 4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv may lookup /nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv // ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv may lookup /nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv if name == "foo" && ((drv_path == "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv" - && drv_name == "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv") + && parent_drv_path == "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv") || (drv_path == "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv" - && drv_name == "/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv")) + && parent_drv_path == "/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv")) { // do the lookup, by reading in the fixture of the requested // drv_name, and calculating its drv replacement (on the non-stripped version) @@ -140,22 +140,25 @@ fn output_paths(name: &str, drv_path: &str) { let data = read_file(&format!( "{}/{}.json", RESOURCES_PATHS, - Path::new(drv_name).file_name().unwrap().to_string_lossy() + Path::new(parent_drv_path) + .file_name() + .unwrap() + .to_string_lossy() )); let drv: Derivation = serde_json::from_str(&data).expect("must deserialize"); - // calculate replacement string. These don't trigger any subsequent requests, as they're both FOD. - drv.calculate_drv_replacement_str(|_| panic!("must not lookup")) + // calculate derivation_or_fod_hash for each parent. + // This may not trigger subsequent requests, as both parents are FOD. + drv.derivation_or_fod_hash(|_| panic!("must not lookup")) } else { // we only expect this to be called in the "foo" testcase, for the "bar derivations" panic!("may only be called for foo testcase on bar derivations"); } }); - // We need to calculate the replacement_str, as fixed-sha1 does use it. derivation - .calculate_output_paths(name, &replacement_str) + .calculate_output_paths(name, &calculated_derivation_or_fod_hash) .unwrap(); // The derivation should now look like it was before @@ -226,7 +229,7 @@ fn output_path_construction() { // calculate bar output paths let bar_calc_result = bar_drv.calculate_output_paths( "bar", - &bar_drv.calculate_drv_replacement_str(|_| panic!("is FOD, should not lookup")), + &bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup")), ); assert!(bar_calc_result.is_ok()); @@ -241,8 +244,8 @@ fn output_path_construction() { // now construct foo, which requires bar_drv // Note how we refer to the output path, drv name and replacement_str (with calculated output paths) of bar. let bar_output_path = &bar_drv.outputs.get("out").expect("must exist").path; - let bar_drv_replacement_str = - &bar_drv.calculate_drv_replacement_str(|_| panic!("is FOD, should not lookup")); + let bar_drv_derivation_or_fod_hash = + bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup")); let bar_drv_path = bar_drv .calculate_derivation_path("bar") @@ -281,11 +284,11 @@ fn output_path_construction() { // calculate foo output paths let foo_calc_result = foo_drv.calculate_output_paths( "foo", - &foo_drv.calculate_drv_replacement_str(|drv_name| { - if drv_name != "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" { - panic!("lookup called with unexpected drv_name: {}", drv_name); + &foo_drv.derivation_or_fod_hash(|drv_path| { + if drv_path != "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" { + panic!("lookup called with unexpected drv_path: {}", drv_path); } - bar_drv_replacement_str.clone() + bar_drv_derivation_or_fod_hash.clone() }), ); assert!(foo_calc_result.is_ok());