From c0d54393627748187c05bf0c7dd59cc16efd7f61 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Thu, 2 May 2024 11:45:48 +0300 Subject: [PATCH] refactor(nix-compat): derivation_or_fod_hash -> hash_derivation_modulo There's no need for us to come up with our own names for this. Also update the comments/docstrings a bit, and inline the intermediate hash_derivation_modulo calculation. Change-Id: I09dab8ffe1ebfb6601841e98119eee4ff25d8f39 Reviewed-on: https://cl.tvl.fyi/c/depot/+/11578 Reviewed-by: edef Autosubmit: flokli Tested-by: BuildkiteCI --- tvix/glue/src/builtins/derivation.rs | 26 +++++++----- tvix/glue/src/known_paths.rs | 2 +- tvix/nix-compat/src/derivation/mod.rs | 45 +++++++++++---------- tvix/nix-compat/src/derivation/tests/mod.rs | 32 +++++++-------- 4 files changed, 57 insertions(+), 48 deletions(-) diff --git a/tvix/glue/src/builtins/derivation.rs b/tvix/glue/src/builtins/derivation.rs index 37d2460d9..a7742ae40 100644 --- a/tvix/glue/src/builtins/derivation.rs +++ b/tvix/glue/src/builtins/derivation.rs @@ -457,18 +457,24 @@ pub(crate) mod derivation_builtins { drv.validate(false) .map_err(DerivationError::InvalidDerivation)?; - // Calculate the derivation_or_fod_hash for the current derivation. - // This one is still intermediate (so not added to known_paths) - let derivation_or_fod_hash_tmp = drv.derivation_or_fod_hash(|drv_path| { - known_paths - .get_hash_derivation_modulo(&drv_path.to_owned()) - .unwrap_or_else(|| panic!("{} not found", drv_path)) - .to_owned() - }); + // Calculate the hash_derivation_modulo for the current derivation.. + debug_assert!( + drv.outputs.values().all(|output| { output.path.is_none() }), + "outputs should still be unset" + ); // Mutate the Derivation struct and set output paths - drv.calculate_output_paths(name, &derivation_or_fod_hash_tmp) - .map_err(DerivationError::InvalidDerivation)?; + drv.calculate_output_paths( + name, + // This one is still intermediate (so not added to known_paths), + // as the outputs are still unset. + &drv.hash_derivation_modulo(|drv_path| { + *known_paths + .get_hash_derivation_modulo(&drv_path.to_owned()) + .unwrap_or_else(|| panic!("{} not found", drv_path)) + }), + ) + .map_err(DerivationError::InvalidDerivation)?; let drv_path = drv .calculate_derivation_path(name) diff --git a/tvix/glue/src/known_paths.rs b/tvix/glue/src/known_paths.rs index c95065592..290c9d5b6 100644 --- a/tvix/glue/src/known_paths.rs +++ b/tvix/glue/src/known_paths.rs @@ -73,7 +73,7 @@ impl KnownPaths { } // compute the hash derivation modulo - let hash_derivation_modulo = drv.derivation_or_fod_hash(|drv_path| { + let hash_derivation_modulo = drv.hash_derivation_modulo(|drv_path| { self.get_hash_derivation_modulo(&drv_path.to_owned()) .unwrap_or_else(|| panic!("{} not found", drv_path)) .to_owned() diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs index 07da127ed..6e12e3ea8 100644 --- a/tvix/nix-compat/src/derivation/mod.rs +++ b/tvix/nix-compat/src/derivation/mod.rs @@ -188,11 +188,12 @@ impl Derivation { /// `fixed:out:${algo}:${digest}:${fodPath}` string is hashed instead of /// the A-Term. /// - /// If the derivation is not a fixed derivation, it's up to the caller of - /// this function to provide a lookup function to lookup these calculation - /// results of parent derivations at `fn_get_derivation_or_fod_hash` (by - /// drv path). - pub fn derivation_or_fod_hash(&self, fn_get_derivation_or_fod_hash: F) -> [u8; 32] + /// It's up to the caller of this function to provide a (infallible) lookup + /// function to query [hash_derivation_modulo] of direct input derivations, + /// by their [StorePathRef]. + /// It will only be called in case the derivation is not a fixed-output + /// derivation. + pub fn hash_derivation_modulo(&self, fn_lookup_hash_derivation_modulo: F) -> [u8; 32] where F: Fn(&StorePathRef) -> [u8; 32], { @@ -200,16 +201,16 @@ impl Derivation { // Non-Fixed-output derivations return the sha256 digest of the ATerm // notation, but with all input_derivation paths replaced by a recursive // call to this function. - // We use fn_get_derivation_or_fod_hash here, so callers can precompute this. + // We call [fn_lookup_hash_derivation_modulo] rather than recursing + // ourselves, so callers can precompute this. self.fod_digest().unwrap_or({ - // For each input_derivation, look up the - // derivation_or_fod_hash, and replace the derivation path with - // it's HEXLOWER digest. + // For each input_derivation, look up the hash derivation modulo, + // and replace the derivation path in the aterm with it's HEXLOWER digest. let aterm_bytes = self.to_aterm_bytes_with_replacements(&BTreeMap::from_iter( self.input_derivations .iter() .map(|(drv_path, output_names)| { - let hash = fn_get_derivation_or_fod_hash(&drv_path.into()); + let hash = fn_lookup_hash_derivation_modulo(&drv_path.into()); (hash, output_names.to_owned()) }), @@ -226,20 +227,22 @@ impl Derivation { /// and self.environment[$outputName] needs to be an empty string. /// /// Output path calculation requires knowledge of the - /// derivation_or_fod_hash [NixHash], which (in case of non-fixed-output - /// derivations) also requires knowledge of other hash_derivation_modulo - /// [NixHash]es. + /// [hash_derivation_modulo], which (in case of non-fixed-output + /// derivations) also requires knowledge of the [hash_derivation_modulo] of + /// input derivations (recursively). /// - /// We solve this by asking the caller of this function to provide the - /// hash_derivation_modulo of the current Derivation. + /// To avoid recursing and doing unnecessary calculation, we simply + /// ask the caller of this function to provide the result of the + /// [hash_derivation_modulo] call of the current [Derivation], + /// and leave it up to them to calculate it when needed. /// - /// On completion, self.environment[$outputName] and - /// self.outputs[$outputName].path are set to the calculated output path for all + /// On completion, `self.environment[$outputName]` and + /// `self.outputs[$outputName].path` are set to the calculated output path for all /// outputs. pub fn calculate_output_paths( &mut self, name: &str, - derivation_or_fod_hash: &[u8; 32], + hash_derivation_modulo: &[u8; 32], ) -> Result<(), DerivationError> { // The fingerprint and hash differs per output for (output_name, output) in self.outputs.iter_mut() { @@ -250,14 +253,14 @@ impl Derivation { let path_name = output_path_name(name, output_name); - // For fixed output derivation we use the per-output info, otherwise we use the - // derivation hash. + // For fixed output derivation we use [build_ca_path], otherwise we + // use [build_output_path] with [hash_derivation_modulo]. let abs_store_path = if let Some(ref hwm) = output.ca_hash { build_ca_path(&path_name, hwm, Vec::::new(), false).map_err(|e| { DerivationError::InvalidOutputDerivationPath(output_name.to_string(), e) })? } else { - build_output_path(derivation_or_fod_hash, output_name, &path_name).map_err(|e| { + build_output_path(hash_derivation_modulo, output_name, &path_name).map_err(|e| { DerivationError::InvalidOutputDerivationPath( output_name.to_string(), store_path::BuildStorePathError::InvalidStorePath(e), diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs index 63a65356b..48d4e8926 100644 --- a/tvix/nix-compat/src/derivation/tests/mod.rs +++ b/tvix/nix-compat/src/derivation/tests/mod.rs @@ -164,7 +164,7 @@ fn derivation_path(#[case] name: &str, #[case] expected_path: &str) { /// This trims all output paths from a Derivation struct, /// by setting outputs[$outputName].path and environment[$outputName] to the empty string. -fn derivation_with_trimmed_output_paths(derivation: &Derivation) -> Derivation { +fn derivation_without_output_paths(derivation: &Derivation) -> Derivation { let mut trimmed_env = derivation.environment.clone(); let mut trimmed_outputs = derivation.outputs.clone(); @@ -191,13 +191,13 @@ fn derivation_with_trimmed_output_paths(derivation: &Derivation) -> Derivation { #[rstest] #[case::fixed_sha256("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", hex!("724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"))] #[case::fixed_sha1("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", hex!("c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df"))] -fn derivation_or_fod_hash(#[case] drv_path: &str, #[case] expected_digest: [u8; 32]) { +fn hash_derivation_modulo_fixed(#[case] drv_path: &str, #[case] expected_digest: [u8; 32]) { // read in the fixture let json_bytes = fs::read(format!("{}/ok/{}.json", RESOURCES_PATHS, drv_path)).expect("unable to read JSON"); let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize"); - let actual = drv.derivation_or_fod_hash(|_| panic!("must not be called")); + let actual = drv.hash_derivation_modulo(|_| panic!("must not be called")); assert_eq!(expected_digest, actual); } @@ -224,13 +224,13 @@ fn output_paths(#[case] name: &str, #[case] drv_path_str: &str) { ) .expect("must succeed"); - // create a version with trimmed output paths, simulating we constructed - // the struct. - let mut derivation = derivation_with_trimmed_output_paths(&expected_derivation); + // create a version without output paths, simulating we constructed the + // struct. + let mut derivation = derivation_without_output_paths(&expected_derivation); - // calculate the derivation_or_fod_hash of derivation + // calculate the hash_derivation_modulo of Derivation // We don't expect the lookup function to be called for most derivations. - let calculated_derivation_or_fod_hash = derivation.derivation_or_fod_hash(|parent_drv_path| { + let actual_hash_derivation_modulo = derivation.hash_derivation_modulo(|parent_drv_path| { // 4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv may lookup /nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv // ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv may lookup /nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv if name == "foo" @@ -255,9 +255,9 @@ fn output_paths(#[case] name: &str, #[case] drv_path_str: &str) { let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize"); - // calculate derivation_or_fod_hash for each parent. + // calculate hash_derivation_modulo for each parent. // This may not trigger subsequent requests, as both parents are FOD. - drv.derivation_or_fod_hash(|_| panic!("must not lookup")) + drv.hash_derivation_modulo(|_| panic!("must not lookup")) } else { // we only expect this to be called in the "foo" testcase, for the "bar derivations" panic!("may only be called for foo testcase on bar derivations"); @@ -265,7 +265,7 @@ fn output_paths(#[case] name: &str, #[case] drv_path_str: &str) { }); derivation - .calculate_output_paths(name, &calculated_derivation_or_fod_hash) + .calculate_output_paths(name, &actual_hash_derivation_modulo) .unwrap(); // The derivation should now look like it was before @@ -343,7 +343,7 @@ fn output_path_construction() { // calculate bar output paths let bar_calc_result = bar_drv.calculate_output_paths( "bar", - &bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup")), + &bar_drv.hash_derivation_modulo(|_| panic!("is FOD, should not lookup")), ); assert!(bar_calc_result.is_ok()); @@ -360,8 +360,8 @@ fn output_path_construction() { // now construct foo, which requires bar_drv // Note how we refer to the output path, drv name and replacement_str (with calculated output paths) of bar. let bar_output_path = &bar_drv.outputs.get("out").expect("must exist").path; - let bar_drv_derivation_or_fod_hash = - bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup")); + let bar_drv_hash_derivation_modulo = + bar_drv.hash_derivation_modulo(|_| panic!("is FOD, should not lookup")); let bar_drv_path = bar_drv .calculate_derivation_path("bar") @@ -408,11 +408,11 @@ fn output_path_construction() { // calculate foo output paths let foo_calc_result = foo_drv.calculate_output_paths( "foo", - &foo_drv.derivation_or_fod_hash(|drv_path| { + &foo_drv.hash_derivation_modulo(|drv_path| { if drv_path.to_string() != "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" { panic!("lookup called with unexpected drv_path: {}", drv_path); } - bar_drv_derivation_or_fod_hash + bar_drv_hash_derivation_modulo }), ); assert!(foo_calc_result.is_ok());