From c06fb01b3b7a752e0c04ba21a02cdc3f431055e1 Mon Sep 17 00:00:00 2001 From: Peter Kolloch Date: Wed, 21 Feb 2024 18:24:50 +0700 Subject: [PATCH] feat(tvix/nix-compat): input_derivations with StorePaths ...in `Derivation`. This is more type-safe and should consume less memory. This also removes some allocations in the potentially hot path of output hash calculation. https: //b.tvl.fyi/issues/264 Change-Id: I6ad7d3cb868dc9f750894d449a6065608ef06e8c Reviewed-on: https://cl.tvl.fyi/c/depot/+/10957 Tested-by: BuildkiteCI Reviewed-by: flokli Autosubmit: Peter Kolloch Reviewed-by: Peter Kolloch --- tvix/glue/src/builtins/derivation.rs | 19 ++++- tvix/glue/src/known_paths.rs | 10 +-- tvix/glue/src/tvix_store_io.rs | 8 +-- tvix/nix-compat/src/derivation/mod.rs | 31 +++----- tvix/nix-compat/src/derivation/parser.rs | 42 ++++++++--- tvix/nix-compat/src/derivation/tests/mod.rs | 9 ++- tvix/nix-compat/src/derivation/validate.rs | 9 +-- tvix/nix-compat/src/derivation/write.rs | 4 +- tvix/nix-compat/src/store_path/mod.rs | 79 ++++++++++++++++++--- 9 files changed, 138 insertions(+), 73 deletions(-) diff --git a/tvix/glue/src/builtins/derivation.rs b/tvix/glue/src/builtins/derivation.rs index 1ee8d531c..1208ca94d 100644 --- a/tvix/glue/src/builtins/derivation.rs +++ b/tvix/glue/src/builtins/derivation.rs @@ -4,6 +4,7 @@ use crate::tvix_store_io::TvixStoreIO; use bstr::BString; use nix_compat::derivation::{Derivation, Output}; use nix_compat::nixhash; +use nix_compat::store_path::StorePath; use std::collections::{btree_map, BTreeSet}; use std::rc::Rc; use tvix_eval::builtin_macros::builtins; @@ -26,7 +27,23 @@ fn populate_inputs(drv: &mut Derivation, full_context: NixContext) { drv.input_sources.insert(source.clone()); } - NixContextElement::Single { name, derivation } => { + NixContextElement::Single { + name, + derivation: derivation_str, + } => { + // TODO: b/264 + // We assume derivations to be passed validated, so ignoring rest + // and expecting parsing is ok. + let (derivation, _rest) = + StorePath::from_absolute_path_full(derivation_str).expect("valid store path"); + + #[cfg(debug_assertions)] + assert!( + _rest.iter().next().is_none(), + "Extra path not empty for {}", + derivation_str + ); + match drv.input_derivations.entry(derivation.clone()) { btree_map::Entry::Vacant(entry) => { entry.insert(BTreeSet::from([name.clone()])); diff --git a/tvix/glue/src/known_paths.rs b/tvix/glue/src/known_paths.rs index 03b04d4fc..bac7e34a7 100644 --- a/tvix/glue/src/known_paths.rs +++ b/tvix/glue/src/known_paths.rs @@ -59,14 +59,8 @@ impl KnownPaths { // check input derivations to have been inserted. #[cfg(debug_assertions)] { - // TODO: b/264 - // We assume derivations to be passed validated, so ignoring rest - // and expecting parsing is ok. - for input_drv_path_str in drv.input_derivations.keys() { - let (input_drv_path, _rest) = - StorePath::from_absolute_path_full(input_drv_path_str) - .expect("parse input drv path"); - debug_assert!(self.derivations.contains_key(&input_drv_path)); + for input_drv_path in drv.input_derivations.keys() { + debug_assert!(self.derivations.contains_key(input_drv_path)); } } diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs index 375501b65..296a369e2 100644 --- a/tvix/glue/src/tvix_store_io.rs +++ b/tvix/glue/src/tvix_store_io.rs @@ -140,17 +140,11 @@ impl TvixStoreIO { let input_nodes: BTreeSet = futures::stream::iter(drv.input_derivations.iter()) .map(|(input_drv_path, output_names)| { - // since Derivation is validated, we know this can be parsed. - let input_drv_path = - StorePathRef::from_absolute_path(input_drv_path.as_bytes()) - .expect("invalid drv path") - .to_owned(); - // look up the derivation object let input_drv = { let known_paths = self.known_paths.borrow(); known_paths - .get_drv_by_drvpath(&input_drv_path) + .get_drv_by_drvpath(input_drv_path) .unwrap_or_else(|| panic!("{} not found", input_drv_path)) .to_owned() }; diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs index 3a09c9d56..3cf9a5ba7 100644 --- a/tvix/nix-compat/src/derivation/mod.rs +++ b/tvix/nix-compat/src/derivation/mod.rs @@ -36,7 +36,7 @@ pub struct Derivation { /// Map from drv path to output names used from this derivation. #[serde(rename = "inputDrvs")] - pub input_derivations: BTreeMap>, + pub input_derivations: BTreeMap>, /// Plain store paths of additional inputs. #[serde(rename = "inputSrcs")] @@ -70,19 +70,7 @@ impl Derivation { write_outputs(writer, &self.outputs)?; write_char(writer, COMMA)?; - // To reproduce the exact hashes of original nix, we need to sort - // these by their aterm representation. - // StorePath has a different sort order, so we convert it here. - let input_derivations: BTreeMap> = input_derivations - .iter() - .map(|(k, v)| { - let mut aterm_k = Vec::new(); - k.aterm_write(&mut aterm_k).expect("no write error to Vec"); - (BString::new(aterm_k), v) - }) - .collect(); - - write_input_derivations(writer, &input_derivations)?; + write_input_derivations(writer, input_derivations)?; write_char(writer, COMMA)?; write_input_sources(writer, &self.input_sources)?; @@ -145,8 +133,11 @@ impl Derivation { // into a (sorted, guaranteed by BTreeSet) list of references let references: BTreeSet = { let mut inputs = self.input_sources.clone(); - let input_derivation_keys: Vec = - self.input_derivations.keys().cloned().collect(); + let input_derivation_keys: Vec = self + .input_derivations + .keys() + .map(|k| k.to_absolute_path()) + .collect(); inputs.extend(input_derivation_keys); inputs }; @@ -211,12 +202,8 @@ impl Derivation { // derivation_or_fod_hash, and replace the derivation path with // it's HEXLOWER digest. let input_derivations = BTreeMap::from_iter(self.input_derivations.iter().map( - |(drv_path_str, output_names)| { - // parse drv_path to StorePathRef - let drv_path = StorePathRef::from_absolute_path(drv_path_str.as_bytes()) - .expect("invalid input derivation path"); - - let hash = fn_get_derivation_or_fod_hash(&drv_path); + |(drv_path, output_names)| { + let hash = fn_get_derivation_or_fod_hash(&drv_path.into()); (hash, output_names.to_owned()) }, diff --git a/tvix/nix-compat/src/derivation/parser.rs b/tvix/nix-compat/src/derivation/parser.rs index 7ffa6fd46..2cfcf2eae 100644 --- a/tvix/nix-compat/src/derivation/parser.rs +++ b/tvix/nix-compat/src/derivation/parser.rs @@ -14,6 +14,7 @@ use thiserror; use crate::derivation::parse_error::{into_nomerror, ErrorKind, NomError, NomResult}; use crate::derivation::{write, CAHash, Derivation, Output}; +use crate::store_path::{self, StorePath, StorePathRef}; use crate::{aterm, nixhash}; #[derive(Debug, thiserror::Error)] @@ -142,11 +143,11 @@ fn parse_outputs(i: &[u8]) -> NomResult<&[u8], BTreeMap> { } } -fn parse_input_derivations(i: &[u8]) -> NomResult<&[u8], BTreeMap>> { +fn parse_input_derivations(i: &[u8]) -> NomResult<&[u8], BTreeMap>> { let (i, input_derivations_list) = parse_kv::, _>(aterm::parse_str_list)(i)?; // This is a HashMap of drv paths to a list of output names. - let mut input_derivations: BTreeMap> = BTreeMap::new(); + let mut input_derivations: BTreeMap> = BTreeMap::new(); for (input_derivation, output_names) in input_derivations_list { let mut new_output_names = BTreeSet::new(); @@ -159,10 +160,26 @@ fn parse_input_derivations(i: &[u8]) -> NomResult<&[u8], BTreeMap> = { + static ref EXP_INPUT_DERIVATIONS_SIMPLE: BTreeMap> = { let mut b = BTreeMap::new(); b.insert( - "/nix/store/8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv".to_string(), + StorePath::from_bytes(b"8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv") + .unwrap(), { let mut output_names = BTreeSet::new(); output_names.insert("out".to_string()); @@ -345,7 +366,8 @@ mod tests { }, ); b.insert( - "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv".to_string(), + StorePath::from_bytes(b"p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv") + .unwrap(), { let mut output_names = BTreeSet::new(); output_names.insert("out".to_string()); @@ -400,7 +422,7 @@ mod tests { #[test_case(EXP_INPUT_DERIVATIONS_SIMPLE_ATERM.as_bytes(), &EXP_INPUT_DERIVATIONS_SIMPLE; "simple")] fn parse_input_derivations( input: &'static [u8], - expected: &BTreeMap>, + expected: &BTreeMap>, ) { let (rest, parsed) = super::parse_input_derivations(input).expect("must parse"); diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs index 168e11d46..fcfea2047 100644 --- a/tvix/nix-compat/src/derivation/tests/mod.rs +++ b/tvix/nix-compat/src/derivation/tests/mod.rs @@ -104,7 +104,7 @@ fn from_aterm_bytes(path_to_drv_file: &str) { assert_eq!( &aterm_bytes, - &parsed_drv.to_aterm_bytes(), + &BString::new(parsed_drv.to_aterm_bytes()), "expected serialized ATerm to match initial input" ); } @@ -381,10 +381,9 @@ fn output_path_construction() { ); // assemble foo input_derivations - foo_drv.input_derivations.insert( - bar_drv_path.to_absolute_path(), - BTreeSet::from(["out".to_string()]), - ); + foo_drv + .input_derivations + .insert(bar_drv_path, BTreeSet::from(["out".to_string()])); // calculate foo output paths let foo_calc_result = foo_drv.calculate_output_paths( diff --git a/tvix/nix-compat/src/derivation/validate.rs b/tvix/nix-compat/src/derivation/validate.rs index d711f5ce1..757326816 100644 --- a/tvix/nix-compat/src/derivation/validate.rs +++ b/tvix/nix-compat/src/derivation/validate.rs @@ -53,14 +53,7 @@ impl Derivation { // Validate all input_derivations for (input_derivation_path, output_names) in &self.input_derivations { // Validate input_derivation_path - if let Err(e) = StorePathRef::from_absolute_path(input_derivation_path.as_bytes()) { - return Err(DerivationError::InvalidInputDerivationPath( - input_derivation_path.to_string(), - e, - )); - } - - if !input_derivation_path.ends_with(".drv") { + if !input_derivation_path.name().ends_with(".drv") { return Err(DerivationError::InvalidInputDerivationPrefix( input_derivation_path.to_string(), )); diff --git a/tvix/nix-compat/src/derivation/write.rs b/tvix/nix-compat/src/derivation/write.rs index f3b16d9cf..83106cd9e 100644 --- a/tvix/nix-compat/src/derivation/write.rs +++ b/tvix/nix-compat/src/derivation/write.rs @@ -149,7 +149,7 @@ pub(crate) fn write_outputs( pub(crate) fn write_input_derivations( writer: &mut impl Write, - input_derivations: &BTreeMap>, + input_derivations: &BTreeMap>, ) -> Result<(), io::Error> { write_char(writer, BRACKET_OPEN)?; @@ -159,7 +159,7 @@ pub(crate) fn write_input_derivations( } write_char(writer, PAREN_OPEN)?; - writer.write_all(input_derivation_aterm)?; + input_derivation_aterm.aterm_write(writer)?; write_char(writer, COMMA)?; write_char(writer, BRACKET_OPEN)?; diff --git a/tvix/nix-compat/src/store_path/mod.rs b/tvix/nix-compat/src/store_path/mod.rs index ecf8ec8fa..836374b80 100644 --- a/tvix/nix-compat/src/store_path/mod.rs +++ b/tvix/nix-compat/src/store_path/mod.rs @@ -75,9 +75,26 @@ impl PartialOrd for StorePath { } } +/// `StorePath`s are sorted by their reverse digest to match the sorting order +/// of the nixbase32-encoded string. impl Ord for StorePath { fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.digest.cmp(&other.digest) + let order = self.digest.iter().rev().cmp(other.digest.iter().rev()); + + // This order must match the order of the nixbase32 encoded digests. + #[cfg(debug_assertions)] + { + let self_hash = nixbase32::encode(&self.digest); + let other_hash = nixbase32::encode(&other.digest); + let canonical_order = self_hash.cmp(&other_hash); + assert_eq!( + order, canonical_order, + "Ordering of nixbase32 differs, {:?} instead of {:?}:\n{:?}\n{:?}", + order, canonical_order, self_hash, other_hash + ); + } + + order } } @@ -339,10 +356,12 @@ impl fmt::Display for StorePathRef<'_> { #[cfg(test)] mod tests { + use std::cmp::Ordering; use std::path::PathBuf; use crate::store_path::{StorePathRef, DIGEST_SIZE}; use hex_literal::hex; + use pretty_assertions::assert_eq; use test_case::test_case; use super::{Error, StorePath}; @@ -362,6 +381,47 @@ mod tests { assert_eq!(example_nix_path_str, nixpath.to_string()) } + #[test] + fn store_path_ordering() { + let store_paths = [ + "/nix/store/0lk5dgi01r933abzfj9c9wlndg82yd3g-psutil-5.9.6.tar.gz.drv", + "/nix/store/1xj43bva89f9qmwm37zl7r3d7m67i9ck-shorttoc-1.3-tex.drv", + "/nix/store/2gb633czchi20jq1kqv70rx2yvvgins8-lifted-base-0.2.3.12.tar.gz.drv", + "/nix/store/2vksym3r3zqhp15q3fpvw2mnvffv11b9-docbook-xml-4.5.zip.drv", + "/nix/store/5q918awszjcz5720xvpc2czbg1sdqsf0-rust_renaming-0.1.0-lib", + "/nix/store/7jw30i342sr2p1fmz5xcfnch65h4zbd9-dbus-1.14.10.tar.xz.drv", + "/nix/store/96yqwqhnp3qya4rf4n0rcl0lwvrylp6k-eap8021x-222.40.1.tar.gz.drv", + "/nix/store/9gjqg36a1v0axyprbya1hkaylmnffixg-virtualenv-20.24.5.tar.gz.drv", + "/nix/store/a4i5mci2g9ada6ff7ks38g11dg6iqyb8-perl-5.32.1.drv", + "/nix/store/a5g76ljava4h5pxlggz3aqdhs3a4fk6p-ToolchainInfo.plist.drv", + "/nix/store/db46l7d6nswgz4ffp1mmd56vjf9g51v6-version.plist.drv", + "/nix/store/g6f7w20sd7vwy0rc1r4bfsw4ciclrm4q-crates-io-num_cpus-1.12.0.drv", + "/nix/store/iw82n1wwssb8g6772yddn8c3vafgv9np-bootstrap-stage1-sysctl-stdenv-darwin.drv", + "/nix/store/lp78d1y5wxpcn32d5c4r7xgbjwiw0cgf-logo.svg.drv", + "/nix/store/mf00ank13scv1f9l1zypqdpaawjhfr3s-python3.11-psutil-5.9.6.drv", + "/nix/store/mpfml61ra7pz90124jx9r3av0kvkz2w1-perl5.36.0-Encode-Locale-1.05", + "/nix/store/qhsvwx4h87skk7c4mx0xljgiy3z93i23-source.drv", + "/nix/store/riv7d73adim8hq7i04pr8kd0jnj93nav-fdk-aac-2.0.2.tar.gz.drv", + "/nix/store/s64b9031wga7vmpvgk16xwxjr0z9ln65-human-signals-5.0.0.tgz-extracted", + "/nix/store/w6svg3m2xdh6dhx0gl1nwa48g57d3hxh-thiserror-1.0.49", + ]; + + for w in store_paths.windows(2) { + if w.len() < 2 { + continue; + } + let (pa, _) = StorePath::from_absolute_path_full(w[0]).expect("parseable"); + let (pb, _) = StorePath::from_absolute_path_full(w[1]).expect("parseable"); + assert_eq!( + Ordering::Less, + pa.cmp(&pb), + "{:?} not less than {:?}", + w[0], + w[1] + ); + } + } + /// This is the store path rejected when `nix-store --add`'ing an /// empty `.gitignore` file. /// @@ -440,10 +500,10 @@ mod tests { #[test] fn serialize_ref() { - let store_path_str = - "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432"; - let nixpath_actual = - StorePathRef::from_absolute_path(store_path_str.as_bytes()).expect("can parse"); + let nixpath_actual = StorePathRef::from_bytes( + b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + ) + .expect("can parse"); let serialized = serde_json::to_string(&nixpath_actual).expect("can serialize"); @@ -455,11 +515,10 @@ mod tests { #[test] fn serialize_owned() { - let store_path_str = - "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432"; - let nixpath_actual = StorePathRef::from_absolute_path(store_path_str.as_bytes()) - .expect("can parse") - .to_owned(); + let nixpath_actual = StorePath::from_bytes( + b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + ) + .expect("can parse"); let serialized = serde_json::to_string(&nixpath_actual).expect("can serialize");