fix(tvix/cli): keep tracking full paths in known_paths

We need to distinguish explicitly between the paths used for the
scanner, and the paths that populate the derivation inputs. The full
paths must be accessible from the result of the refscanner to populate
drv fields correctly.

This was previously hidden by debug changes that masked actual IO
operations with no-ops.

Change-Id: I037af6e6bbe2b573034d695f8779bee1b56bc125
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8022
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
This commit is contained in:
Vincent Ambo 2023-02-03 00:20:02 +03:00 committed by tazjin
parent e6235e2932
commit 38e8c2e959
3 changed files with 84 additions and 42 deletions

View file

@ -8,7 +8,7 @@ use tvix_eval::builtin_macros::builtins;
use tvix_eval::{AddContext, CoercionKind, ErrorKind, NixAttrs, NixList, Value, VM};
use crate::errors::Error;
use crate::known_paths::{KnownPaths, PathType};
use crate::known_paths::{KnownPaths, PathKind, PathName};
// Constants used for strangely named fields in derivation inputs.
const STRUCTURED_ATTRS: &str = "__structuredAttrs";
@ -41,18 +41,19 @@ fn populate_outputs(vm: &mut VM, drv: &mut Derivation, outputs: NixList) -> Resu
/// Populate the inputs of a derivation from the build references
/// found when scanning the derivation's parameters.
fn populate_inputs<I: IntoIterator<Item = String>>(
fn populate_inputs<I: IntoIterator<Item = PathName>>(
drv: &mut Derivation,
known_paths: &KnownPaths,
references: I,
) {
for reference in references.into_iter() {
match &known_paths[&reference] {
PathType::Plain => {
drv.input_sources.insert(reference.to_string());
let reference = &known_paths[&reference];
match &reference.kind {
PathKind::Plain => {
drv.input_sources.insert(reference.path.clone());
}
PathType::Output { name, derivation } => {
PathKind::Output { name, derivation } => {
match drv.input_derivations.entry(derivation.clone()) {
btree_map::Entry::Vacant(entry) => {
entry.insert(BTreeSet::from([name.clone()]));
@ -64,8 +65,8 @@ fn populate_inputs<I: IntoIterator<Item = String>>(
}
}
PathType::Derivation { output_names } => {
match drv.input_derivations.entry(reference.to_string()) {
PathKind::Derivation { output_names } => {
match drv.input_derivations.entry(reference.path.clone()) {
btree_map::Entry::Vacant(entry) => {
entry.insert(output_names.clone());
}
@ -389,7 +390,14 @@ mod derivation_builtins {
let mut refscan = state.borrow().reference_scanner();
refscan.scan_str(content.as_str());
let refs = refscan.finalise();
let refs = {
let paths = state.borrow();
refscan
.finalise()
.into_iter()
.map(|path| paths[&path].path.to_string())
.collect::<Vec<_>>()
};
// TODO: fail on derivation references (only "plain" is allowed here)
@ -491,7 +499,7 @@ mod tests {
"/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv",
);
let inputs: Vec<String> = vec![
let inputs = vec![
"/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-foo".into(),
"/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv".into(),
"/nix/store/zvpskvjwi72fjxg0vzq822sfvq20mq4l-bar".into(),

View file

@ -18,7 +18,7 @@ use std::{
};
#[derive(Debug, PartialEq)]
pub enum PathType {
pub enum PathKind {
/// A literal derivation (`.drv`-file), and the *names* of its outputs.
Derivation { output_names: BTreeSet<String> },
@ -29,10 +29,43 @@ pub enum PathType {
Plain,
}
/// A single entry of [`KnownPaths`]: a registered path together with the
/// classification it was registered under.
#[derive(Debug, PartialEq)]
pub struct KnownPath {
/// The path string exactly as it was handed over at registration time
/// (it is stored without being shortened).
pub path: String,
/// What sort of path this is; see [`PathKind`] for the possible cases.
pub kind: PathKind,
}
impl KnownPath {
fn new(path: String, kind: PathKind) -> Self {
KnownPath { path, kind }
}
}
/// Lookup key for [`KnownPaths`]: a path shortened to its first
/// `STORE_PATH_LEN` bytes, which is also the form handed to the
/// reference scanner as a search candidate.
///
/// Internal struct to prevent accidental leaks of the truncated path
/// names. The wrapped string is a private field, so code outside this
/// module cannot read the shortened form directly and has to go through
/// [`KnownPaths`] to get back to the full path.
#[repr(transparent)]
#[derive(Clone, Debug, Default, PartialEq, PartialOrd, Ord, Eq, Hash)]
pub struct PathName(String);
impl From<&str> for PathName {
fn from(s: &str) -> Self {
PathName(s[..STORE_PATH_LEN].to_string())
}
}
/// Exposes the shortened path as raw bytes.
///
/// The reference scanner accepts any candidate type that implements
/// `AsRef<[u8]>`, so this impl is what allows [`PathName`] values to be
/// passed to it as needles.
impl AsRef<[u8]> for PathName {
    fn as_ref(&self) -> &[u8] {
        self.0.as_bytes()
    }
}
#[derive(Debug, Default)]
pub struct KnownPaths {
/// All known paths, and their associated [`PathType`].
paths: HashMap<String, PathType>,
/// All known paths, keyed by a truncated version of their store
/// path used for reference scanning.
paths: HashMap<PathName, KnownPath>,
/// All known replacement strings for derivations.
///
@ -41,39 +74,40 @@ pub struct KnownPaths {
replacements: HashMap<String, String>,
}
impl Index<&str> for KnownPaths {
type Output = PathType;
impl Index<&PathName> for KnownPaths {
type Output = KnownPath;
fn index(&self, index: &str) -> &Self::Output {
&self.paths[&index[..STORE_PATH_LEN]]
fn index(&self, index: &PathName) -> &Self::Output {
&self.paths[index]
}
}
impl KnownPaths {
fn insert_path(&mut self, path: String, path_type: PathType) {
let path = path[..STORE_PATH_LEN].to_owned();
assert_eq!(path.len(), STORE_PATH_LEN, "should match");
match self.paths.entry(path) {
fn insert_path(&mut self, path: String, path_kind: PathKind) {
match self.paths.entry(path.as_str().into()) {
hash_map::Entry::Vacant(entry) => {
entry.insert(path_type);
entry.insert(KnownPath::new(path, path_kind));
}
hash_map::Entry::Occupied(mut entry) => {
match (path_type, entry.get_mut()) {
match (path_kind, &mut entry.get_mut().kind) {
// These variant combinations require no "merging action".
(PathType::Plain, PathType::Plain) => (),
(PathType::Output { .. }, PathType::Output { .. }) => (),
(PathKind::Plain, PathKind::Plain) => (),
(PathKind::Output { .. }, PathKind::Output { .. }) => (),
(
PathType::Derivation { output_names: new },
PathType::Derivation {
PathKind::Derivation { output_names: new },
PathKind::Derivation {
output_names: ref mut old,
},
) => {
old.extend(new);
}
_ => panic!("path '{}' inserted twice with different types", entry.key()),
_ => panic!(
"path '{}' inserted twice with different types",
entry.key().0
),
};
}
};
@ -81,14 +115,14 @@ impl KnownPaths {
/// Mark a plain path as known.
pub fn plain<S: ToString>(&mut self, path: S) {
self.insert_path(path.to_string(), PathType::Plain);
self.insert_path(path.to_string(), PathKind::Plain);
}
/// Mark a derivation as known.
pub fn drv<P: ToString, O: ToString>(&mut self, path: P, outputs: &[O]) {
self.insert_path(
path.to_string(),
PathType::Derivation {
PathKind::Derivation {
output_names: outputs.into_iter().map(ToString::to_string).collect(),
},
);
@ -103,7 +137,7 @@ impl KnownPaths {
) {
self.insert_path(
output_path.to_string(),
PathType::Output {
PathKind::Output {
name: name.to_string(),
derivation: drv_path.to_string(),
},
@ -117,7 +151,7 @@ impl KnownPaths {
}
/// Create a reference scanner from the current set of known paths.
pub fn reference_scanner(&self) -> ReferenceScanner {
pub fn reference_scanner(&self) -> ReferenceScanner<PathName> {
let candidates = self.paths.keys().map(Clone::clone).collect();
ReferenceScanner::new(candidates)
}

View file

@ -14,16 +14,16 @@ pub const STORE_PATH_LEN: usize = "/nix/store/00000000000000000000000000000000".
/// Represents a "primed" reference scanner with an automaton that knows the set
/// of store paths to scan for.
pub struct ReferenceScanner {
candidates: Vec<String>,
pub struct ReferenceScanner<P: Ord + AsRef<[u8]>> {
candidates: Vec<P>,
searcher: TwoByteWM,
matches: Vec<usize>,
}
impl ReferenceScanner {
impl<P: Clone + Ord + AsRef<[u8]>> ReferenceScanner<P> {
/// Construct a new `ReferenceScanner` that knows how to scan for the given
/// candidate store paths.
pub fn new(candidates: Vec<String>) -> Self {
pub fn new(candidates: Vec<P>) -> Self {
let searcher = TwoByteWM::new(&candidates);
ReferenceScanner {
@ -46,7 +46,7 @@ impl ReferenceScanner {
}
/// Finalise the reference scanner and return the resulting matches.
pub fn finalise(self) -> BTreeSet<String> {
pub fn finalise(self) -> BTreeSet<P> {
self.matches
.into_iter()
.map(|idx| self.candidates[idx].clone())
@ -64,7 +64,7 @@ mod tests {
#[test]
fn test_single_match() {
let mut scanner = ReferenceScanner::new(vec![
"/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".into(),
"/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".to_string(),
]);
scanner.scan_str(HELLO_DRV);
@ -78,11 +78,11 @@ mod tests {
fn test_multiple_matches() {
let candidates = vec![
// these exist in the drv:
"/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".into(),
"/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".into(),
"/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".into(),
"/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".to_string(),
"/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".to_string(),
"/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".to_string(),
// this doesn't:
"/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".into(),
"/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".to_string(),
];
let mut scanner = ReferenceScanner::new(candidates.clone());