feat(tvix/glue): support builtin:fetchurl

nixpkgs calls <nix/fetchurl.nix> during nixpkgs bootstrap.

This produces a fake derivation with system = builtin
and builder = builtin:fetchurl, and needs to download files from the
internet.

At the end of the Derivation construction, if we have such a derivation,
also synthesize a `Fetch` struct, which we add to the known fetch paths.

This will then cause these fetches to be picked up like all other
fetches in TvixStoreIO.

Change-Id: I72cbca4f85da106b25eda97693a6a6e59911cd57
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10975
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Tested-by: BuildkiteCI
This commit is contained in:
Florian Klink 2024-02-19 22:20:09 +07:00 committed by flokli
parent b08379096f
commit 842d6816bf
5 changed files with 197 additions and 15 deletions

View file

@ -170,6 +170,7 @@ pub(crate) mod derivation_builtins {
use std::collections::BTreeMap;
use crate::builtins::utils::{select_string, strong_importing_coerce_to_string};
use crate::fetchurl::fetchurl_derivation_to_fetch;
use super::*;
use bstr::ByteSlice;
@ -506,6 +507,17 @@ pub(crate) mod derivation_builtins {
))),
)));
// If the derivation is a fake derivation (builder is `builtin:fetchurl`),
// synthesize a [Fetch] and add it to the known paths, too.
if drv.builder == "builtin:fetchurl" {
let (name, fetch) =
fetchurl_derivation_to_fetch(&drv).map_err(|e| ErrorKind::TvixError(Rc::new(e)))?;
known_paths
.add_fetch(fetch, &name)
.map_err(|e| ErrorKind::TvixError(Rc::new(e)))?;
}
// Register the Derivation in known_paths.
known_paths.add_derivation(drv_path, drv);

View file

@ -6,8 +6,8 @@ use nix_compat::{
};
use sha1::Sha1;
use sha2::{digest::Output, Digest, Sha256, Sha512};
use tokio::io::{AsyncBufRead, AsyncRead, AsyncWrite};
use tokio_util::io::InspectReader;
use tokio::io::{AsyncBufRead, AsyncRead, AsyncWrite, AsyncWriteExt, BufReader};
use tokio_util::io::{InspectReader, InspectWriter};
use tracing::warn;
use tvix_castore::{
blobservice::BlobService,
@ -381,7 +381,7 @@ where
});
// Ingest the NAR, get the root node.
let (root_node, actual_nar_sha256, actual_nar_size) =
let (root_node, _actual_nar_sha256, actual_nar_size) =
tvix_store::nar::ingest_nar_and_hash(
self.blob_service.clone(),
self.directory_service.clone(),
@ -416,14 +416,99 @@ where
got: actual_hash,
});
}
Ok((
root_node,
CAHash::Nar(NixHash::Sha256(actual_nar_sha256)),
// use a CAHash::Nar with the algo from the input.
CAHash::Nar(exp_hash),
actual_nar_size,
))
}
Fetch::Executable { url: _, hash: _ } => todo!(),
Fetch::Executable {
url,
hash: exp_hash,
} => {
// Construct an AsyncRead reading from the data as it's downloaded.
let mut r = self.download(url.clone()).await?;
// Construct an AsyncWrite to write into the BlobService.
let mut blob_writer = self.blob_service.open_write().await;
// Copy the contents from the download reader to the blob writer.
let file_size = tokio::io::copy(&mut r, &mut blob_writer).await?;
let blob_digest = blob_writer.close().await?;
// Render the NAR representation on-the-fly into a hash function with
// the same algo as our expected hash.
// We cannot do this upfront, as we don't know the actual size.
// FUTUREWORK: make opportunistic use of Content-Length header?
let w = tokio::io::sink();
// Construct the hash function.
let mut hasher: Box<dyn DynDigest + Send> = match exp_hash.algo() {
HashAlgo::Md5 => Box::new(Md5::new()),
HashAlgo::Sha1 => Box::new(Sha1::new()),
HashAlgo::Sha256 => Box::new(Sha256::new()),
HashAlgo::Sha512 => Box::new(Sha512::new()),
};
let mut nar_size: u64 = 0;
let mut w = InspectWriter::new(w, |d| {
hasher.update(d);
nar_size += d.len() as u64;
});
{
let node = nix_compat::nar::writer::r#async::open(&mut w).await?;
let blob_reader = self
.blob_service
.open_read(&blob_digest)
.await?
.expect("Tvix bug: just-uploaded blob not found");
node.file(true, file_size, &mut BufReader::new(blob_reader))
.await?;
w.flush().await?;
}
// finalize the hasher.
let actual_hash = {
match exp_hash.algo() {
HashAlgo::Md5 => {
NixHash::Md5(hasher.finalize().to_vec().try_into().unwrap())
}
HashAlgo::Sha1 => {
NixHash::Sha1(hasher.finalize().to_vec().try_into().unwrap())
}
HashAlgo::Sha256 => {
NixHash::Sha256(hasher.finalize().to_vec().try_into().unwrap())
}
HashAlgo::Sha512 => {
NixHash::Sha512(hasher.finalize().to_vec().try_into().unwrap())
}
}
};
if exp_hash != actual_hash {
return Err(FetcherError::HashMismatch {
url,
wanted: exp_hash,
got: actual_hash,
});
}
// Construct and return the FileNode describing the downloaded contents,
// make it executable.
let root_node = Node::File(FileNode {
name: vec![].into(),
digest: blob_digest.into(),
size: file_size,
executable: true,
});
Ok((root_node, CAHash::Nar(actual_hash), file_size))
}
Fetch::Git() => todo!(),
}
}
@ -441,7 +526,7 @@ where
// Fetch file, return the (unnamed) (File)Node of its contents, ca hash and filesize.
let (node, ca_hash, size) = self.ingest(fetch).await?;
// Calculate the store path to return later, which is done with the ca_hash.
// Calculate the store path to return, by calculating from ca_hash.
let store_path = build_ca_path(name, &ca_hash, Vec::<String>::new(), false)?;
// Rename the node name to match the Store Path.
@ -450,14 +535,15 @@ where
// If the resulting hash is not a CAHash::Nar, we also need to invoke
// `calculate_nar` to calculate this representation, as it's required in
// the [PathInfo].
// FUTUREWORK: allow ingest() to return multiple hashes, or have it feed
// nar_calculation_service too?
let (nar_size, nar_sha256) = match &ca_hash {
CAHash::Flat(_nix_hash) => self
CAHash::Nar(NixHash::Sha256(nar_sha256)) => (size, *nar_sha256),
CAHash::Nar(_) | CAHash::Flat(_) => self
.nar_calculation_service
.calculate_nar(&node)
.await
.map_err(|e| FetcherError::Io(e.into()))?,
CAHash::Nar(NixHash::Sha256(nar_sha256)) => (size, *nar_sha256),
CAHash::Nar(_) => unreachable!("Tvix bug: fetch returned non-sha256 CAHash::Nar"),
CAHash::Text(_) => unreachable!("Tvix bug: fetch returned CAHash::Text"),
};

82
tvix/glue/src/fetchurl.rs Normal file
View file

@ -0,0 +1,82 @@
//! This contains the code translating from a `builtin:fetchurl` [Derivation]
//! to a [Fetch].
use crate::fetchers::Fetch;
use nix_compat::{derivation::Derivation, nixhash::CAHash};
use tracing::instrument;
use url::Url;
/// Takes a derivation produced by a call to `builtin:fetchurl` and returns the
/// synthesized [Fetch] for it, as well as the name.
#[instrument]
pub(crate) fn fetchurl_derivation_to_fetch(drv: &Derivation) -> Result<(String, Fetch), Error> {
if drv.builder != "builtin:fetchurl" {
return Err(Error::BuilderInvalid);
}
if !drv.arguments.is_empty() {
return Err(Error::ArgumentsInvalud);
}
if drv.system != "builtin" {
return Err(Error::SystemInvalid);
}
// ensure this is a fixed-output derivation
if drv.outputs.len() != 1 {
return Err(Error::NoFOD);
}
let out_output = &drv.outputs.get("out").ok_or(Error::NoFOD)?;
let ca_hash = out_output.ca_hash.clone().ok_or(Error::NoFOD)?;
let name: String = drv
.environment
.get("name")
.ok_or(Error::NameMissing)?
.to_owned()
.try_into()
.map_err(|_| Error::NameInvalid)?;
let url: Url = std::str::from_utf8(drv.environment.get("url").ok_or(Error::URLMissing)?)
.map_err(|_| Error::URLInvalid)?
.parse()
.map_err(|_| Error::URLInvalid)?;
match ca_hash {
CAHash::Flat(hash) => {
return Ok((
name,
Fetch::URL {
url,
exp_hash: Some(hash),
},
))
}
CAHash::Nar(hash) => {
if drv.environment.get("executable").map(|v| v.as_slice()) == Some(b"1") {
Ok((name, Fetch::Executable { url, hash }))
} else {
Ok((name, Fetch::NAR { url, hash }))
}
}
// you can't construct derivations containing this
CAHash::Text(_) => panic!("Tvix bug: got CaHash::Text in drv"),
}
}
/// Reasons why a [Derivation] could not be translated into a [Fetch] by
/// [fetchurl_derivation_to_fetch].
#[derive(Debug, thiserror::Error)]
pub(crate) enum Error {
/// The derivation's builder is not `builtin:fetchurl`.
#[error("Invalid builder")]
BuilderInvalid,
/// The derivation has builder arguments, but none are allowed.
// NOTE(review): the variant name is misspelled ("Invalud"); it is left as-is
// here because renaming it would require touching every user of the variant.
#[error("invalid arguments")]
ArgumentsInvalud,
/// The derivation's system is not `builtin`.
#[error("Invalid system")]
SystemInvalid,
/// The derivation does not have exactly one output named `out` carrying a
/// content-address hash.
#[error("Derivation is not fixed-output")]
NoFOD,
/// The `url` environment variable is not set.
#[error("Missing URL")]
URLMissing,
/// The `url` environment variable is not valid UTF-8 or does not parse as a URL.
#[error("Invalid URL")]
URLInvalid,
/// The `name` environment variable is not set.
#[error("Missing Name")]
NameMissing,
/// The `name` environment variable could not be converted into a `String`
/// (presumably because it is not valid UTF-8).
#[error("Name invalid")]
NameInvalid,
}

View file

@ -6,6 +6,8 @@ pub mod tvix_build;
pub mod tvix_io;
pub mod tvix_store_io;
mod fetchurl;
#[cfg(test)]
mod tests;

View file

@ -1,5 +1,4 @@
//! This module provides an implementation of EvalIO talking to tvix-store.
use bytes::Bytes;
use futures::{StreamExt, TryStreamExt};
use nix_compat::nixhash::NixHash;
@ -138,7 +137,8 @@ impl TvixStoreIO {
// The store path doesn't exist yet, so we need to fetch or build it.
// We check for fetches first, as we might have both native
// fetchers and FODs in KnownPaths, and prefer the former.
// This will also find [Fetch] synthesized from
// `builtin:fetchurl` Derivations.
let maybe_fetch = self
.known_paths
.borrow()
@ -156,9 +156,9 @@ impl TvixStoreIO {
})?;
debug_assert_eq!(
sp.to_string(),
store_path.to_string(),
"store path returned from fetcher should match"
sp.to_absolute_path(),
store_path.as_ref().to_absolute_path(),
"store path returned from fetcher must match store path we have in fetchers"
);
root_node