feat(tvix/glue): support builtin:fetchurl
nixpkgs calls <nix/fetchurl.nix> during nixpkgs bootstrap. This produces a fake derivation with system = builtin and builder = builtin:fetchurl, and needs to download files from the internet. At the end of the Derivation construction, if we have such a derivation, also synthesize a `Fetch` struct, which we add to the known fetch paths. This will then cause these fetches to be picked up like all other fetches in TvixStoreIO. Change-Id: I72cbca4f85da106b25eda97693a6a6e59911cd57 Reviewed-on: https://cl.tvl.fyi/c/depot/+/10975 Reviewed-by: Connor Brewster <cbrewster@hey.com> Tested-by: BuildkiteCI
This commit is contained in:
parent
b08379096f
commit
842d6816bf
5 changed files with 197 additions and 15 deletions
|
@ -170,6 +170,7 @@ pub(crate) mod derivation_builtins {
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
use crate::builtins::utils::{select_string, strong_importing_coerce_to_string};
|
use crate::builtins::utils::{select_string, strong_importing_coerce_to_string};
|
||||||
|
use crate::fetchurl::fetchurl_derivation_to_fetch;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use bstr::ByteSlice;
|
use bstr::ByteSlice;
|
||||||
|
@ -506,6 +507,17 @@ pub(crate) mod derivation_builtins {
|
||||||
))),
|
))),
|
||||||
)));
|
)));
|
||||||
|
|
||||||
|
// If the derivation is a fake derivation (builtin:fetchurl),
|
||||||
|
// synthesize a [Fetch] and add it there, too.
|
||||||
|
if drv.builder == "builtin:fetchurl" {
|
||||||
|
let (name, fetch) =
|
||||||
|
fetchurl_derivation_to_fetch(&drv).map_err(|e| ErrorKind::TvixError(Rc::new(e)))?;
|
||||||
|
|
||||||
|
known_paths
|
||||||
|
.add_fetch(fetch, &name)
|
||||||
|
.map_err(|e| ErrorKind::TvixError(Rc::new(e)))?;
|
||||||
|
}
|
||||||
|
|
||||||
// Register the Derivation in known_paths.
|
// Register the Derivation in known_paths.
|
||||||
known_paths.add_derivation(drv_path, drv);
|
known_paths.add_derivation(drv_path, drv);
|
||||||
|
|
||||||
|
|
|
@ -6,8 +6,8 @@ use nix_compat::{
|
||||||
};
|
};
|
||||||
use sha1::Sha1;
|
use sha1::Sha1;
|
||||||
use sha2::{digest::Output, Digest, Sha256, Sha512};
|
use sha2::{digest::Output, Digest, Sha256, Sha512};
|
||||||
use tokio::io::{AsyncBufRead, AsyncRead, AsyncWrite};
|
use tokio::io::{AsyncBufRead, AsyncRead, AsyncWrite, AsyncWriteExt, BufReader};
|
||||||
use tokio_util::io::InspectReader;
|
use tokio_util::io::{InspectReader, InspectWriter};
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
use tvix_castore::{
|
use tvix_castore::{
|
||||||
blobservice::BlobService,
|
blobservice::BlobService,
|
||||||
|
@ -381,7 +381,7 @@ where
|
||||||
});
|
});
|
||||||
|
|
||||||
// Ingest the NAR, get the root node.
|
// Ingest the NAR, get the root node.
|
||||||
let (root_node, actual_nar_sha256, actual_nar_size) =
|
let (root_node, _actual_nar_sha256, actual_nar_size) =
|
||||||
tvix_store::nar::ingest_nar_and_hash(
|
tvix_store::nar::ingest_nar_and_hash(
|
||||||
self.blob_service.clone(),
|
self.blob_service.clone(),
|
||||||
self.directory_service.clone(),
|
self.directory_service.clone(),
|
||||||
|
@ -416,14 +416,99 @@ where
|
||||||
got: actual_hash,
|
got: actual_hash,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((
|
Ok((
|
||||||
root_node,
|
root_node,
|
||||||
CAHash::Nar(NixHash::Sha256(actual_nar_sha256)),
|
// use a CAHash::Nar with the algo from the input.
|
||||||
|
CAHash::Nar(exp_hash),
|
||||||
actual_nar_size,
|
actual_nar_size,
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
Fetch::Executable { url: _, hash: _ } => todo!(),
|
Fetch::Executable {
|
||||||
|
url,
|
||||||
|
hash: exp_hash,
|
||||||
|
} => {
|
||||||
|
// Construct a AsyncRead reading from the data as its downloaded.
|
||||||
|
let mut r = self.download(url.clone()).await?;
|
||||||
|
|
||||||
|
// Construct a AsyncWrite to write into the BlobService.
|
||||||
|
let mut blob_writer = self.blob_service.open_write().await;
|
||||||
|
|
||||||
|
// Copy the contents from the download reader to the blob writer.
|
||||||
|
let file_size = tokio::io::copy(&mut r, &mut blob_writer).await?;
|
||||||
|
let blob_digest = blob_writer.close().await?;
|
||||||
|
|
||||||
|
// Render the NAR representation on-the-fly into a hash function with
|
||||||
|
// the same algo as our expected hash.
|
||||||
|
// We cannot do this upfront, as we don't know the actual size.
|
||||||
|
// FUTUREWORK: make opportunistic use of Content-Length header?
|
||||||
|
|
||||||
|
let w = tokio::io::sink();
|
||||||
|
// Construct the hash function.
|
||||||
|
let mut hasher: Box<dyn DynDigest + Send> = match exp_hash.algo() {
|
||||||
|
HashAlgo::Md5 => Box::new(Md5::new()),
|
||||||
|
HashAlgo::Sha1 => Box::new(Sha1::new()),
|
||||||
|
HashAlgo::Sha256 => Box::new(Sha256::new()),
|
||||||
|
HashAlgo::Sha512 => Box::new(Sha512::new()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut nar_size: u64 = 0;
|
||||||
|
let mut w = InspectWriter::new(w, |d| {
|
||||||
|
hasher.update(d);
|
||||||
|
nar_size += d.len() as u64;
|
||||||
|
});
|
||||||
|
|
||||||
|
{
|
||||||
|
let node = nix_compat::nar::writer::r#async::open(&mut w).await?;
|
||||||
|
|
||||||
|
let blob_reader = self
|
||||||
|
.blob_service
|
||||||
|
.open_read(&blob_digest)
|
||||||
|
.await?
|
||||||
|
.expect("Tvix bug: just-uploaded blob not found");
|
||||||
|
|
||||||
|
node.file(true, file_size, &mut BufReader::new(blob_reader))
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
w.flush().await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// finalize the hasher.
|
||||||
|
let actual_hash = {
|
||||||
|
match exp_hash.algo() {
|
||||||
|
HashAlgo::Md5 => {
|
||||||
|
NixHash::Md5(hasher.finalize().to_vec().try_into().unwrap())
|
||||||
|
}
|
||||||
|
HashAlgo::Sha1 => {
|
||||||
|
NixHash::Sha1(hasher.finalize().to_vec().try_into().unwrap())
|
||||||
|
}
|
||||||
|
HashAlgo::Sha256 => {
|
||||||
|
NixHash::Sha256(hasher.finalize().to_vec().try_into().unwrap())
|
||||||
|
}
|
||||||
|
HashAlgo::Sha512 => {
|
||||||
|
NixHash::Sha512(hasher.finalize().to_vec().try_into().unwrap())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if exp_hash != actual_hash {
|
||||||
|
return Err(FetcherError::HashMismatch {
|
||||||
|
url,
|
||||||
|
wanted: exp_hash,
|
||||||
|
got: actual_hash,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Construct and return the FileNode describing the downloaded contents,
|
||||||
|
// make it executable.
|
||||||
|
let root_node = Node::File(FileNode {
|
||||||
|
name: vec![].into(),
|
||||||
|
digest: blob_digest.into(),
|
||||||
|
size: file_size,
|
||||||
|
executable: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
Ok((root_node, CAHash::Nar(actual_hash), file_size))
|
||||||
|
}
|
||||||
Fetch::Git() => todo!(),
|
Fetch::Git() => todo!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -441,7 +526,7 @@ where
|
||||||
// Fetch file, return the (unnamed) (File)Node of its contents, ca hash and filesize.
|
// Fetch file, return the (unnamed) (File)Node of its contents, ca hash and filesize.
|
||||||
let (node, ca_hash, size) = self.ingest(fetch).await?;
|
let (node, ca_hash, size) = self.ingest(fetch).await?;
|
||||||
|
|
||||||
// Calculate the store path to return later, which is done with the ca_hash.
|
// Calculate the store path to return, by calculating from ca_hash.
|
||||||
let store_path = build_ca_path(name, &ca_hash, Vec::<String>::new(), false)?;
|
let store_path = build_ca_path(name, &ca_hash, Vec::<String>::new(), false)?;
|
||||||
|
|
||||||
// Rename the node name to match the Store Path.
|
// Rename the node name to match the Store Path.
|
||||||
|
@ -450,14 +535,15 @@ where
|
||||||
// If the resulting hash is not a CAHash::Nar, we also need to invoke
|
// If the resulting hash is not a CAHash::Nar, we also need to invoke
|
||||||
// `calculate_nar` to calculate this representation, as it's required in
|
// `calculate_nar` to calculate this representation, as it's required in
|
||||||
// the [PathInfo].
|
// the [PathInfo].
|
||||||
|
// FUTUREWORK: allow ingest() to return multiple hashes, or have it feed
|
||||||
|
// nar_calculation_service too?
|
||||||
let (nar_size, nar_sha256) = match &ca_hash {
|
let (nar_size, nar_sha256) = match &ca_hash {
|
||||||
CAHash::Flat(_nix_hash) => self
|
CAHash::Nar(NixHash::Sha256(nar_sha256)) => (size, *nar_sha256),
|
||||||
|
CAHash::Nar(_) | CAHash::Flat(_) => self
|
||||||
.nar_calculation_service
|
.nar_calculation_service
|
||||||
.calculate_nar(&node)
|
.calculate_nar(&node)
|
||||||
.await
|
.await
|
||||||
.map_err(|e| FetcherError::Io(e.into()))?,
|
.map_err(|e| FetcherError::Io(e.into()))?,
|
||||||
CAHash::Nar(NixHash::Sha256(nar_sha256)) => (size, *nar_sha256),
|
|
||||||
CAHash::Nar(_) => unreachable!("Tvix bug: fetch returned non-sha256 CAHash::Nar"),
|
|
||||||
CAHash::Text(_) => unreachable!("Tvix bug: fetch returned CAHash::Text"),
|
CAHash::Text(_) => unreachable!("Tvix bug: fetch returned CAHash::Text"),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
82
tvix/glue/src/fetchurl.rs
Normal file
82
tvix/glue/src/fetchurl.rs
Normal file
|
@ -0,0 +1,82 @@
|
||||||
|
//! This contains the code translating from a `builtin:fetchurl` [Derivation]
|
||||||
|
//! to a [Fetch].
|
||||||
|
use crate::fetchers::Fetch;
|
||||||
|
use nix_compat::{derivation::Derivation, nixhash::CAHash};
|
||||||
|
use tracing::instrument;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
/// Takes a derivation produced by a call to `builtin:fetchurl` and returns the
|
||||||
|
/// synthesized [Fetch] for it, as well as the name.
|
||||||
|
#[instrument]
|
||||||
|
pub(crate) fn fetchurl_derivation_to_fetch(drv: &Derivation) -> Result<(String, Fetch), Error> {
|
||||||
|
if drv.builder != "builtin:fetchurl" {
|
||||||
|
return Err(Error::BuilderInvalid);
|
||||||
|
}
|
||||||
|
if !drv.arguments.is_empty() {
|
||||||
|
return Err(Error::ArgumentsInvalud);
|
||||||
|
}
|
||||||
|
if drv.system != "builtin" {
|
||||||
|
return Err(Error::SystemInvalid);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ensure this is a fixed-output derivation
|
||||||
|
if drv.outputs.len() != 1 {
|
||||||
|
return Err(Error::NoFOD);
|
||||||
|
}
|
||||||
|
let out_output = &drv.outputs.get("out").ok_or(Error::NoFOD)?;
|
||||||
|
let ca_hash = out_output.ca_hash.clone().ok_or(Error::NoFOD)?;
|
||||||
|
|
||||||
|
let name: String = drv
|
||||||
|
.environment
|
||||||
|
.get("name")
|
||||||
|
.ok_or(Error::NameMissing)?
|
||||||
|
.to_owned()
|
||||||
|
.try_into()
|
||||||
|
.map_err(|_| Error::NameInvalid)?;
|
||||||
|
|
||||||
|
let url: Url = std::str::from_utf8(drv.environment.get("url").ok_or(Error::URLMissing)?)
|
||||||
|
.map_err(|_| Error::URLInvalid)?
|
||||||
|
.parse()
|
||||||
|
.map_err(|_| Error::URLInvalid)?;
|
||||||
|
|
||||||
|
match ca_hash {
|
||||||
|
CAHash::Flat(hash) => {
|
||||||
|
return Ok((
|
||||||
|
name,
|
||||||
|
Fetch::URL {
|
||||||
|
url,
|
||||||
|
exp_hash: Some(hash),
|
||||||
|
},
|
||||||
|
))
|
||||||
|
}
|
||||||
|
CAHash::Nar(hash) => {
|
||||||
|
if drv.environment.get("executable").map(|v| v.as_slice()) == Some(b"1") {
|
||||||
|
Ok((name, Fetch::Executable { url, hash }))
|
||||||
|
} else {
|
||||||
|
Ok((name, Fetch::NAR { url, hash }))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// you can't construct derivations containing this
|
||||||
|
CAHash::Text(_) => panic!("Tvix bug: got CaHash::Text in drv"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
pub(crate) enum Error {
|
||||||
|
#[error("Invalid builder")]
|
||||||
|
BuilderInvalid,
|
||||||
|
#[error("invalid arguments")]
|
||||||
|
ArgumentsInvalud,
|
||||||
|
#[error("Invalid system")]
|
||||||
|
SystemInvalid,
|
||||||
|
#[error("Derivation is not fixed-output")]
|
||||||
|
NoFOD,
|
||||||
|
#[error("Missing URL")]
|
||||||
|
URLMissing,
|
||||||
|
#[error("Invalid URL")]
|
||||||
|
URLInvalid,
|
||||||
|
#[error("Missing Name")]
|
||||||
|
NameMissing,
|
||||||
|
#[error("Name invalid")]
|
||||||
|
NameInvalid,
|
||||||
|
}
|
|
@ -6,6 +6,8 @@ pub mod tvix_build;
|
||||||
pub mod tvix_io;
|
pub mod tvix_io;
|
||||||
pub mod tvix_store_io;
|
pub mod tvix_store_io;
|
||||||
|
|
||||||
|
mod fetchurl;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests;
|
mod tests;
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
//! This module provides an implementation of EvalIO talking to tvix-store.
|
//! This module provides an implementation of EvalIO talking to tvix-store.
|
||||||
|
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
use futures::{StreamExt, TryStreamExt};
|
use futures::{StreamExt, TryStreamExt};
|
||||||
use nix_compat::nixhash::NixHash;
|
use nix_compat::nixhash::NixHash;
|
||||||
|
@ -138,7 +137,8 @@ impl TvixStoreIO {
|
||||||
// The store path doesn't exist yet, so we need to fetch or build it.
|
// The store path doesn't exist yet, so we need to fetch or build it.
|
||||||
// We check for fetches first, as we might have both native
|
// We check for fetches first, as we might have both native
|
||||||
// fetchers and FODs in KnownPaths, and prefer the former.
|
// fetchers and FODs in KnownPaths, and prefer the former.
|
||||||
|
// This will also find [Fetch] synthesized from
|
||||||
|
// `builtin:fetchurl` Derivations.
|
||||||
let maybe_fetch = self
|
let maybe_fetch = self
|
||||||
.known_paths
|
.known_paths
|
||||||
.borrow()
|
.borrow()
|
||||||
|
@ -156,9 +156,9 @@ impl TvixStoreIO {
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
debug_assert_eq!(
|
debug_assert_eq!(
|
||||||
sp.to_string(),
|
sp.to_absolute_path(),
|
||||||
store_path.to_string(),
|
store_path.as_ref().to_absolute_path(),
|
||||||
"store path returned from fetcher should match"
|
"store path returned from fetcher must match store path we have in fetchers"
|
||||||
);
|
);
|
||||||
|
|
||||||
root_node
|
root_node
|
||||||
|
|
Loading…
Add table
Reference in a new issue