feat(tvix/glue/fetchers): support file:// URLs
Nix supports file:// - URLs for `fetchurl` and `fetchTarball`. Convert the enums and function arguments to hold a URL type. reqwest::Url is a re-export of the url crate, but they don't re-export the parsing errors, and as we want to hold these in our Error types, add it to Cargo.toml explicitly. The Fetcher::download function now checks on the scheme, and either opens the file locally, or does do a HTTP request as before. Fetch gets its custom debug impl, removing potentially sensitive username and password out of URLs. Change-Id: I777db1fe487370e822cbfec4624034aca5e08045 Reviewed-on: https://cl.tvl.fyi/c/depot/+/11504 Autosubmit: flokli <flokli@flokli.de> Reviewed-by: raitobezarius <tvl@lahfa.xyz> Tested-by: BuildkiteCI
This commit is contained in:
parent
dfef3d18d1
commit
8181817e53
9 changed files with 124 additions and 37 deletions
1
tvix/Cargo.lock
generated
1
tvix/Cargo.lock
generated
|
@ -4415,6 +4415,7 @@ dependencies = [
|
|||
"tvix-castore",
|
||||
"tvix-eval",
|
||||
"tvix-store",
|
||||
"url",
|
||||
"walkdir",
|
||||
"wu-manber",
|
||||
]
|
||||
|
|
|
@ -14174,6 +14174,10 @@ rec {
|
|||
packageId = "tvix-store";
|
||||
usesDefaultFeatures = false;
|
||||
}
|
||||
{
|
||||
name = "url";
|
||||
packageId = "url";
|
||||
}
|
||||
{
|
||||
name = "walkdir";
|
||||
packageId = "walkdir";
|
||||
|
|
|
@ -27,6 +27,7 @@ serde_json = "1.0"
|
|||
sha2 = "0.10.8"
|
||||
sha1 = "0.10.6"
|
||||
md-5 = "0.10.6"
|
||||
url = "2.4.0"
|
||||
walkdir = "2.4.0"
|
||||
|
||||
[dependencies.async-compression]
|
||||
|
|
|
@ -3,6 +3,7 @@ use nix_compat::{
|
|||
nixhash::{self, NixHash},
|
||||
store_path::BuildStorePathError,
|
||||
};
|
||||
use reqwest::Url;
|
||||
use std::rc::Rc;
|
||||
use thiserror::Error;
|
||||
|
||||
|
@ -33,7 +34,7 @@ impl From<DerivationError> for tvix_eval::ErrorKind {
|
|||
pub enum FetcherError {
|
||||
#[error("hash mismatch in file downloaded from {url}:\n wanted: {wanted}\n got: {got}")]
|
||||
HashMismatch {
|
||||
url: String,
|
||||
url: Url,
|
||||
wanted: NixHash,
|
||||
got: NixHash,
|
||||
},
|
||||
|
@ -41,6 +42,9 @@ pub enum FetcherError {
|
|||
#[error("Invalid hash type '{0}' for fetcher")]
|
||||
InvalidHashType(&'static str),
|
||||
|
||||
#[error("Unable to parse URL: {0}")]
|
||||
InvalidUrl(#[from] url::ParseError),
|
||||
|
||||
#[error(transparent)]
|
||||
Http(#[from] reqwest::Error),
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
//! Contains builtins that fetch paths from the Internet
|
||||
//! Contains builtins that fetch paths from the Internet, or local filesystem.
|
||||
|
||||
use super::utils::select_string;
|
||||
use crate::{
|
||||
|
@ -15,7 +15,7 @@ use tvix_eval::generators::GenCo;
|
|||
use tvix_eval::{CatchableErrorKind, ErrorKind, Value};
|
||||
|
||||
struct NixFetchArgs {
|
||||
url: String,
|
||||
url_str: String,
|
||||
name: Option<String>,
|
||||
sha256: Option<[u8; 32]>,
|
||||
}
|
||||
|
@ -26,11 +26,12 @@ async fn extract_fetch_args(
|
|||
co: &GenCo,
|
||||
args: Value,
|
||||
) -> Result<Result<NixFetchArgs, CatchableErrorKind>, ErrorKind> {
|
||||
if let Ok(url) = args.to_str() {
|
||||
if let Ok(url_str) = args.to_str() {
|
||||
// Get the raw bytes, not the ToString repr.
|
||||
let url = String::from_utf8(url.as_bytes().to_vec()).map_err(|_| ErrorKind::Utf8)?;
|
||||
let url_str =
|
||||
String::from_utf8(url_str.as_bytes().to_vec()).map_err(|_| ErrorKind::Utf8)?;
|
||||
return Ok(Ok(NixFetchArgs {
|
||||
url,
|
||||
url_str,
|
||||
name: None,
|
||||
sha256: None,
|
||||
}));
|
||||
|
@ -41,7 +42,7 @@ async fn extract_fetch_args(
|
|||
actual: args.type_of(),
|
||||
})?;
|
||||
|
||||
let url = match select_string(co, &attrs, "url").await? {
|
||||
let url_str = match select_string(co, &attrs, "url").await? {
|
||||
Ok(s) => s.ok_or_else(|| ErrorKind::AttributeNotFound { name: "url".into() })?,
|
||||
Err(cek) => return Ok(Err(cek)),
|
||||
};
|
||||
|
@ -68,12 +69,19 @@ async fn extract_fetch_args(
|
|||
None => None,
|
||||
};
|
||||
|
||||
Ok(Ok(NixFetchArgs { url, name, sha256 }))
|
||||
Ok(Ok(NixFetchArgs {
|
||||
url_str,
|
||||
name,
|
||||
sha256,
|
||||
}))
|
||||
}
|
||||
|
||||
#[allow(unused_variables)] // for the `state` arg, for now
|
||||
#[builtins(state = "Rc<TvixStoreIO>")]
|
||||
pub(crate) mod fetcher_builtins {
|
||||
use crate::builtins::FetcherError;
|
||||
use url::Url;
|
||||
|
||||
use super::*;
|
||||
|
||||
/// Consumes a fetch.
|
||||
|
@ -130,12 +138,16 @@ pub(crate) mod fetcher_builtins {
|
|||
// Derive the name from the URL basename if not set explicitly.
|
||||
let name = args
|
||||
.name
|
||||
.unwrap_or_else(|| url_basename(&args.url).to_owned());
|
||||
.unwrap_or_else(|| url_basename(&args.url_str).to_owned());
|
||||
|
||||
// Parse the URL.
|
||||
let url = Url::parse(&args.url_str)
|
||||
.map_err(|e| ErrorKind::TvixError(Rc::new(FetcherError::InvalidUrl(e))))?;
|
||||
|
||||
fetch_lazy(
|
||||
state,
|
||||
name,
|
||||
Fetch::URL(args.url, args.sha256.map(NixHash::Sha256)),
|
||||
Fetch::URL(url, args.sha256.map(NixHash::Sha256)),
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -156,7 +168,11 @@ pub(crate) mod fetcher_builtins {
|
|||
.name
|
||||
.unwrap_or_else(|| DEFAULT_NAME_FETCH_TARBALL.to_owned());
|
||||
|
||||
fetch_lazy(state, name, Fetch::Tarball(args.url, args.sha256))
|
||||
// Parse the URL.
|
||||
let url = Url::parse(&args.url_str)
|
||||
.map_err(|e| ErrorKind::TvixError(Rc::new(FetcherError::InvalidUrl(e))))?;
|
||||
|
||||
fetch_lazy(state, name, Fetch::Tarball(url, args.sha256))
|
||||
}
|
||||
|
||||
#[builtin("fetchGit")]
|
||||
|
|
|
@ -15,16 +15,17 @@ use tvix_castore::{
|
|||
proto::{node::Node, FileNode},
|
||||
};
|
||||
use tvix_store::{pathinfoservice::PathInfoService, proto::PathInfo};
|
||||
use url::Url;
|
||||
|
||||
use crate::{builtins::FetcherError, decompression::DecompressedReader};
|
||||
|
||||
/// Representing options for doing a fetch.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
#[derive(Clone, Eq, PartialEq)]
|
||||
pub enum Fetch {
|
||||
/// Fetch a literal file from the given URL, with an optional expected
|
||||
/// NixHash of it.
|
||||
/// TODO: check if this is *always* sha256, and if so, make it [u8; 32].
|
||||
URL(String, Option<NixHash>),
|
||||
URL(Url, Option<NixHash>),
|
||||
|
||||
/// Fetch a tarball from the given URL and unpack.
|
||||
/// The file must be a tape archive (.tar) compressed with gzip, bzip2 or xz.
|
||||
|
@ -32,12 +33,55 @@ pub enum Fetch {
|
|||
/// so it is best if the tarball contains a single directory at top level.
|
||||
/// Optionally, a sha256 digest can be provided to verify the unpacked
|
||||
/// contents against.
|
||||
Tarball(String, Option<[u8; 32]>),
|
||||
Tarball(Url, Option<[u8; 32]>),
|
||||
|
||||
/// TODO
|
||||
Git(),
|
||||
}
|
||||
|
||||
// Drops potentially sensitive username and password from a URL.
|
||||
fn redact_url(url: &Url) -> Url {
|
||||
let mut url = url.to_owned();
|
||||
if !url.username().is_empty() {
|
||||
let _ = url.set_username("redacted");
|
||||
}
|
||||
|
||||
if url.password().is_some() {
|
||||
let _ = url.set_password(Some("redacted"));
|
||||
}
|
||||
|
||||
url
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Fetch {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Fetch::URL(url, nixhash) => {
|
||||
let url = redact_url(url);
|
||||
if let Some(nixhash) = nixhash {
|
||||
write!(f, "URL [url: {}, exp_hash: Some({})]", &url, nixhash)
|
||||
} else {
|
||||
write!(f, "URL [url: {}, exp_hash: None]", &url)
|
||||
}
|
||||
}
|
||||
Fetch::Tarball(url, exp_digest) => {
|
||||
let url = redact_url(url);
|
||||
if let Some(exp_digest) = exp_digest {
|
||||
write!(
|
||||
f,
|
||||
"Tarball [url: {}, exp_hash: Some({})]",
|
||||
url,
|
||||
NixHash::Sha256(*exp_digest)
|
||||
)
|
||||
} else {
|
||||
write!(f, "Tarball [url: {}, exp_hash: None]", url)
|
||||
}
|
||||
}
|
||||
Fetch::Git() => todo!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Fetch {
|
||||
/// If the [Fetch] contains an expected hash upfront, returns the resulting
|
||||
/// store path.
|
||||
|
@ -76,18 +120,32 @@ impl<BS, DS, PS> Fetcher<BS, DS, PS> {
|
|||
}
|
||||
|
||||
/// Constructs a HTTP request to the passed URL, and returns a AsyncReadBuf to it.
|
||||
async fn download<'a>(
|
||||
&self,
|
||||
url: &str,
|
||||
) -> Result<impl AsyncBufRead + Unpin + 'a, reqwest::Error> {
|
||||
let resp = self.http_client.get(url).send().await?;
|
||||
Ok(tokio_util::io::StreamReader::new(
|
||||
resp.bytes_stream().map_err(|e| {
|
||||
let e = e.without_url();
|
||||
warn!(%e, "failed to get response body");
|
||||
std::io::Error::new(std::io::ErrorKind::BrokenPipe, e)
|
||||
}),
|
||||
))
|
||||
/// In case the URI uses the file:// scheme, use tokio::fs to open it.
|
||||
async fn download(&self, url: Url) -> Result<Box<dyn AsyncBufRead + Unpin>, FetcherError> {
|
||||
match url.scheme() {
|
||||
"file" => {
|
||||
let f = tokio::fs::File::open(url.to_file_path().map_err(|_| {
|
||||
// "Returns Err if the host is neither empty nor "localhost"
|
||||
// (except on Windows, where file: URLs may have a non-local host)"
|
||||
FetcherError::Io(std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
"invalid host for file:// scheme",
|
||||
))
|
||||
})?)
|
||||
.await?;
|
||||
Ok(Box::new(tokio::io::BufReader::new(f)))
|
||||
}
|
||||
_ => {
|
||||
let resp = self.http_client.get(url).send().await?;
|
||||
Ok(Box::new(tokio_util::io::StreamReader::new(
|
||||
resp.bytes_stream().map_err(|e| {
|
||||
let e = e.without_url();
|
||||
warn!(%e, "failed to get response body");
|
||||
std::io::Error::new(std::io::ErrorKind::BrokenPipe, e)
|
||||
}),
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -122,7 +180,7 @@ where
|
|||
match fetch {
|
||||
Fetch::URL(url, exp_nixhash) => {
|
||||
// Construct a AsyncRead reading from the data as its downloaded.
|
||||
let mut r = self.download(&url).await?;
|
||||
let mut r = self.download(url.clone()).await?;
|
||||
|
||||
// Construct a AsyncWrite to write into the BlobService.
|
||||
let mut blob_writer = self.blob_service.open_write().await;
|
||||
|
@ -175,7 +233,7 @@ where
|
|||
}
|
||||
Fetch::Tarball(url, exp_nar_sha256) => {
|
||||
// Construct a AsyncRead reading from the data as its downloaded.
|
||||
let r = self.download(&url).await?;
|
||||
let r = self.download(url.clone()).await?;
|
||||
|
||||
// Pop compression.
|
||||
let r = DecompressedReader::new(r);
|
||||
|
@ -324,13 +382,13 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn fetchurl_store_path() {
|
||||
let url = "https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch";
|
||||
let url = Url::parse("https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch").unwrap();
|
||||
let exp_nixhash = NixHash::Sha256(
|
||||
nixbase32::decode_fixed("0nawkl04sj7psw6ikzay7kydj3dhd0fkwghcsf5rzaw4bmp4kbax")
|
||||
.unwrap(),
|
||||
);
|
||||
|
||||
let fetch = Fetch::URL(url.into(), Some(exp_nixhash));
|
||||
let fetch = Fetch::URL(url, Some(exp_nixhash));
|
||||
assert_eq!(
|
||||
"06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch",
|
||||
&fetch
|
||||
|
@ -343,11 +401,11 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn fetch_tarball_store_path() {
|
||||
let url = "https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz";
|
||||
let url = Url::parse("https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz").unwrap();
|
||||
let exp_nixbase32 =
|
||||
nixbase32::decode_fixed("1hf6cgaci1n186kkkjq106ryf8mmlq9vnwgfwh625wa8hfgdn4dm")
|
||||
.unwrap();
|
||||
let fetch = Fetch::Tarball(url.into(), Some(exp_nixbase32));
|
||||
let fetch = Fetch::Tarball(url, Some(exp_nixbase32));
|
||||
|
||||
assert_eq!(
|
||||
"7adgvk5zdfq4pwrhsm3n9lzypb12gw0g-source",
|
||||
|
|
|
@ -134,6 +134,7 @@ impl KnownPaths {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use nix_compat::{derivation::Derivation, nixbase32, nixhash::NixHash, store_path::StorePath};
|
||||
use url::Url;
|
||||
|
||||
use crate::fetchers::Fetch;
|
||||
|
||||
|
@ -160,13 +161,13 @@ mod tests {
|
|||
StorePath::from_bytes(b"fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo").expect("must parse");
|
||||
|
||||
static ref FETCH_URL : Fetch = Fetch::URL(
|
||||
"https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch".into(),
|
||||
Url::parse("https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch").unwrap(),
|
||||
Some(NixHash::Sha256(nixbase32::decode_fixed("0nawkl04sj7psw6ikzay7kydj3dhd0fkwghcsf5rzaw4bmp4kbax").unwrap()))
|
||||
);
|
||||
static ref FETCH_URL_OUT_PATH: StorePath = StorePath::from_bytes(b"06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch").unwrap();
|
||||
|
||||
static ref FETCH_TARBALL : Fetch = Fetch::Tarball(
|
||||
"https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz".into(),
|
||||
Url::parse("https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz").unwrap(),
|
||||
Some(nixbase32::decode_fixed("1hf6cgaci1n186kkkjq106ryf8mmlq9vnwgfwh625wa8hfgdn4dm").unwrap())
|
||||
);
|
||||
static ref FETCH_TARBALL_OUT_PATH: StorePath = StorePath::from_bytes(b"7adgvk5zdfq4pwrhsm3n9lzypb12gw0g-source").unwrap();
|
||||
|
|
|
@ -1 +1 @@
|
|||
[ /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch ]
|
||||
[ /nix/store/y0r1p1cqmlvm0yqkz3gxvkc1p8kg2sz8-null /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch ]
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
[
|
||||
# (fetchurl "url") cannot be tested, as that one has to fetch from the
|
||||
# internet to calculate the path.
|
||||
# (fetchurl "url") needs to immediately fetch, but our options without
|
||||
# internet access are fairly limited.
|
||||
# TODO: populate some fixtures at a known location instead.
|
||||
(builtins.fetchurl "file:///dev/null")
|
||||
|
||||
# fetchurl with url and sha256
|
||||
(builtins.fetchurl {
|
||||
|
|
Loading…
Reference in a new issue