diff --git a/tvix/docs/src/TODO.md b/tvix/docs/src/TODO.md index 92d7c4cac..127fb6f4d 100644 --- a/tvix/docs/src/TODO.md +++ b/tvix/docs/src/TODO.md @@ -127,15 +127,6 @@ Some more fetcher-related builtins need work: - `fetchTree` (hairy, seems there's no proper spec and the URL syntax seems subject to change/underdocumented) -### `builtins.path` roundtrip for flat -`builtins.path` currently uses `filtered_ingest` also for the non-recursive -case, then reads through the blob contents again to get the sha256. - -We should take care of assembling the root node on our own, and pipe the data -through sha256 too (via `InspectReader`, see `glue/fetcher` for an example). - -This avoids some roundtrips, and is probably faster. - ### Derivation -> Build While we have some support for `structuredAttrs` and `fetchClosure` (at least enough to calculate output hashes, aka produce identical ATerm), the code diff --git a/tvix/glue/src/builtins/import.rs b/tvix/glue/src/builtins/import.rs index 4a8a29b41..9257975a0 100644 --- a/tvix/glue/src/builtins/import.rs +++ b/tvix/glue/src/builtins/import.rs @@ -109,15 +109,16 @@ mod import_builtins { use super::*; + use crate::tvix_store_io::TvixStoreIO; use nix_compat::nixhash::{CAHash, NixHash}; use nix_compat::store_path::StorePath; + use sha2::Digest; + use tokio::io::AsyncWriteExt; + use tvix_castore::proto::node::Node; + use tvix_castore::proto::FileNode; use tvix_eval::generators::Gen; use tvix_eval::{generators::GenCo, ErrorKind, Value}; - use tvix_eval::{NixContextElement, NixString}; - - use tvix_castore::B3Digest; - - use crate::tvix_store_io::TvixStoreIO; + use tvix_eval::{FileType, NixContextElement, NixString}; #[builtin("path")] async fn builtin_path( @@ -167,54 +168,126 @@ mod import_builtins { }) .transpose()?; - // FUTUREWORK(performance): this opens the file instead of using a stat-like - // system call to the file. 
- if !recursive_ingestion && state.open(path.as_ref()).is_err() { - Err(ImportError::FlatImportOfNonFile( - path.to_string_lossy().to_string(), - ))?; - } + // Check if the path points to a regular file. + // If it does, the filter function is never executed. + // TODO: follow symlinks and check their type instead + let (root_node, ca_hash) = match state.file_type(path.as_ref())? { + FileType::Regular => { + let mut file = state.open(path.as_ref())?; + // This is a single file, copy it to the blobservice directly. + let mut hash = sha2::Sha256::new(); + let mut blob_size = 0; + let mut blob_writer = state + .tokio_handle + .block_on(async { state.blob_service.open_write().await }); - let root_node = filtered_ingest(state.clone(), co, path.as_ref(), filter).await?; - let ca: CAHash = if recursive_ingestion { - CAHash::Nar(NixHash::Sha256(state.tokio_handle.block_on(async { - Ok::<_, tvix_eval::ErrorKind>( + let mut buf = [0u8; 4096]; + + loop { + // read bytes into buffer, break out if EOF + let len = file.read(&mut buf)?; + if len == 0 { + break; + } + blob_size += len as u64; + + let data = &buf[0..len]; + + // add to blobwriter state - .nar_calculation_service - .as_ref() - .calculate_nar(&root_node) - .await - .map_err(|e| ErrorKind::TvixError(Rc::new(e)))? - .1, - ) - })?)) - } else { - let digest: B3Digest = match root_node { - tvix_castore::proto::node::Node::File(ref fnode) => { - // It's already validated. - fnode.digest.clone().try_into().unwrap() + .tokio_handle + .block_on(async { blob_writer.write_all(data).await })?; + + // update the sha256 hash function. We can skip that if we're not using it. + if !recursive_ingestion { + hash.update(data); + } } - // We cannot hash anything else than file in flat import mode. - _ => { + + // close the blob writer, get back the b3 digest. 
+ let blob_digest = state + .tokio_handle + .block_on(async { blob_writer.close().await })?; + + let root_node = Node::File(FileNode { + // The name gets set further down, while constructing the PathInfo. + name: "".into(), + digest: blob_digest.into(), + size: blob_size, + executable: false, + }); + + let ca_hash = if recursive_ingestion { + let (_nar_size, nar_sha256) = state + .tokio_handle + .block_on(async { + state + .nar_calculation_service + .as_ref() + .calculate_nar(&root_node) + .await + }) + .map_err(|e| tvix_eval::ErrorKind::TvixError(Rc::new(e)))?; + CAHash::Nar(NixHash::Sha256(nar_sha256)) + } else { + CAHash::Flat(NixHash::Sha256(hash.finalize().into())) + }; + + (root_node, ca_hash) + } + + FileType::Directory => { + if !recursive_ingestion { return Err(ImportError::FlatImportOfNonFile( path.to_string_lossy().to_string(), - ) - .into()) + ))?; } - }; - // FUTUREWORK: avoid hashing again. - CAHash::Flat(NixHash::Sha256( - state + // do the filtered ingest + let root_node = filtered_ingest(state.clone(), co, path.as_ref(), filter).await?; + + // calculate the NAR sha256 + let (_nar_size, nar_sha256) = state .tokio_handle - .block_on(async { state.blob_to_sha256_hash(digest).await })?, - )) + .block_on(async { + state + .nar_calculation_service + .as_ref() + .calculate_nar(&root_node) + .await + }) + .map_err(|e| tvix_eval::ErrorKind::TvixError(Rc::new(e)))?; + + let ca_hash = CAHash::Nar(NixHash::Sha256(nar_sha256)); + + (root_node, ca_hash) + } + FileType::Symlink => { + // FUTUREWORK: Nix follows a symlink if it's at the root, + // except if it's not resolve-able (NixOS/nix#7761). + return Err(tvix_eval::ErrorKind::IO { + path: Some(path.to_path_buf()), + error: Rc::new(std::io::Error::new( + std::io::ErrorKind::Unsupported, + "builtins.path pointing to a symlink is ill-defined.", + )), + }); + } + FileType::Unknown => { + return Err(tvix_eval::ErrorKind::IO { + path: Some(path.to_path_buf()), + error: Rc::new(std::io::Error::new(
std::io::ErrorKind::Unsupported, + "unsupported file type", + )), + }) + } }; - let obtained_hash = ca.hash().clone().into_owned(); + let obtained_hash = ca_hash.hash().clone().into_owned(); let (path_info, _hash, output_path) = state.tokio_handle.block_on(async { state - .node_to_path_info(name.as_ref(), path.as_ref(), ca, root_node) + .node_to_path_info(name.as_ref(), path.as_ref(), ca_hash, root_node) .await })?; diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs index dd034d74b..b0367f60a 100644 --- a/tvix/glue/src/tvix_store_io.rs +++ b/tvix/glue/src/tvix_store_io.rs @@ -4,7 +4,6 @@ use futures::{StreamExt, TryStreamExt}; use nix_compat::nixhash::NixHash; use nix_compat::store_path::StorePathRef; use nix_compat::{nixhash::CAHash, store_path::StorePath}; -use sha2::{Digest, Sha256}; use std::{ cell::RefCell, collections::BTreeSet, @@ -19,7 +18,6 @@ use tvix_build::buildservice::BuildService; use tvix_castore::proto::node::Node; use tvix_eval::{EvalIO, FileType, StdIO}; use tvix_store::nar::NarCalculationService; -use tvix_store::utils::AsyncIoBridge; use tvix_castore::{ blobservice::BlobService, @@ -410,26 +408,6 @@ impl TvixStoreIO { Ok(output_path) } - /// Transforms a BLAKE-3 digest into a SHA256 digest - /// by re-hashing the whole file. - pub(crate) async fn blob_to_sha256_hash(&self, blob_digest: B3Digest) -> io::Result<[u8; 32]> { - let mut reader = self - .blob_service - .open_read(&blob_digest) - .await? - .ok_or_else(|| { - io::Error::new( - io::ErrorKind::NotFound, - format!("blob represented by digest: '{}' not found", blob_digest), - ) - })?; - // It is fine to use `AsyncIoBridge` here because hashing is not actually I/O. - let mut hasher = AsyncIoBridge(Sha256::new()); - - tokio::io::copy(&mut reader, &mut hasher).await?; - Ok(hasher.0.finalize().into()) - } - pub async fn store_path_exists<'a>(&'a self, store_path: StorePathRef<'a>) -> io::Result { Ok(self .path_info_service