refactor(tvix/build): add stricter BuildRequest type
Change-Id: I2950c76bbc2227952e583426bfb3ed34e8da6d2d Reviewed-on: https://cl.tvl.fyi/c/depot/+/12625 Reviewed-by: flokli <flokli@flokli.de> Tested-by: BuildkiteCI
This commit is contained in:
parent
1c1eb68678
commit
a247b25097
3 changed files with 315 additions and 3 deletions
131
tvix/build/src/buildservice/build_request.rs
Normal file
131
tvix/build/src/buildservice/build_request.rs
Normal file
|
@ -0,0 +1,131 @@
|
|||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::path::PathBuf;
|
||||
|
||||
use bytes::Bytes;
|
||||
use tvix_castore::{Node, PathComponent};
|
||||
/// A BuildRequest describes the request of something to be run on the builder.
|
||||
/// It is distinct from an actual \[Build\] that has already happened, or might be
|
||||
/// currently ongoing.
|
||||
///
|
||||
/// A BuildRequest can be seen as a more normalized version of a Derivation
|
||||
/// (parsed from A-Term), "writing out" some of the Nix-internal details about
|
||||
/// how e.g. environment variables in the build are set.
|
||||
///
|
||||
/// Nix has some impurities when building a Derivation, for example the --cores option
|
||||
/// ends up as an environment variable in the build, that's not part of the ATerm.
|
||||
///
|
||||
/// As of now, we serialize this into the BuildRequest, so builders can stay dumb.
|
||||
/// This might change in the future.
|
||||
///
|
||||
/// There's also a big difference when it comes to how inputs are modelled:
|
||||
///
|
||||
/// * Nix only uses store path (strings) to describe the inputs.
|
||||
/// As store paths can be input-addressed, a certain store path can contain
|
||||
/// different contents (as not all store paths are binary reproducible).
|
||||
/// This requires that for every input-addressed input, the builder has access
|
||||
/// to either the input's deriver (and needs to build it) or else a trusted
|
||||
/// source for the built input.
|
||||
/// to upload input-addressed paths, requiring the trusted users concept.
|
||||
/// * tvix-build records a list of tvix.castore.v1.Node as inputs.
|
||||
/// These map from the store path base name to their contents, relieving the
|
||||
/// builder from having to "trust" any input-addressed paths, contrary to Nix.
|
||||
///
|
||||
/// While this approach gives a better hermeticity, it has one downside:
|
||||
/// A BuildRequest can only be sent once the contents of all its inputs are known.
|
||||
///
|
||||
/// As of now, we're okay to accept this, but it prevents uploading an
|
||||
/// entirely-non-IFD subgraph of BuildRequests eagerly.
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub struct BuildRequest {
|
||||
/// The list of all root nodes that should be visible in `inputs_dir` at the
|
||||
/// time of the build.
|
||||
/// As all references are content-addressed, no additional signatures are
|
||||
/// needed to substitute / make these available in the build environment.
|
||||
pub inputs: BTreeMap<PathComponent, Node>,
|
||||
/// The command (and its args) executed as the build script.
|
||||
/// In the case of a Nix derivation, this is usually
|
||||
/// \["/path/to/some-bash/bin/bash", "-e", "/path/to/some/builder.sh"\].
|
||||
pub command_args: Vec<String>,
|
||||
/// The working dir of the command, relative to the build root.
|
||||
/// "build", in the case of Nix.
|
||||
/// This MUST be a clean relative path, without any ".", "..", or superfluous
|
||||
/// slashes.
|
||||
pub working_dir: PathBuf,
|
||||
/// A list of "scratch" paths, relative to the build root.
|
||||
/// These will be write-able during the build.
|
||||
/// \[build, nix/store\] in the case of Nix.
|
||||
/// These MUST be clean relative paths, without any ".", "..", or superfluous
|
||||
/// slashes, and sorted.
|
||||
pub scratch_paths: Vec<PathBuf>,
|
||||
/// The path where the castore input nodes will be located at,
|
||||
/// "nix/store" in case of Nix.
|
||||
/// Builds might also write into here (Nix builds do that).
|
||||
/// This MUST be a clean relative path, without any ".", "..", or superfluous
|
||||
/// slashes.
|
||||
pub inputs_dir: PathBuf,
|
||||
/// The list of output paths the build is expected to produce,
|
||||
/// relative to the root.
|
||||
/// If the path is not produced, the build is considered to have failed.
|
||||
/// These MUST be clean relative paths, without any ".", "..", or superfluous
|
||||
/// slashes, and sorted.
|
||||
pub outputs: Vec<PathBuf>,
|
||||
/// The list of environment variables and their values that should be set
|
||||
/// inside the build environment.
|
||||
/// This includes both environment vars set inside the derivation, as well as
|
||||
/// more "ephemeral" ones like NIX_BUILD_CORES, controlled by the `--cores`
|
||||
/// CLI option of `nix-build`.
|
||||
/// For now, we consume this as an option when turning a Derivation into a BuildRequest,
|
||||
/// similar to how Nix has a `--cores` option.
|
||||
/// We don't want to bleed these very nix-specific sandbox impl details into
|
||||
/// (dumber) builders if we don't have to.
|
||||
/// Environment variables are sorted by their keys.
|
||||
pub environment_vars: Vec<EnvVar>,
|
||||
/// A set of constraints that need to be satisfied on a build host before a
|
||||
/// Build can be started.
|
||||
pub constraints: HashSet<BuildConstraints>,
|
||||
/// Additional (small) files and their contents that should be placed into the
|
||||
/// build environment, but outside inputs_dir.
|
||||
/// Used for passAsFile and structuredAttrs in Nix.
|
||||
pub additional_files: Vec<AdditionalFile>,
|
||||
/// If this is an non-empty list, all paths in `outputs` are scanned for these.
|
||||
/// For Nix, `refscan_needles` would be populated with the nixbase32 hash parts of
|
||||
/// every input store path and output store path. The latter is necessary to scan
|
||||
/// for references between multi-output derivations.
|
||||
pub refscan_needles: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub struct EnvVar {
|
||||
/// name of the environment variable. Must not contain =.
|
||||
pub key: String,
|
||||
pub value: Bytes,
|
||||
}
|
||||
/// BuildConstraints represents certain conditions that must be fulfilled
|
||||
/// inside the build environment to be able to build this.
|
||||
/// Constraints can be things like required architecture and minimum amount of memory.
|
||||
/// The required input paths are *not* represented in here, because it
|
||||
/// wouldn't be hermetic enough - see the comment around inputs too.
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub enum BuildConstraints {
|
||||
/// The system that's needed to execute the build.
|
||||
/// Must not be empty.
|
||||
System(String),
|
||||
/// The amount of memory required to be available for the build, in bytes.
|
||||
MinMemory(u64),
|
||||
/// An absolute path that need to be available in the build
|
||||
/// environment, like `/dev/kvm`.
|
||||
/// This is distinct from the castore nodes in inputs.
|
||||
/// These MUST be clean absolute paths, without any ".", "..", or superfluous
|
||||
/// slashes, and sorted.
|
||||
AvailableReadOnlyPath(PathBuf),
|
||||
/// Whether the build should be able to access the network.
|
||||
NetworkAccess,
|
||||
/// Whether to provide a /bin/sh inside the build environment, usually a static bash.
|
||||
ProvideBinSh,
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub struct AdditionalFile {
|
||||
pub path: PathBuf,
|
||||
pub contents: Bytes,
|
||||
}
|
|
@ -1,7 +1,9 @@
|
|||
use tonic::async_trait;
|
||||
|
||||
use crate::proto::{Build, BuildRequest};
|
||||
use crate::proto::{self, Build};
|
||||
|
||||
pub mod build_request;
|
||||
pub use crate::buildservice::build_request::*;
|
||||
mod dummy;
|
||||
mod from_addr;
|
||||
mod grpc;
|
||||
|
@ -15,5 +17,5 @@ pub use from_addr::from_addr;
|
|||
#[async_trait]
|
||||
pub trait BuildService: Send + Sync {
|
||||
/// TODO: document
|
||||
async fn do_build(&self, request: BuildRequest) -> std::io::Result<Build>;
|
||||
async fn do_build(&self, request: proto::BuildRequest) -> std::io::Result<Build>;
|
||||
}
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use itertools::Itertools;
|
||||
use tvix_castore::DirectoryError;
|
||||
use tvix_castore::{DirectoryError, Node, PathComponent};
|
||||
|
||||
mod grpc_buildservice_wrapper;
|
||||
|
||||
|
@ -201,6 +202,101 @@ impl BuildRequest {
|
|||
}
|
||||
}
|
||||
|
||||
impl TryFrom<BuildRequest> for crate::buildservice::BuildRequest {
|
||||
type Error = ValidateBuildRequestError;
|
||||
fn try_from(value: BuildRequest) -> Result<Self, Self::Error> {
|
||||
// validate input names. Make sure they're sorted
|
||||
|
||||
let mut last_name: bytes::Bytes = "".into();
|
||||
let mut inputs: BTreeMap<PathComponent, Node> = BTreeMap::new();
|
||||
for (i, node) in value.inputs.iter().enumerate() {
|
||||
let (name, node) = node
|
||||
.clone()
|
||||
.into_name_and_node()
|
||||
.map_err(|e| ValidateBuildRequestError::InvalidInputNode(i, e))?;
|
||||
|
||||
if name.as_ref() <= last_name.as_ref() {
|
||||
return Err(ValidateBuildRequestError::InputNodesNotSorted);
|
||||
} else {
|
||||
inputs.insert(name.clone(), node);
|
||||
last_name = name.into();
|
||||
}
|
||||
}
|
||||
|
||||
// validate working_dir
|
||||
if !is_clean_relative_path(&value.working_dir) {
|
||||
Err(ValidateBuildRequestError::InvalidWorkingDir)?;
|
||||
}
|
||||
|
||||
// validate scratch paths
|
||||
for (i, p) in value.scratch_paths.iter().enumerate() {
|
||||
if !is_clean_relative_path(p) {
|
||||
Err(ValidateBuildRequestError::InvalidScratchPath(i))?
|
||||
}
|
||||
}
|
||||
if !is_sorted(value.scratch_paths.iter().map(|e| e.as_bytes())) {
|
||||
Err(ValidateBuildRequestError::ScratchPathsNotSorted)?;
|
||||
}
|
||||
|
||||
// validate inputs_dir
|
||||
if !is_clean_relative_path(&value.inputs_dir) {
|
||||
Err(ValidateBuildRequestError::InvalidInputsDir)?;
|
||||
}
|
||||
|
||||
// validate outputs
|
||||
for (i, p) in value.outputs.iter().enumerate() {
|
||||
if !is_clean_relative_path(p) {
|
||||
Err(ValidateBuildRequestError::InvalidOutputPath(i))?
|
||||
}
|
||||
}
|
||||
if !is_sorted(value.outputs.iter().map(|e| e.as_bytes())) {
|
||||
Err(ValidateBuildRequestError::OutputsNotSorted)?;
|
||||
}
|
||||
|
||||
// validate environment_vars.
|
||||
for (i, e) in value.environment_vars.iter().enumerate() {
|
||||
if e.key.is_empty() || e.key.contains('=') {
|
||||
Err(ValidateBuildRequestError::InvalidEnvVar(i))?
|
||||
}
|
||||
}
|
||||
if !is_sorted(value.environment_vars.iter().map(|e| e.key.as_bytes())) {
|
||||
Err(ValidateBuildRequestError::EnvVarNotSorted)?;
|
||||
}
|
||||
|
||||
// validate build constraints
|
||||
let constraints = value
|
||||
.constraints
|
||||
.map_or(Ok(HashSet::new()), |constraints| {
|
||||
constraints
|
||||
.try_into()
|
||||
.map_err(ValidateBuildRequestError::InvalidBuildConstraints)
|
||||
})?;
|
||||
|
||||
// validate additional_files
|
||||
for (i, additional_file) in value.additional_files.iter().enumerate() {
|
||||
if !is_clean_relative_path(&additional_file.path) {
|
||||
Err(ValidateBuildRequestError::InvalidAdditionalFilePath(i))?
|
||||
}
|
||||
}
|
||||
if !is_sorted(value.additional_files.iter().map(|e| e.path.as_bytes())) {
|
||||
Err(ValidateBuildRequestError::AdditionalFilesNotSorted)?;
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
inputs,
|
||||
command_args: value.command_args,
|
||||
working_dir: PathBuf::from(value.working_dir),
|
||||
scratch_paths: value.scratch_paths.iter().map(PathBuf::from).collect(),
|
||||
inputs_dir: PathBuf::from(value.inputs_dir),
|
||||
outputs: value.outputs.iter().map(PathBuf::from).collect(),
|
||||
environment_vars: value.environment_vars.into_iter().map(Into::into).collect(),
|
||||
constraints,
|
||||
additional_files: value.additional_files.into_iter().map(Into::into).collect(),
|
||||
refscan_needles: value.refscan_needles,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Errors that occur during the validation of
|
||||
/// [build_request::BuildConstraints] messages.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
|
@ -235,7 +331,90 @@ impl build_request::BuildConstraints {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<build_request::EnvVar> for crate::buildservice::EnvVar {
    /// Converts a protobuf `EnvVar` into its `buildservice` counterpart,
    /// moving key and value over unchanged.
    fn from(value: build_request::EnvVar) -> Self {
        let build_request::EnvVar { key, value } = value;
        Self { key, value }
    }
}
|
||||
|
||||
impl From<crate::buildservice::EnvVar> for build_request::EnvVar {
    /// Converts a `buildservice` `EnvVar` back into the protobuf message,
    /// moving key and value over unchanged.
    fn from(value: crate::buildservice::EnvVar) -> Self {
        let crate::buildservice::EnvVar { key, value } = value;
        Self { key, value }
    }
}
|
||||
|
||||
impl From<build_request::AdditionalFile> for crate::buildservice::AdditionalFile {
|
||||
fn from(value: build_request::AdditionalFile) -> Self {
|
||||
Self {
|
||||
path: PathBuf::from(value.path),
|
||||
contents: value.contents,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<crate::buildservice::AdditionalFile> for build_request::AdditionalFile {
    /// Converts a `buildservice` `AdditionalFile` back into the protobuf
    /// message.
    ///
    /// # Panics
    ///
    /// Panics if the path is not valid UTF-8, which is treated as a bug.
    fn from(value: crate::buildservice::AdditionalFile) -> Self {
        let crate::buildservice::AdditionalFile { path, contents } = value;
        let path = path
            .to_str()
            .expect("Tvix bug: expected a valid path")
            .to_string();

        Self { path, contents }
    }
}
|
||||
|
||||
impl TryFrom<build_request::BuildConstraints> for HashSet<crate::buildservice::BuildConstraints> {
|
||||
type Error = ValidateBuildConstraintsError;
|
||||
fn try_from(value: build_request::BuildConstraints) -> Result<Self, Self::Error> {
|
||||
use crate::buildservice::BuildConstraints;
|
||||
|
||||
// validate system
|
||||
if value.system.is_empty() {
|
||||
Err(ValidateBuildConstraintsError::InvalidSystem)?;
|
||||
}
|
||||
|
||||
let mut build_constraints = HashSet::from([
|
||||
BuildConstraints::System(value.system),
|
||||
BuildConstraints::MinMemory(value.min_memory),
|
||||
]);
|
||||
|
||||
// validate available_ro_paths
|
||||
for (i, p) in value.available_ro_paths.iter().enumerate() {
|
||||
if !is_clean_absolute_path(p) {
|
||||
Err(ValidateBuildConstraintsError::InvalidAvailableRoPaths(i))?
|
||||
} else {
|
||||
build_constraints.insert(BuildConstraints::AvailableReadOnlyPath(PathBuf::from(p)));
|
||||
}
|
||||
}
|
||||
if !is_sorted(value.available_ro_paths.iter().map(|e| e.as_bytes())) {
|
||||
Err(ValidateBuildConstraintsError::AvailableRoPathsNotSorted)?;
|
||||
}
|
||||
|
||||
if value.network_access {
|
||||
build_constraints.insert(BuildConstraints::NetworkAccess);
|
||||
}
|
||||
if value.provide_bin_sh {
|
||||
build_constraints.insert(BuildConstraints::ProvideBinSh);
|
||||
}
|
||||
|
||||
Ok(build_constraints)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
// TODO: add testcases for constraints special cases. The default cases in the protos
|
||||
// should result in the constraints not being added. For example min_memory 0 can be omitted.
|
||||
// Also interesting testcases are "merging semantics". MinMemory(1) and MinMemory(100) will
|
||||
// result in min_memory 100, multiple AvailableReadOnlyPaths need to be merged. Contradicting
|
||||
// system constraints need to fail somewhere (maybe an assertion, as only buggy code can construct it)
|
||||
mod tests {
|
||||
use super::{is_clean_path, is_clean_relative_path};
|
||||
use rstest::rstest;
|
||||
|
|
Loading…
Add table
Reference in a new issue