diff --git a/integration-tests/build-on-target/default.nix b/integration-tests/build-on-target/default.nix new file mode 100644 index 0000000..2e7591e --- /dev/null +++ b/integration-tests/build-on-target/default.nix @@ -0,0 +1,35 @@ +{ pkgs ? import ../nixpkgs.nix }: + +let + tools = pkgs.callPackage ../tools.nix { + deployers = [ "deployer" "alpha" "beta" ]; + targets = []; + }; +in tools.makeTest { + name = "colmena-build-on-target"; + + bundle = ./.; + + testScript = '' + # The actual build will be initiated on alpha + deployer.succeed("cd /tmp/bundle && ${tools.colmenaExec} apply --on alpha") + + with subtest("Check that the new configurations are indeed applied"): + alpha.succeed("grep SUCCESS /etc/deployment") + + alpha_profile = alpha.succeed("readlink /run/current-system") + + with subtest("Check that the built profile is not on the deployer"): + deployer.fail(f"nix-store -qR {alpha_profile}") + + with subtest("Check that we can override per-node settings and build locally"): + deployer.succeed("cd /tmp/bundle && ${tools.colmenaExec} build --on alpha --no-build-on-target") + deployer.succeed(f"nix-store -qR {alpha_profile}") + + with subtest("Check that we can override per-node settings and build remotely"): + deployer.succeed("cd /tmp/bundle && ${tools.colmenaExec} apply --on beta --build-on-target") + beta.succeed("grep SUCCESS /etc/deployment") + profile = beta.succeed("readlink /run/current-system") + deployer.fail(f"nix-store -qR {profile}") + ''; +} diff --git a/integration-tests/build-on-target/hive.nix b/integration-tests/build-on-target/hive.nix new file mode 100644 index 0000000..3fefa43 --- /dev/null +++ b/integration-tests/build-on-target/hive.nix @@ -0,0 +1,33 @@ +let + tools = import ./tools.nix { + insideVm = true; + deployers = [ "deployer" "alpha" "beta" ]; + targets = []; + }; +in { + meta = { + nixpkgs = tools.pkgs; + }; + + defaults = { + environment.etc."deployment".text = "SUCCESS"; + }; + + deployer = tools.getStandaloneConfigFor 
"deployer"; + + alpha = { + imports = [ + (tools.getStandaloneConfigFor "alpha") + ]; + + deployment.buildOnTarget = true; + }; + + beta = { + imports = [ + (tools.getStandaloneConfigFor "beta") + ]; + + deployment.buildOnTarget = false; + }; +} diff --git a/integration-tests/default.nix b/integration-tests/default.nix index 89b31c6..3f1707e 100644 --- a/integration-tests/default.nix +++ b/integration-tests/default.nix @@ -1,6 +1,7 @@ { apply = import ./apply {}; apply-local = import ./apply-local {}; + build-on-target = import ./build-on-target {}; exec = import ./exec {}; flakes = import ./flakes {}; parallel = import ./parallel {}; diff --git a/integration-tests/tools.nix b/integration-tests/tools.nix index 1288d8e..92eb0b0 100644 --- a/integration-tests/tools.nix +++ b/integration-tests/tools.nix @@ -55,6 +55,11 @@ let ]; }; + services.openssh.enable = true; + users.users.root.openssh.authorizedKeys.keys = [ + sshKeys.snakeOilPublicKey + ]; + environment.systemPackages = with pkgs; [ git # for git flake tests diff --git a/manual/src/SUMMARY.md b/manual/src/SUMMARY.md index a0120cb..68c2d31 100644 --- a/manual/src/SUMMARY.md +++ b/manual/src/SUMMARY.md @@ -10,6 +10,7 @@ - [Secrets](./features/keys.md) - [Ad Hoc Evaluation](./features/eval.md) - [Parallelism](./features/parallelism.md) + - [Remote Builds](./features/remote-builds.md) - [Examples](./examples/index.md) - [Multi-Architecture Deployments](./examples/multi-arch.md) - [Reference](./reference/index.md) diff --git a/manual/src/features/index.md b/manual/src/features/index.md index 2cd4b92..5dad8fd 100644 --- a/manual/src/features/index.md +++ b/manual/src/features/index.md @@ -7,3 +7,4 @@ This section introduces the main features in Colmena: - **[Secrets](keys.md)** - Deploying sensitive files separate from the main configuration - **[Ad Hoc Evaluation](eval.md)** - Evaluating a Nix expression with access to your configuration - **[Parallelism](parallelism.md)** - Controlling how Colmena parallelizes the 
deployment process +- **[Remote Builds](remote-builds.md)** - Building system profiles on remote machines diff --git a/manual/src/features/remote-builds.md b/manual/src/features/remote-builds.md new file mode 100644 index 0000000..12c632f --- /dev/null +++ b/manual/src/features/remote-builds.md @@ -0,0 +1,21 @@ +# Remote Builds + +If the host running Colmena is not powerful enough, consider offloading the actual builds to remote machines. +Colmena supports two ways to achieve this: + +## Using Colmena's `deployment.buildOnTarget` + +If you set [`deployment.buildOnTarget = true;`](../reference/deployment.md#deploymentbuildontarget) for a node, then the actual build process will be initiated on the node itself. +Colmena will evaluate the configuration locally before copying the derivations to the target node. +You can temporarily enable this for all nodes by passing `--build-on-target` on the command line, or disable it with `--no-build-on-target`. + +This is most useful in scenarios where the machine running Colmena is bandwidth-constrained, or it's inconvenient to configure designated builders beforehand. +With this method, the build results will _not_ be copied back to the local machine or otherwise shared across the target nodes. +If you have custom packages used on multiple nodes, the work required to build those packages will be duplicated across the nodes. + +## Using the native distributed build feature in Nix + +When [distributed build](https://nixos.org/manual/nix/unstable/advanced-topics/distributed-builds.html) is enabled, Nix will transparently forward builds to the configured builders. +After the builds are done, Nix will copy the results back to the local machine. + +Builders can either be configured globally or in your configuration with [`meta.machinesFile`](../reference/meta.md#machinesfile). 
diff --git a/src/command/apply.rs b/src/command/apply.rs index b03e559..b3b5142 100644 --- a/src/command/apply.rs +++ b/src/command/apply.rs @@ -1,7 +1,7 @@ use std::env; use std::path::PathBuf; -use clap::{Arg, App, SubCommand, ArgMatches}; +use clap::{Arg, App, SubCommand, ArgMatches, ArgSettings}; use crate::nix::deployment::{ Deployment, @@ -89,6 +89,20 @@ To upload keys without building or deploying the rest of the configuration, use .help("Do not use gzip") .long_help("Disables the use of gzip when copying closures to the remote host.") .takes_value(false)) + .arg(Arg::with_name("build-on-target") + .long("build-on-target") + .help("Build the system profiles on the target nodes") + .long_help(r#"Build the system profiles on the target nodes themselves. + +If enabled, the system profiles will be built on the target nodes themselves, not on the host running Colmena itself. +This overrides per-node preferences set in `deployment.buildOnTarget`. +To temporarily disable remote build on all nodes, use `--no-build-on-target`. 
+"#) + .takes_value(false)) + .arg(Arg::with_name("no-build-on-target") + .long("no-build-on-target") + .set(ArgSettings::Hidden) + .takes_value(false)) .arg(Arg::with_name("force-replace-unknown-profiles") .long("force-replace-unknown-profiles") .help("Ignore all targeted nodes deployment.replaceUnknownProfiles setting") @@ -146,6 +160,12 @@ pub async fn run(_global_args: &ArgMatches<'_>, local_args: &ArgMatches<'_>) -> options.set_create_gc_roots(true); } + if local_args.is_present("no-build-on-target") { + options.set_force_build_on_target(false); + } else if local_args.is_present("build-on-target") { + options.set_force_build_on_target(true); + } + options }; diff --git a/src/nix/deployment/mod.rs b/src/nix/deployment/mod.rs index ec9abfa..b429d2b 100644 --- a/src/nix/deployment/mod.rs +++ b/src/nix/deployment/mod.rs @@ -31,6 +31,7 @@ use super::{ Profile, ProfileDerivation, CopyDirection, + CopyOptions, key::{Key, UploadAt as UploadKeyAt}, }; use super::host; @@ -229,8 +230,29 @@ impl Deployment { let mut futures = Vec::new(); for (name, profile_drv) in profile_drvs.iter() { - let target = chunk.remove(name).unwrap(); - futures.push(self.clone().deploy_node(parent.clone(), target, profile_drv.clone())); + let mut target = chunk.remove(name).unwrap(); + + if let Some(force_build_on_target) = self.options.force_build_on_target { + target.config.set_build_on_target(force_build_on_target); + } + + let job_handle = parent.clone(); + let arc_self = self.clone(); + futures.push(async move { + let (target, profile) = { + if target.config.build_on_target() { + arc_self.clone().build_on_node(job_handle.clone(), target, profile_drv.clone()).await? + } else { + arc_self.clone().build_and_push_node(job_handle.clone(), target, profile_drv.clone()).await? 
+ } + }; + + if arc_self.goal.requires_activation() { + arc_self.activate_node(job_handle, target, profile).await + } else { + Ok(()) + } + }); } join_all(futures).await @@ -273,14 +295,45 @@ impl Deployment { }).await } - /// Builds, pushes, and optionally activates a system profile on a node. - /// - /// This will also upload keys to the node. - async fn deploy_node(self: DeploymentHandle, parent: JobHandle, mut target: TargetNode, profile_drv: ProfileDerivation) - -> NixResult<()> + /// Builds a system profile directly on the node itself. + async fn build_on_node(self: DeploymentHandle, parent: JobHandle, mut target: TargetNode, profile_drv: ProfileDerivation) + -> NixResult<(TargetNode, Profile)> + { + let nodes = vec![target.name.clone()]; + + let permit = self.parallelism_limit.apply.acquire().await.unwrap(); + + let build_job = parent.create_job(JobType::Build, nodes.clone())?; + let (target, profile) = build_job.run(|job| async move { + if target.host.is_none() { + return Err(NixError::Unsupported); + } + + let mut host = target.host.as_mut().unwrap(); + host.set_job(Some(job.clone())); + + host.copy_closure( + profile_drv.as_store_path(), + CopyDirection::ToRemote, + CopyOptions::default().include_outputs(true), + ).await?; + + let profile = profile_drv.realize_remote(&mut host).await?; + + job.success_with_message(format!("Built {:?} on target node", profile.as_path()))?; + Ok((target, profile)) + }).await?; + + drop(permit); + + Ok((target, profile)) + } + + /// Builds and pushes a system profile on a node. 
+ async fn build_and_push_node(self: DeploymentHandle, parent: JobHandle, mut target: TargetNode, profile_drv: ProfileDerivation) + -> NixResult<(TargetNode, Profile)> { let nodes = vec![target.name.clone()]; - let target_name = target.name.clone(); let permit = self.parallelism_limit.apply.acquire().await.unwrap(); @@ -292,21 +345,21 @@ impl Deployment { let mut builder = host::local(arc_self.nix_options.clone()); builder.set_job(Some(job.clone())); - let profile = profile_drv.realize(&mut *builder).await?; + let profile = profile_drv.realize(&mut builder).await?; job.success_with_message(format!("Built {:?}", profile.as_path()))?; Ok(profile) }).await?; if self.goal == Goal::Build { - return Ok(()); + return Ok((target, profile)); } // Push closure to remote let push_job = parent.create_job(JobType::Push, nodes.clone())?; let push_profile = profile.clone(); let arc_self = self.clone(); - let mut target = push_job.run(|job| async move { + let target = push_job.run(|job| async move { if target.host.is_none() { return Err(NixError::Unsupported); } @@ -321,10 +374,21 @@ impl Deployment { Ok(target) }).await?; - if !self.goal.requires_activation() { - // We are done here :) - return Ok(()); - } + drop(permit); + + Ok((target, profile)) + } + + /// Activates a system profile on a node. + /// + /// This will also upload keys to the node. 
+ async fn activate_node(self: DeploymentHandle, parent: JobHandle, mut target: TargetNode, profile: Profile) + -> NixResult<()> + { + let nodes = vec![target.name.clone()]; + let target_name = target.name.clone(); + + let permit = self.parallelism_limit.apply.acquire().await.unwrap(); // Upload pre-activation keys let mut target = if self.options.upload_keys { @@ -386,7 +450,7 @@ impl Deployment { }).await?; // Upload post-activation keys - if self.options.upload_keys { + let target = if self.options.upload_keys { let job = parent.create_job(JobType::UploadKeys, nodes.clone())?; job.run_waiting(|job| async move { let keys = target.config.keys.iter() @@ -396,7 +460,7 @@ impl Deployment { if keys.is_empty() { job.noop("No post-activation keys to upload".to_string())?; - return Ok(()); + return Ok(target); } job.state(JobState::Running)?; @@ -407,15 +471,21 @@ impl Deployment { host.upload_keys(&keys, true).await?; job.success_with_message("Uploaded keys (post-activation)".to_string())?; - Ok(()) - }).await?; - } + Ok(target) + }).await? + } else { + target + }; // Create GC root if self.options.create_gc_roots { let job = parent.create_job(JobType::CreateGcRoots, nodes.clone())?; let arc_self = self.clone(); job.run_waiting(|job| async move { + if target.config.build_on_target() { + job.noop("The system profile was built on target node itself".to_string())?; + } + if let Some(dir) = arc_self.hive.context_dir() { job.state(JobState::Running)?; let path = dir.join(".gcroots").join(format!("node-{}", &*target_name)); diff --git a/src/nix/deployment/options.rs b/src/nix/deployment/options.rs index 10941af..e9c577f 100644 --- a/src/nix/deployment/options.rs +++ b/src/nix/deployment/options.rs @@ -20,6 +20,9 @@ pub struct Options { /// directory if it exists. pub(super) create_gc_roots: bool, + /// Whether to override per-node setting to build on the nodes themselves. 
+ pub(super) force_build_on_target: Option, + /// Ignore the node-level `deployment.replaceUnknownProfiles` option. pub(super) force_replace_unknown_profiles: bool, } @@ -41,6 +44,10 @@ impl Options { self.create_gc_roots = enable; } + pub fn set_force_build_on_target(&mut self, enable: bool) { + self.force_build_on_target = Some(enable); + } + pub fn set_force_replace_unknown_profiles(&mut self, enable: bool) { self.force_replace_unknown_profiles = enable; } @@ -61,6 +68,7 @@ impl Default for Options { gzip: true, upload_keys: true, create_gc_roots: false, + force_build_on_target: None, force_replace_unknown_profiles: false, } } diff --git a/src/nix/hive/eval.nix b/src/nix/hive/eval.nix index e911404..2fcfdc6 100644 --- a/src/nix/hive/eval.nix +++ b/src/nix/hive/eval.nix @@ -67,6 +67,11 @@ let See https://nixos.org/manual/nix/stable/#chap-distributed-builds for the machine specification format. + + This option is ignored when builds are initiated on the remote nodes + themselves via `deployment.buildOnTarget` or `--build-on-target`. To + still use the Nix distributed build functionality, configure the + builders on the target nodes with `nix.buildMachines`. ''; default = null; apply = value: if value == null then null else toString value; @@ -138,6 +143,26 @@ let type = types.bool; default = false; }; + buildOnTarget = lib.mkOption { + description = '' + Whether to build the system profiles on the target node itself. + + When enabled, Colmena will copy the derivation to the target + node and initiate the build there. This avoids copying back the + build results involved with the native distributed build + feature. Furthermore, the `build` goal will be equivalent to + the `push` goal. Since builds happen on the target node, the + results are automatically "pushed" and won't exist in the local + Nix store. 
+ + You can temporarily override per-node settings by passing + `--build-on-target` (enable for all nodes) or + `--no-build-on-target` (disable for all nodes) on the command + line. + ''; + type = types.bool; + default = false; + }; tags = lib.mkOption { description = '' A list of tags for the node. diff --git a/src/nix/host/ssh.rs b/src/nix/host/ssh.rs index a7350d4..b8513b9 100644 --- a/src/nix/host/ssh.rs +++ b/src/nix/host/ssh.rs @@ -40,17 +40,16 @@ impl Host for Ssh { self.run_command(command).await } async fn realize_remote(&mut self, derivation: &StorePath) -> NixResult> { - // FIXME - let paths = self.ssh(&["nix-store", "--no-gc-warning", "--realise", derivation.as_path().to_str().unwrap()]) - .capture_output() - .await; + let command = self.ssh(&["nix-store", "--no-gc-warning", "--realise", derivation.as_path().to_str().unwrap()]); - match paths { - Ok(paths) => { - paths.lines().map(|p| p.to_string().try_into()).collect() - } - Err(e) => Err(e), - } + let mut execution = CommandExecution::new(command); + execution.set_job(self.job.clone()); + + let paths = execution + .capture_output() + .await?; + + paths.lines().map(|p| p.to_string().try_into()).collect() } async fn upload_keys(&mut self, keys: &HashMap, require_ownership: bool) -> NixResult<()> { for (name, key) in keys { diff --git a/src/nix/mod.rs b/src/nix/mod.rs index 0f68db6..316e1aa 100644 --- a/src/nix/mod.rs +++ b/src/nix/mod.rs @@ -24,7 +24,7 @@ pub mod hive; pub use hive::{Hive, HivePath}; pub mod store; -pub use store::{StorePath, StoreDerivation}; +pub use store::{StorePath, StoreDerivation, BuildResult}; pub mod key; pub use key::Key; @@ -158,6 +158,10 @@ pub struct NodeConfig { #[serde(rename = "allowLocalDeployment")] allow_local_deployment: bool, + + #[serde(rename = "buildOnTarget")] + build_on_target: bool, + tags: Vec, #[serde(rename = "replaceUnknownProfiles")] @@ -223,6 +227,11 @@ impl NodeConfig { pub fn tags(&self) -> &[String] { &self.tags } pub fn allows_local_deployment(&self) 
-> bool { self.allow_local_deployment } + pub fn build_on_target(&self) -> bool { self.build_on_target } + pub fn set_build_on_target(&mut self, enable: bool) { + self.build_on_target = enable; + } + pub fn to_ssh_host(&self) -> Option { self.target_host.as_ref().map(|target_host| { let username = diff --git a/src/nix/profile.rs b/src/nix/profile.rs index 4d25de9..33a759b 100644 --- a/src/nix/profile.rs +++ b/src/nix/profile.rs @@ -10,6 +10,7 @@ use super::{ NixError, StorePath, StoreDerivation, + BuildResult, }; pub type ProfileDerivation = StoreDerivation; @@ -75,12 +76,18 @@ impl Profile { Ok(()) } + + fn from_store_path_unchecked(path: StorePath) -> Self { + Self(path) + } } -impl TryFrom> for Profile { +impl TryFrom> for Profile { type Error = NixError; - fn try_from(paths: Vec) -> NixResult { + fn try_from(result: BuildResult) -> NixResult { + let paths = result.paths(); + if paths.is_empty() { return Err(NixError::BadOutput { output: String::from("There is no store path"), @@ -93,7 +100,9 @@ impl TryFrom> for Profile { }); } - let path = paths.into_iter().next().unwrap(); - Self::from_store_path(path) + let path = paths.into_iter().next() + .unwrap().to_owned(); + + Ok(Self::from_store_path_unchecked(path)) } } diff --git a/src/nix/store.rs b/src/nix/store.rs index e1675a3..f1b3d5e 100644 --- a/src/nix/store.rs +++ b/src/nix/store.rs @@ -13,6 +13,19 @@ use super::{Host, NixCommand, NixResult, NixError}; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct StorePath(PathBuf); +/// A store derivation (.drv) that will result in a T when built. +#[derive(Debug)] +pub struct StoreDerivation>>{ + path: StorePath, + _target: PhantomData, +} + +/// Results of a build/realization. +pub struct BuildResult>> { + results: Vec, + _derivation: PhantomData, +} + impl StorePath { /// Returns the raw store path. pub fn as_path(&self) -> &Path { @@ -41,7 +54,7 @@ impl StorePath { } /// Converts the store path into a store derivation. 
- pub fn into_derivation>>(self) -> NixResult> { + pub fn into_derivation>>(self) -> NixResult> { if self.is_derivation() { Ok(StoreDerivation::::from_store_path_unchecked(self)) } else { @@ -76,14 +89,21 @@ impl From for PathBuf { } } -/// A store derivation (.drv) that will result in a T when built. -#[derive(Debug, Clone)] -pub struct StoreDerivation>>{ - path: StorePath, - _target: PhantomData, +impl>> Clone for StoreDerivation { + fn clone(&self) -> Self { + Self { + path: self.path.clone(), + _target: PhantomData, + } + } } -impl>> StoreDerivation { +impl>> StoreDerivation { + /// Returns the store path. + pub fn as_store_path(&self) -> &StorePath { + &self.path + } + fn from_store_path_unchecked(path: StorePath) -> Self { Self { path, @@ -92,16 +112,38 @@ impl>> StoreDerivation { } } -impl, Error=NixError>> StoreDerivation { +impl, Error=NixError>> StoreDerivation { /// Builds the store derivation on a host, resulting in a T. - pub async fn realize(&self, host: &mut dyn Host) -> NixResult { + pub async fn realize(&self, host: &mut Box) -> NixResult { let paths: Vec = host.realize(&self.path).await?; - paths.try_into() + + let result = BuildResult { + results: paths, + _derivation: PhantomData, + }; + result.try_into() + } + + /// Builds the store derivation on a host without copying the results back. + pub async fn realize_remote(&self, host: &mut Box) -> NixResult { + let paths: Vec = host.realize_remote(&self.path).await?; + + let result = BuildResult { + results: paths, + _derivation: PhantomData, + }; + result.try_into() } } -impl>> fmt::Display for StoreDerivation { +impl>> fmt::Display for StoreDerivation { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{:?}", self.path) } } + +impl, Error=NixError>> BuildResult { + pub fn paths(&self) -> &[StorePath] { + self.results.as_slice() + } +}