diff --git a/manual/src/release-notes.md b/manual/src/release-notes.md index d616375..09d64f5 100644 --- a/manual/src/release-notes.md +++ b/manual/src/release-notes.md @@ -1,5 +1,9 @@ # Release Notes +## Release 0.4.0 (Unreleased) + +- `--reboot` is added to trigger a reboot and wait for the node to come back up. + ## [Release 0.3.0](https://github.com/zhaofengli/colmena/releases/tag/v0.3.0) (2022/04/27) - [Remote builds](https://colmena.cli.rs/0.3/features/remote-builds.html) are now supported ([#33](https://github.com/zhaofengli/colmena/issues/33)). diff --git a/src/command/apply.rs b/src/command/apply.rs index b6b1ec4..1b6aa84 100644 --- a/src/command/apply.rs +++ b/src/command/apply.rs @@ -81,6 +81,11 @@ By default, Colmena will upload keys set in `deployment.keys` before deploying t To upload keys without building or deploying the rest of the configuration, use `colmena upload-keys`. "#) .takes_value(false)) + .arg(Arg::new("reboot") + .long("reboot") + .help("Reboot nodes after activation") + .long_help("Reboots nodes after activation and wait for them to come back up.") + .takes_value(false)) .arg(Arg::new("no-substitutes") .long("no-substitutes") .help("Do not use substitutes") @@ -164,6 +169,7 @@ pub async fn run(_global_args: &ArgMatches, local_args: &ArgMatches) -> Result<( options.set_substituters_push(!local_args.is_present("no-substitutes")); options.set_gzip(!local_args.is_present("no-gzip")); options.set_upload_keys(!local_args.is_present("no-keys")); + options.set_reboot(local_args.is_present("reboot")); options.set_force_replace_unknown_profiles(local_args.is_present("force-replace-unknown-profiles")); options.set_evaluator(local_args.value_of_t("evaluator").unwrap()); diff --git a/src/error.rs b/src/error.rs index 4f78543..aa50e0d 100644 --- a/src/error.rs +++ b/src/error.rs @@ -46,6 +46,9 @@ pub enum ColmenaError { #[snafu(display("Unknown active profile: {:?}", profile))] ActiveProfileUnknown { profile: Profile }, + #[snafu(display("Unexpected 
active profile: {:?}", profile))] + ActiveProfileUnexpected { profile: Profile }, + #[snafu(display("Could not determine current profile"))] FailedToGetCurrentProfile, diff --git a/src/job.rs b/src/job.rs index 7ec429e..0624daf 100644 --- a/src/job.rs +++ b/src/job.rs @@ -98,6 +98,9 @@ pub enum JobType { /// Creating GC roots. CreateGcRoots, + + /// Rebooting a host. + Reboot, } /// A handle to a job. @@ -710,6 +713,10 @@ impl JobMetadata { (JobType::Activate, JobState::Running) => "Activating system profile".to_string(), (JobType::Activate, JobState::Failed) => format!("Activation failed: {}", message), + (JobType::Reboot, JobState::Running) => "Rebooting".to_string(), + (JobType::Reboot, JobState::Succeeded) => "Rebooted".to_string(), + (JobType::Reboot, JobState::Failed) => format!("Reboot failed: {}", message), + (_, JobState::Failed) => format!("Failed: {}", message), (_, JobState::Succeeded) => "Succeeded".to_string(), _ => "".to_string(), @@ -727,6 +734,7 @@ impl JobMetadata { JobType::Push => format!("Failed to push system closure to {}", node_list), JobType::UploadKeys => format!("Failed to upload keys to {}", node_list), JobType::Activate => format!("Failed to deploy to {}", node_list), + JobType::Reboot => format!("Failed to reboot {}", node_list), JobType::Meta => "Failed to complete requested operation".to_string(), _ => format!("Failed to complete job on {}", node_list), } diff --git a/src/nix/deployment/goal.rs b/src/nix/deployment/goal.rs index aaab43c..6f7cdd5 100644 --- a/src/nix/deployment/goal.rs +++ b/src/nix/deployment/goal.rs @@ -75,6 +75,11 @@ impl Goal { !matches!(self, Build | UploadKeys | Push) } + pub fn persists_after_reboot(&self) -> bool { + use Goal::*; + matches!(self, Switch | Boot) + } + pub fn requires_target_host(&self) -> bool { use Goal::*; !matches!(self, Build) diff --git a/src/nix/deployment/mod.rs b/src/nix/deployment/mod.rs index da7d815..aba6ef3 100644 --- a/src/nix/deployment/mod.rs +++ b/src/nix/deployment/mod.rs @@ 
-34,6 +34,7 @@ use super::{ ProfileDerivation, CopyDirection, CopyOptions, + RebootOptions, key::{Key, UploadAt as UploadKeyAt}, evaluator::{ DrvSetEvaluator, @@ -585,7 +586,8 @@ impl Deployment { }; // Create GC root - if self.options.create_gc_roots { + let profile_r = profile.clone(); + let mut target = if self.options.create_gc_roots { let job = parent.create_job(JobType::CreateGcRoots, nodes.clone())?; let arc_self = self.clone(); job.run_waiting(|job| async move { @@ -597,10 +599,36 @@ impl Deployment { job.state(JobState::Running)?; let path = dir.join(".gcroots").join(format!("node-{}", &*target_name)); - profile.create_gc_root(&path).await?; + profile_r.create_gc_root(&path).await?; } else { job.noop("No context directory to create GC roots in".to_string())?; } + Ok(target) + }).await? + } else { + target + }; + + // Reboot + if self.options.reboot { + let job = parent.create_job(JobType::Reboot, nodes.clone())?; + let arc_self = self.clone(); + job.run(|job| async move { + let host = target.host.as_mut().unwrap(); + host.set_job(Some(job.clone())); + + let new_profile = if arc_self.goal.persists_after_reboot() { + Some(profile) + } else { + None + }; + + let options = RebootOptions::default() + .wait_for_boot(true) + .new_profile(new_profile); + + host.reboot(options).await?; + Ok(()) }).await?; } diff --git a/src/nix/deployment/options.rs b/src/nix/deployment/options.rs index 8de5582..7301add 100644 --- a/src/nix/deployment/options.rs +++ b/src/nix/deployment/options.rs @@ -17,6 +17,9 @@ pub struct Options { /// Whether to upload keys when deploying. pub(super) upload_keys: bool, + /// Whether to reboot the hosts after activation. + pub(super) reboot: bool, + /// Whether to create GC roots for node profiles. 
/// /// If true, .gc_roots will be created under the hive's context @@ -53,6 +56,10 @@ impl Options { self.upload_keys = enable; } + pub fn set_reboot(&mut self, enable: bool) { + self.reboot = enable; + } + pub fn set_create_gc_roots(&mut self, enable: bool) { self.create_gc_roots = enable; } @@ -84,6 +91,7 @@ impl Default for Options { substituters_push: true, gzip: true, upload_keys: true, + reboot: false, create_gc_roots: false, force_build_on_target: None, force_replace_unknown_profiles: false, diff --git a/src/nix/host/mod.rs b/src/nix/host/mod.rs index b721ced..c807d80 100644 --- a/src/nix/host/mod.rs +++ b/src/nix/host/mod.rs @@ -31,6 +31,15 @@ pub struct CopyOptions { gzip: bool, } +#[derive(Clone, Debug)] +pub struct RebootOptions { + /// Whether to wait for host to boot back up. + wait_for_boot: bool, + + /// New system profile to expect upon reboot. + new_profile: Option<Profile>, +} + impl Default for CopyOptions { fn default() -> Self { Self { @@ -58,6 +67,27 @@ impl CopyOptions { } } +impl Default for RebootOptions { + fn default() -> Self { + Self { + wait_for_boot: true, + new_profile: None, + } + } +} + +impl RebootOptions { + pub fn wait_for_boot(mut self, val: bool) -> Self { + self.wait_for_boot = val; + self + } + + pub fn new_profile(mut self, profile: Option<Profile>) -> Self { + self.new_profile = profile; + self + } +} + /// A Nix(OS) host. /// /// The underlying implementation must be Send and Sync. @@ -134,4 +164,10 @@ pub trait Host: Send + Sync + std::fmt::Debug { async fn run_command(&mut self, command: &[&str]) -> ColmenaResult<()> { Err(ColmenaError::Unsupported) } + + /// Reboots the host. 
+ #[allow(unused_variables)] + async fn reboot(&mut self, options: RebootOptions) -> ColmenaResult<()> { + Err(ColmenaError::Unsupported) + } } diff --git a/src/nix/host/ssh.rs b/src/nix/host/ssh.rs index 1ce2871..7648e9e 100644 --- a/src/nix/host/ssh.rs +++ b/src/nix/host/ssh.rs @@ -2,15 +2,17 @@ use std::collections::HashMap; use std::convert::TryInto; use std::path::PathBuf; use std::process::Stdio; +use std::time::Duration; use async_trait::async_trait; use tokio::process::Command; +use tokio::time::sleep; use crate::error::{ColmenaResult, ColmenaError}; use crate::nix::{StorePath, Profile, Goal, Key, SYSTEM_PROFILE, CURRENT_PROFILE}; use crate::util::{CommandExecution, CommandExt}; use crate::job::JobHandle; -use super::{CopyDirection, CopyOptions, Host, key_uploader}; +use super::{CopyDirection, CopyOptions, RebootOptions, Host, key_uploader}; /// A remote machine connected over SSH. #[derive(Debug)] @@ -33,6 +35,10 @@ pub struct Ssh { job: Option<JobHandle>, } +/// An opaque boot ID. +#[derive(Debug, Clone, PartialEq, Eq)] +struct BootId(String); + #[async_trait] impl Host for Ssh { async fn copy_closure(&mut self, closure: &StorePath, direction: CopyDirection, options: CopyOptions) -> ColmenaResult<()> { @@ -114,6 +120,49 @@ impl Host for Ssh { let command = self.ssh(command); self.run_command(command).await } + + async fn reboot(&mut self, options: RebootOptions) -> ColmenaResult<()> { + if !options.wait_for_boot { + return self.initate_reboot().await; + } + + let old_id = self.get_boot_id().await?; + + self.initate_reboot().await?; + + if let Some(job) = &self.job { + job.message("Waiting for reboot".to_string())?; + } + + // Wait for node to come back up + loop { + match self.get_boot_id().await { + Ok(new_id) => { + if new_id != old_id { + break; + } + } + Err(_) => { + // Ignore errors while waiting + } + } + + sleep(Duration::from_secs(2)).await; + } + + // Ensure node has correct system profile + if let Some(new_profile) = options.new_profile { + let profile = 
self.get_current_system_profile().await?; + + if new_profile != profile { + return Err(ColmenaError::ActiveProfileUnexpected { + profile, + }); + } + } + + Ok(()) + } } impl Ssh { @@ -248,4 +297,28 @@ impl Ssh { let uploader = command.spawn()?; key_uploader::feed_uploader(uploader, key, self.job.clone()).await } + + /// Returns the current Boot ID. + async fn get_boot_id(&mut self) -> ColmenaResult<BootId> { + let boot_id = self.ssh(&["cat", "/proc/sys/kernel/random/boot_id"]) + .capture_output() + .await?; + + Ok(BootId(boot_id)) + } + + /// Initiates reboot. + async fn initate_reboot(&mut self) -> ColmenaResult<()> { + match self.run_command(self.ssh(&["reboot"])).await { + Ok(()) => Ok(()), + Err(e) => { + if let ColmenaError::ChildFailure { exit_code: 255 } = e { + // Assume it's "Connection closed by remote host" + Ok(()) + } else { + Err(e) + } + } + } + } } diff --git a/src/nix/mod.rs b/src/nix/mod.rs index c58dfdd..115811b 100644 --- a/src/nix/mod.rs +++ b/src/nix/mod.rs @@ -11,7 +11,7 @@ use validator::{Validate, ValidationError as ValidationErrorType}; use crate::error::{ColmenaResult, ColmenaError}; pub mod host; -pub use host::{Host, CopyDirection, CopyOptions}; +pub use host::{Host, CopyDirection, CopyOptions, RebootOptions}; use host::Ssh; pub mod hive;