apply: Add --reboot flag

Fixes #46.
This commit is contained in:
Zhaofeng Li 2022-05-22 02:15:33 -07:00
parent e239cbd260
commit 1be16d552d
10 changed files with 175 additions and 4 deletions

View file

@ -1,5 +1,9 @@
# Release Notes # Release Notes
## Release 0.4.0 (Unreleased)
- `--reboot` is added to trigger a reboot and wait for the node to come back up.
## [Release 0.3.0](https://github.com/zhaofengli/colmena/releases/tag/v0.3.0) (2022/04/27) ## [Release 0.3.0](https://github.com/zhaofengli/colmena/releases/tag/v0.3.0) (2022/04/27)
- [Remote builds](https://colmena.cli.rs/0.3/features/remote-builds.html) are now supported ([#33](https://github.com/zhaofengli/colmena/issues/33)). - [Remote builds](https://colmena.cli.rs/0.3/features/remote-builds.html) are now supported ([#33](https://github.com/zhaofengli/colmena/issues/33)).

View file

@ -81,6 +81,11 @@ By default, Colmena will upload keys set in `deployment.keys` before deploying t
To upload keys without building or deploying the rest of the configuration, use `colmena upload-keys`. To upload keys without building or deploying the rest of the configuration, use `colmena upload-keys`.
"#) "#)
.takes_value(false)) .takes_value(false))
// `--reboot`: boolean switch (takes no value) requesting a reboot once
// activation has finished.
.arg(Arg::new("reboot")
    .long("reboot")
    .help("Reboot nodes after activation")
    .long_help("Reboots nodes after activation and waits for them to come back up.")
    .takes_value(false))
.arg(Arg::new("no-substitutes") .arg(Arg::new("no-substitutes")
.long("no-substitutes") .long("no-substitutes")
.help("Do not use substitutes") .help("Do not use substitutes")
@ -164,6 +169,7 @@ pub async fn run(_global_args: &ArgMatches, local_args: &ArgMatches) -> Result<(
options.set_substituters_push(!local_args.is_present("no-substitutes")); options.set_substituters_push(!local_args.is_present("no-substitutes"));
options.set_gzip(!local_args.is_present("no-gzip")); options.set_gzip(!local_args.is_present("no-gzip"));
options.set_upload_keys(!local_args.is_present("no-keys")); options.set_upload_keys(!local_args.is_present("no-keys"));
options.set_reboot(local_args.is_present("reboot"));
options.set_force_replace_unknown_profiles(local_args.is_present("force-replace-unknown-profiles")); options.set_force_replace_unknown_profiles(local_args.is_present("force-replace-unknown-profiles"));
options.set_evaluator(local_args.value_of_t("evaluator").unwrap()); options.set_evaluator(local_args.value_of_t("evaluator").unwrap());

View file

@ -46,6 +46,9 @@ pub enum ColmenaError {
#[snafu(display("Unknown active profile: {:?}", profile))] #[snafu(display("Unknown active profile: {:?}", profile))]
ActiveProfileUnknown { profile: Profile }, ActiveProfileUnknown { profile: Profile },
#[snafu(display("Unexpected active profile: {:?}", profile))]
ActiveProfileUnexpected { profile: Profile },
#[snafu(display("Could not determine current profile"))] #[snafu(display("Could not determine current profile"))]
FailedToGetCurrentProfile, FailedToGetCurrentProfile,

View file

@ -98,6 +98,9 @@ pub enum JobType {
/// Creating GC roots. /// Creating GC roots.
CreateGcRoots, CreateGcRoots,
/// Rebooting a host.
Reboot,
} }
/// A handle to a job. /// A handle to a job.
@ -710,6 +713,10 @@ impl JobMetadata {
(JobType::Activate, JobState::Running) => "Activating system profile".to_string(), (JobType::Activate, JobState::Running) => "Activating system profile".to_string(),
(JobType::Activate, JobState::Failed) => format!("Activation failed: {}", message), (JobType::Activate, JobState::Failed) => format!("Activation failed: {}", message),
(JobType::Reboot, JobState::Running) => "Rebooting".to_string(),
(JobType::Reboot, JobState::Succeeded) => "Rebooted".to_string(),
(JobType::Reboot, JobState::Failed) => format!("Reboot failed: {}", message),
(_, JobState::Failed) => format!("Failed: {}", message), (_, JobState::Failed) => format!("Failed: {}", message),
(_, JobState::Succeeded) => "Succeeded".to_string(), (_, JobState::Succeeded) => "Succeeded".to_string(),
_ => "".to_string(), _ => "".to_string(),
@ -727,6 +734,7 @@ impl JobMetadata {
JobType::Push => format!("Failed to push system closure to {}", node_list), JobType::Push => format!("Failed to push system closure to {}", node_list),
JobType::UploadKeys => format!("Failed to upload keys to {}", node_list), JobType::UploadKeys => format!("Failed to upload keys to {}", node_list),
JobType::Activate => format!("Failed to deploy to {}", node_list), JobType::Activate => format!("Failed to deploy to {}", node_list),
JobType::Reboot => format!("Failed to reboot {}", node_list),
JobType::Meta => "Failed to complete requested operation".to_string(), JobType::Meta => "Failed to complete requested operation".to_string(),
_ => format!("Failed to complete job on {}", node_list), _ => format!("Failed to complete job on {}", node_list),
} }

View file

@ -75,6 +75,11 @@ impl Goal {
!matches!(self, Build | UploadKeys | Push) !matches!(self, Build | UploadKeys | Push)
} }
pub fn persists_after_reboot(&self) -> bool {
use Goal::*;
matches!(self, Switch | Boot)
}
pub fn requires_target_host(&self) -> bool { pub fn requires_target_host(&self) -> bool {
use Goal::*; use Goal::*;
!matches!(self, Build) !matches!(self, Build)

View file

@ -34,6 +34,7 @@ use super::{
ProfileDerivation, ProfileDerivation,
CopyDirection, CopyDirection,
CopyOptions, CopyOptions,
RebootOptions,
key::{Key, UploadAt as UploadKeyAt}, key::{Key, UploadAt as UploadKeyAt},
evaluator::{ evaluator::{
DrvSetEvaluator, DrvSetEvaluator,
@ -585,7 +586,8 @@ impl Deployment {
}; };
// Create GC root // Create GC root
if self.options.create_gc_roots { let profile_r = profile.clone();
let mut target = if self.options.create_gc_roots {
let job = parent.create_job(JobType::CreateGcRoots, nodes.clone())?; let job = parent.create_job(JobType::CreateGcRoots, nodes.clone())?;
let arc_self = self.clone(); let arc_self = self.clone();
job.run_waiting(|job| async move { job.run_waiting(|job| async move {
@ -597,10 +599,36 @@ impl Deployment {
job.state(JobState::Running)?; job.state(JobState::Running)?;
let path = dir.join(".gcroots").join(format!("node-{}", &*target_name)); let path = dir.join(".gcroots").join(format!("node-{}", &*target_name));
profile.create_gc_root(&path).await?; profile_r.create_gc_root(&path).await?;
} else { } else {
job.noop("No context directory to create GC roots in".to_string())?; job.noop("No context directory to create GC roots in".to_string())?;
} }
Ok(target)
}).await?
} else {
target
};
// Reboot
if self.options.reboot {
let job = parent.create_job(JobType::Reboot, nodes.clone())?;
let arc_self = self.clone();
job.run(|job| async move {
let host = target.host.as_mut().unwrap();
host.set_job(Some(job.clone()));
let new_profile = if arc_self.goal.persists_after_reboot() {
Some(profile)
} else {
None
};
let options = RebootOptions::default()
.wait_for_boot(true)
.new_profile(new_profile);
host.reboot(options).await?;
Ok(()) Ok(())
}).await?; }).await?;
} }

View file

@ -17,6 +17,9 @@ pub struct Options {
/// Whether to upload keys when deploying. /// Whether to upload keys when deploying.
pub(super) upload_keys: bool, pub(super) upload_keys: bool,
/// Whether to reboot the hosts after activation.
pub(super) reboot: bool,
/// Whether to create GC roots for node profiles. /// Whether to create GC roots for node profiles.
/// ///
/// If true, .gc_roots will be created under the hive's context /// If true, .gc_roots will be created under the hive's context
@ -53,6 +56,10 @@ impl Options {
self.upload_keys = enable; self.upload_keys = enable;
} }
/// Sets whether the hosts should be rebooted after activation.
pub fn set_reboot(&mut self, enable: bool) {
self.reboot = enable;
}
pub fn set_create_gc_roots(&mut self, enable: bool) { pub fn set_create_gc_roots(&mut self, enable: bool) {
self.create_gc_roots = enable; self.create_gc_roots = enable;
} }
@ -84,6 +91,7 @@ impl Default for Options {
substituters_push: true, substituters_push: true,
gzip: true, gzip: true,
upload_keys: true, upload_keys: true,
reboot: false,
create_gc_roots: false, create_gc_roots: false,
force_build_on_target: None, force_build_on_target: None,
force_replace_unknown_profiles: false, force_replace_unknown_profiles: false,

View file

@ -31,6 +31,15 @@ pub struct CopyOptions {
gzip: bool, gzip: bool,
} }
/// Options describing how a host should be rebooted.
///
/// Both fields are private; values are configured through the chainable
/// setters of the same names, starting from `RebootOptions::default()`.
#[derive(Clone, Debug)]
pub struct RebootOptions {
/// Whether to wait for host to boot back up.
wait_for_boot: bool,
/// New system profile to expect upon reboot.
///
/// `None` means no particular profile is expected.
new_profile: Option<Profile>,
}
impl Default for CopyOptions { impl Default for CopyOptions {
fn default() -> Self { fn default() -> Self {
Self { Self {
@ -58,6 +67,27 @@ impl CopyOptions {
} }
} }
impl Default for RebootOptions {
fn default() -> Self {
Self {
wait_for_boot: true,
new_profile: None,
}
}
}
impl RebootOptions {
    /// Sets whether to wait for the host to boot back up, returning the
    /// updated options so calls can be chained.
    pub fn wait_for_boot(self, val: bool) -> Self {
        Self {
            wait_for_boot: val,
            ..self
        }
    }

    /// Sets the system profile to expect upon reboot (`None` for no
    /// expectation), returning the updated options so calls can be chained.
    pub fn new_profile(self, profile: Option<Profile>) -> Self {
        Self {
            new_profile: profile,
            ..self
        }
    }
}
/// A Nix(OS) host. /// A Nix(OS) host.
/// ///
/// The underlying implementation must be Send and Sync. /// The underlying implementation must be Send and Sync.
@ -134,4 +164,10 @@ pub trait Host: Send + Sync + std::fmt::Debug {
async fn run_command(&mut self, command: &[&str]) -> ColmenaResult<()> { async fn run_command(&mut self, command: &[&str]) -> ColmenaResult<()> {
Err(ColmenaError::Unsupported) Err(ColmenaError::Unsupported)
} }
/// Reboots the host.
///
/// This default implementation performs no action and always returns
/// `ColmenaError::Unsupported`; host types that can reboot override it.
// `options` is not used by the default implementation, hence the allowance.
#[allow(unused_variables)]
async fn reboot(&mut self, options: RebootOptions) -> ColmenaResult<()> {
Err(ColmenaError::Unsupported)
}
} }

View file

@ -2,15 +2,17 @@ use std::collections::HashMap;
use std::convert::TryInto; use std::convert::TryInto;
use std::path::PathBuf; use std::path::PathBuf;
use std::process::Stdio; use std::process::Stdio;
use std::time::Duration;
use async_trait::async_trait; use async_trait::async_trait;
use tokio::process::Command; use tokio::process::Command;
use tokio::time::sleep;
use crate::error::{ColmenaResult, ColmenaError}; use crate::error::{ColmenaResult, ColmenaError};
use crate::nix::{StorePath, Profile, Goal, Key, SYSTEM_PROFILE, CURRENT_PROFILE}; use crate::nix::{StorePath, Profile, Goal, Key, SYSTEM_PROFILE, CURRENT_PROFILE};
use crate::util::{CommandExecution, CommandExt}; use crate::util::{CommandExecution, CommandExt};
use crate::job::JobHandle; use crate::job::JobHandle;
use super::{CopyDirection, CopyOptions, Host, key_uploader}; use super::{CopyDirection, CopyOptions, RebootOptions, Host, key_uploader};
/// A remote machine connected over SSH. /// A remote machine connected over SSH.
#[derive(Debug)] #[derive(Debug)]
@ -33,6 +35,10 @@ pub struct Ssh {
job: Option<JobHandle>, job: Option<JobHandle>,
} }
/// An opaque boot ID.
///
/// Wraps the text read from `/proc/sys/kernel/random/boot_id` on the remote
/// host. The value is never interpreted; it is only compared for equality
/// to tell whether the host has booted again.
#[derive(Debug, Clone, PartialEq, Eq)]
struct BootId(String);
#[async_trait] #[async_trait]
impl Host for Ssh { impl Host for Ssh {
async fn copy_closure(&mut self, closure: &StorePath, direction: CopyDirection, options: CopyOptions) -> ColmenaResult<()> { async fn copy_closure(&mut self, closure: &StorePath, direction: CopyDirection, options: CopyOptions) -> ColmenaResult<()> {
@ -114,6 +120,49 @@ impl Host for Ssh {
let command = self.ssh(command); let command = self.ssh(command);
self.run_command(command).await self.run_command(command).await
} }
/// Reboots the host over SSH.
///
/// When `wait_for_boot` is not set, the reboot is merely initiated.
/// Otherwise the boot ID is recorded first, the reboot is initiated, and
/// the host is polled every two seconds until it reports a different boot
/// ID. If a new profile is expected, the current system profile is then
/// compared against it.
///
/// # Errors
///
/// Fails if the initial boot ID cannot be read, if initiating the reboot
/// fails, if the job message cannot be posted, if the current system
/// profile cannot be determined, or with
/// `ColmenaError::ActiveProfileUnexpected` when the profile found after the
/// reboot differs from the expected one.
async fn reboot(&mut self, options: RebootOptions) -> ColmenaResult<()> {
    // Fire-and-forget mode: nothing to observe afterwards.
    if !options.wait_for_boot {
        return self.initate_reboot().await;
    }

    // Recorded up front so a completed reboot can be recognised later.
    let boot_id_before = self.get_boot_id().await?;

    self.initate_reboot().await?;

    if let Some(job) = self.job.as_ref() {
        job.message("Waiting for reboot".to_string())?;
    }

    // Poll until the host answers with a boot ID different from the
    // recorded one. Failed polls are ignored: the host is expected to be
    // unreachable while it restarts. Note that this loop has no upper bound.
    loop {
        let rebooted = matches!(
            self.get_boot_id().await,
            Ok(current) if current != boot_id_before
        );
        if rebooted {
            break;
        }

        sleep(Duration::from_secs(2)).await;
    }

    // Ensure the node came up with the expected system profile, if any.
    if let Some(expected) = options.new_profile {
        let profile = self.get_current_system_profile().await?;
        if expected != profile {
            return Err(ColmenaError::ActiveProfileUnexpected { profile });
        }
    }

    Ok(())
}
} }
impl Ssh { impl Ssh {
@ -248,4 +297,28 @@ impl Ssh {
let uploader = command.spawn()?; let uploader = command.spawn()?;
key_uploader::feed_uploader(uploader, key, self.job.clone()).await key_uploader::feed_uploader(uploader, key, self.job.clone()).await
} }
/// Returns the current Boot ID.
///
/// Runs `cat` on the kernel's boot ID file over SSH and wraps the captured
/// output, unmodified, in a [`BootId`].
async fn get_boot_id(&mut self) -> ColmenaResult<BootId> {
    const BOOT_ID_PATH: &str = "/proc/sys/kernel/random/boot_id";

    let raw_id = self.ssh(&["cat", BOOT_ID_PATH]).capture_output().await?;

    Ok(BootId(raw_id))
}
/// Initiates reboot.
///
/// Runs `reboot` on the host over SSH. A child failure with exit code 255
/// is treated as success; every other outcome is passed through unchanged.
async fn initate_reboot(&mut self) -> ColmenaResult<()> {
    match self.run_command(self.ssh(&["reboot"])).await {
        // Assume it's "Connection closed by remote host"
        Err(ColmenaError::ChildFailure { exit_code: 255 }) => Ok(()),
        outcome => outcome,
    }
}
} }

View file

@ -11,7 +11,7 @@ use validator::{Validate, ValidationError as ValidationErrorType};
use crate::error::{ColmenaResult, ColmenaError}; use crate::error::{ColmenaResult, ColmenaError};
pub mod host; pub mod host;
pub use host::{Host, CopyDirection, CopyOptions}; pub use host::{Host, CopyDirection, CopyOptions, RebootOptions};
use host::Ssh; use host::Ssh;
pub mod hive; pub mod hive;