forked from DGNum/colmena
parent
e239cbd260
commit
1be16d552d
10 changed files with 175 additions and 4 deletions
|
@ -1,5 +1,9 @@
|
|||
# Release Notes
|
||||
|
||||
## Release 0.4.0 (Unreleased)
|
||||
|
||||
- `--reboot` is added to trigger a reboot and wait for the node to come back up.
|
||||
|
||||
## [Release 0.3.0](https://github.com/zhaofengli/colmena/releases/tag/v0.3.0) (2022/04/27)
|
||||
|
||||
- [Remote builds](https://colmena.cli.rs/0.3/features/remote-builds.html) are now supported ([#33](https://github.com/zhaofengli/colmena/issues/33)).
|
||||
|
|
|
@ -81,6 +81,11 @@ By default, Colmena will upload keys set in `deployment.keys` before deploying t
|
|||
To upload keys without building or deploying the rest of the configuration, use `colmena upload-keys`.
|
||||
"#)
|
||||
.takes_value(false))
|
||||
.arg(Arg::new("reboot")
|
||||
.long("reboot")
|
||||
.help("Reboot nodes after activation")
|
||||
.long_help("Reboots nodes after activation and wait for them to come back up.")
|
||||
.takes_value(false))
|
||||
.arg(Arg::new("no-substitutes")
|
||||
.long("no-substitutes")
|
||||
.help("Do not use substitutes")
|
||||
|
@ -164,6 +169,7 @@ pub async fn run(_global_args: &ArgMatches, local_args: &ArgMatches) -> Result<(
|
|||
options.set_substituters_push(!local_args.is_present("no-substitutes"));
|
||||
options.set_gzip(!local_args.is_present("no-gzip"));
|
||||
options.set_upload_keys(!local_args.is_present("no-keys"));
|
||||
options.set_reboot(local_args.is_present("reboot"));
|
||||
options.set_force_replace_unknown_profiles(local_args.is_present("force-replace-unknown-profiles"));
|
||||
options.set_evaluator(local_args.value_of_t("evaluator").unwrap());
|
||||
|
||||
|
|
|
@ -46,6 +46,9 @@ pub enum ColmenaError {
|
|||
#[snafu(display("Unknown active profile: {:?}", profile))]
|
||||
ActiveProfileUnknown { profile: Profile },
|
||||
|
||||
#[snafu(display("Unexpected active profile: {:?}", profile))]
|
||||
ActiveProfileUnexpected { profile: Profile },
|
||||
|
||||
#[snafu(display("Could not determine current profile"))]
|
||||
FailedToGetCurrentProfile,
|
||||
|
||||
|
|
|
@ -98,6 +98,9 @@ pub enum JobType {
|
|||
|
||||
/// Creating GC roots.
|
||||
CreateGcRoots,
|
||||
|
||||
/// Rebooting a host.
|
||||
Reboot,
|
||||
}
|
||||
|
||||
/// A handle to a job.
|
||||
|
@ -710,6 +713,10 @@ impl JobMetadata {
|
|||
(JobType::Activate, JobState::Running) => "Activating system profile".to_string(),
|
||||
(JobType::Activate, JobState::Failed) => format!("Activation failed: {}", message),
|
||||
|
||||
(JobType::Reboot, JobState::Running) => "Rebooting".to_string(),
|
||||
(JobType::Reboot, JobState::Succeeded) => "Rebooted".to_string(),
|
||||
(JobType::Reboot, JobState::Failed) => format!("Reboot failed: {}", message),
|
||||
|
||||
(_, JobState::Failed) => format!("Failed: {}", message),
|
||||
(_, JobState::Succeeded) => "Succeeded".to_string(),
|
||||
_ => "".to_string(),
|
||||
|
@ -727,6 +734,7 @@ impl JobMetadata {
|
|||
JobType::Push => format!("Failed to push system closure to {}", node_list),
|
||||
JobType::UploadKeys => format!("Failed to upload keys to {}", node_list),
|
||||
JobType::Activate => format!("Failed to deploy to {}", node_list),
|
||||
JobType::Reboot => format!("Failed to reboot {}", node_list),
|
||||
JobType::Meta => "Failed to complete requested operation".to_string(),
|
||||
_ => format!("Failed to complete job on {}", node_list),
|
||||
}
|
||||
|
|
|
@ -75,6 +75,11 @@ impl Goal {
|
|||
!matches!(self, Build | UploadKeys | Push)
|
||||
}
|
||||
|
||||
pub fn persists_after_reboot(&self) -> bool {
|
||||
use Goal::*;
|
||||
matches!(self, Switch | Boot)
|
||||
}
|
||||
|
||||
pub fn requires_target_host(&self) -> bool {
|
||||
use Goal::*;
|
||||
!matches!(self, Build)
|
||||
|
|
|
@ -34,6 +34,7 @@ use super::{
|
|||
ProfileDerivation,
|
||||
CopyDirection,
|
||||
CopyOptions,
|
||||
RebootOptions,
|
||||
key::{Key, UploadAt as UploadKeyAt},
|
||||
evaluator::{
|
||||
DrvSetEvaluator,
|
||||
|
@ -585,7 +586,8 @@ impl Deployment {
|
|||
};
|
||||
|
||||
// Create GC root
|
||||
if self.options.create_gc_roots {
|
||||
let profile_r = profile.clone();
|
||||
let mut target = if self.options.create_gc_roots {
|
||||
let job = parent.create_job(JobType::CreateGcRoots, nodes.clone())?;
|
||||
let arc_self = self.clone();
|
||||
job.run_waiting(|job| async move {
|
||||
|
@ -597,10 +599,36 @@ impl Deployment {
|
|||
job.state(JobState::Running)?;
|
||||
let path = dir.join(".gcroots").join(format!("node-{}", &*target_name));
|
||||
|
||||
profile.create_gc_root(&path).await?;
|
||||
profile_r.create_gc_root(&path).await?;
|
||||
} else {
|
||||
job.noop("No context directory to create GC roots in".to_string())?;
|
||||
}
|
||||
Ok(target)
|
||||
}).await?
|
||||
} else {
|
||||
target
|
||||
};
|
||||
|
||||
// Reboot
|
||||
if self.options.reboot {
|
||||
let job = parent.create_job(JobType::Reboot, nodes.clone())?;
|
||||
let arc_self = self.clone();
|
||||
job.run(|job| async move {
|
||||
let host = target.host.as_mut().unwrap();
|
||||
host.set_job(Some(job.clone()));
|
||||
|
||||
let new_profile = if arc_self.goal.persists_after_reboot() {
|
||||
Some(profile)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let options = RebootOptions::default()
|
||||
.wait_for_boot(true)
|
||||
.new_profile(new_profile);
|
||||
|
||||
host.reboot(options).await?;
|
||||
|
||||
Ok(())
|
||||
}).await?;
|
||||
}
|
||||
|
|
|
@ -17,6 +17,9 @@ pub struct Options {
|
|||
/// Whether to upload keys when deploying.
|
||||
pub(super) upload_keys: bool,
|
||||
|
||||
/// Whether to reboot the hosts after activation.
|
||||
pub(super) reboot: bool,
|
||||
|
||||
/// Whether to create GC roots for node profiles.
|
||||
///
|
||||
/// If true, .gcroots will be created under the hive's context
|
||||
|
@ -53,6 +56,10 @@ impl Options {
|
|||
self.upload_keys = enable;
|
||||
}
|
||||
|
||||
pub fn set_reboot(&mut self, enable: bool) {
|
||||
self.reboot = enable;
|
||||
}
|
||||
|
||||
pub fn set_create_gc_roots(&mut self, enable: bool) {
|
||||
self.create_gc_roots = enable;
|
||||
}
|
||||
|
@ -84,6 +91,7 @@ impl Default for Options {
|
|||
substituters_push: true,
|
||||
gzip: true,
|
||||
upload_keys: true,
|
||||
reboot: false,
|
||||
create_gc_roots: false,
|
||||
force_build_on_target: None,
|
||||
force_replace_unknown_profiles: false,
|
||||
|
|
|
@ -31,6 +31,15 @@ pub struct CopyOptions {
|
|||
gzip: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RebootOptions {
|
||||
/// Whether to wait for host to boot back up.
|
||||
wait_for_boot: bool,
|
||||
|
||||
/// New system profile to expect upon reboot.
|
||||
new_profile: Option<Profile>,
|
||||
}
|
||||
|
||||
impl Default for CopyOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
|
@ -58,6 +67,27 @@ impl CopyOptions {
|
|||
}
|
||||
}
|
||||
|
||||
impl Default for RebootOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
wait_for_boot: true,
|
||||
new_profile: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RebootOptions {
|
||||
pub fn wait_for_boot(mut self, val: bool) -> Self {
|
||||
self.wait_for_boot = val;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn new_profile(mut self, profile: Option<Profile>) -> Self {
|
||||
self.new_profile = profile;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A Nix(OS) host.
|
||||
///
|
||||
/// The underlying implementation must be Send and Sync.
|
||||
|
@ -134,4 +164,10 @@ pub trait Host: Send + Sync + std::fmt::Debug {
|
|||
async fn run_command(&mut self, command: &[&str]) -> ColmenaResult<()> {
|
||||
Err(ColmenaError::Unsupported)
|
||||
}
|
||||
|
||||
/// Reboots the host.
|
||||
#[allow(unused_variables)]
|
||||
async fn reboot(&mut self, options: RebootOptions) -> ColmenaResult<()> {
|
||||
Err(ColmenaError::Unsupported)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,15 +2,17 @@ use std::collections::HashMap;
|
|||
use std::convert::TryInto;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Stdio;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use tokio::process::Command;
|
||||
use tokio::time::sleep;
|
||||
|
||||
use crate::error::{ColmenaResult, ColmenaError};
|
||||
use crate::nix::{StorePath, Profile, Goal, Key, SYSTEM_PROFILE, CURRENT_PROFILE};
|
||||
use crate::util::{CommandExecution, CommandExt};
|
||||
use crate::job::JobHandle;
|
||||
use super::{CopyDirection, CopyOptions, Host, key_uploader};
|
||||
use super::{CopyDirection, CopyOptions, RebootOptions, Host, key_uploader};
|
||||
|
||||
/// A remote machine connected over SSH.
|
||||
#[derive(Debug)]
|
||||
|
@ -33,6 +35,10 @@ pub struct Ssh {
|
|||
job: Option<JobHandle>,
|
||||
}
|
||||
|
||||
/// An opaque boot ID.
///
/// Wraps the string reported by a host for its current boot. The value is
/// never interpreted; two IDs are only ever compared for equality.
#[derive(Clone, Debug, Eq, PartialEq)]
struct BootId(String);
|
||||
|
||||
#[async_trait]
|
||||
impl Host for Ssh {
|
||||
async fn copy_closure(&mut self, closure: &StorePath, direction: CopyDirection, options: CopyOptions) -> ColmenaResult<()> {
|
||||
|
@ -114,6 +120,49 @@ impl Host for Ssh {
|
|||
let command = self.ssh(command);
|
||||
self.run_command(command).await
|
||||
}
|
||||
|
||||
async fn reboot(&mut self, options: RebootOptions) -> ColmenaResult<()> {
|
||||
if !options.wait_for_boot {
|
||||
return self.initate_reboot().await;
|
||||
}
|
||||
|
||||
let old_id = self.get_boot_id().await?;
|
||||
|
||||
self.initate_reboot().await?;
|
||||
|
||||
if let Some(job) = &self.job {
|
||||
job.message("Waiting for reboot".to_string())?;
|
||||
}
|
||||
|
||||
// Wait for node to come back up
|
||||
loop {
|
||||
match self.get_boot_id().await {
|
||||
Ok(new_id) => {
|
||||
if new_id != old_id {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
// Ignore errors while waiting
|
||||
}
|
||||
}
|
||||
|
||||
sleep(Duration::from_secs(2)).await;
|
||||
}
|
||||
|
||||
// Ensure node has correct system profile
|
||||
if let Some(new_profile) = options.new_profile {
|
||||
let profile = self.get_current_system_profile().await?;
|
||||
|
||||
if new_profile != profile {
|
||||
return Err(ColmenaError::ActiveProfileUnexpected {
|
||||
profile,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Ssh {
|
||||
|
@ -248,4 +297,28 @@ impl Ssh {
|
|||
let uploader = command.spawn()?;
|
||||
key_uploader::feed_uploader(uploader, key, self.job.clone()).await
|
||||
}
|
||||
|
||||
/// Returns the current Boot ID.
|
||||
async fn get_boot_id(&mut self) -> ColmenaResult<BootId> {
|
||||
let boot_id = self.ssh(&["cat", "/proc/sys/kernel/random/boot_id"])
|
||||
.capture_output()
|
||||
.await?;
|
||||
|
||||
Ok(BootId(boot_id))
|
||||
}
|
||||
|
||||
/// Initiates reboot.
|
||||
async fn initate_reboot(&mut self) -> ColmenaResult<()> {
|
||||
match self.run_command(self.ssh(&["reboot"])).await {
|
||||
Ok(()) => Ok(()),
|
||||
Err(e) => {
|
||||
if let ColmenaError::ChildFailure { exit_code: 255 } = e {
|
||||
// Assume it's "Connection closed by remote host"
|
||||
Ok(())
|
||||
} else {
|
||||
Err(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@ use validator::{Validate, ValidationError as ValidationErrorType};
|
|||
use crate::error::{ColmenaResult, ColmenaError};
|
||||
|
||||
pub mod host;
|
||||
pub use host::{Host, CopyDirection, CopyOptions};
|
||||
pub use host::{Host, CopyDirection, CopyOptions, RebootOptions};
|
||||
use host::Ssh;
|
||||
|
||||
pub mod hive;
|
||||
|
|
Loading…
Reference in a new issue