forked from DGNum/colmena
parent
e239cbd260
commit
1be16d552d
10 changed files with 175 additions and 4 deletions
|
@ -1,5 +1,9 @@
|
||||||
# Release Notes
|
# Release Notes
|
||||||
|
|
||||||
|
## Release 0.4.0 (Unreleased)
|
||||||
|
|
||||||
|
- `--reboot` is added to trigger a reboot and wait for the node to come back up.
|
||||||
|
|
||||||
## [Release 0.3.0](https://github.com/zhaofengli/colmena/releases/tag/v0.3.0) (2022/04/27)
|
## [Release 0.3.0](https://github.com/zhaofengli/colmena/releases/tag/v0.3.0) (2022/04/27)
|
||||||
|
|
||||||
- [Remote builds](https://colmena.cli.rs/0.3/features/remote-builds.html) are now supported ([#33](https://github.com/zhaofengli/colmena/issues/33)).
|
- [Remote builds](https://colmena.cli.rs/0.3/features/remote-builds.html) are now supported ([#33](https://github.com/zhaofengli/colmena/issues/33)).
|
||||||
|
|
|
@ -81,6 +81,11 @@ By default, Colmena will upload keys set in `deployment.keys` before deploying t
|
||||||
To upload keys without building or deploying the rest of the configuration, use `colmena upload-keys`.
|
To upload keys without building or deploying the rest of the configuration, use `colmena upload-keys`.
|
||||||
"#)
|
"#)
|
||||||
.takes_value(false))
|
.takes_value(false))
|
||||||
|
// `--reboot`: boolean flag (takes no value) that requests a reboot after activation.
.arg(Arg::new("reboot")
    .long("reboot")
    .help("Reboot nodes after activation")
    .long_help("Reboots nodes after activation and waits for them to come back up.")
    .takes_value(false))
|
||||||
.arg(Arg::new("no-substitutes")
|
.arg(Arg::new("no-substitutes")
|
||||||
.long("no-substitutes")
|
.long("no-substitutes")
|
||||||
.help("Do not use substitutes")
|
.help("Do not use substitutes")
|
||||||
|
@ -164,6 +169,7 @@ pub async fn run(_global_args: &ArgMatches, local_args: &ArgMatches) -> Result<(
|
||||||
options.set_substituters_push(!local_args.is_present("no-substitutes"));
|
options.set_substituters_push(!local_args.is_present("no-substitutes"));
|
||||||
options.set_gzip(!local_args.is_present("no-gzip"));
|
options.set_gzip(!local_args.is_present("no-gzip"));
|
||||||
options.set_upload_keys(!local_args.is_present("no-keys"));
|
options.set_upload_keys(!local_args.is_present("no-keys"));
|
||||||
|
options.set_reboot(local_args.is_present("reboot"));
|
||||||
options.set_force_replace_unknown_profiles(local_args.is_present("force-replace-unknown-profiles"));
|
options.set_force_replace_unknown_profiles(local_args.is_present("force-replace-unknown-profiles"));
|
||||||
options.set_evaluator(local_args.value_of_t("evaluator").unwrap());
|
options.set_evaluator(local_args.value_of_t("evaluator").unwrap());
|
||||||
|
|
||||||
|
|
|
@ -46,6 +46,9 @@ pub enum ColmenaError {
|
||||||
#[snafu(display("Unknown active profile: {:?}", profile))]
|
#[snafu(display("Unknown active profile: {:?}", profile))]
|
||||||
ActiveProfileUnknown { profile: Profile },
|
ActiveProfileUnknown { profile: Profile },
|
||||||
|
|
||||||
|
#[snafu(display("Unexpected active profile: {:?}", profile))]
|
||||||
|
ActiveProfileUnexpected { profile: Profile },
|
||||||
|
|
||||||
#[snafu(display("Could not determine current profile"))]
|
#[snafu(display("Could not determine current profile"))]
|
||||||
FailedToGetCurrentProfile,
|
FailedToGetCurrentProfile,
|
||||||
|
|
||||||
|
|
|
@ -98,6 +98,9 @@ pub enum JobType {
|
||||||
|
|
||||||
/// Creating GC roots.
|
/// Creating GC roots.
|
||||||
CreateGcRoots,
|
CreateGcRoots,
|
||||||
|
|
||||||
|
/// Rebooting a host.
|
||||||
|
Reboot,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A handle to a job.
|
/// A handle to a job.
|
||||||
|
@ -710,6 +713,10 @@ impl JobMetadata {
|
||||||
(JobType::Activate, JobState::Running) => "Activating system profile".to_string(),
|
(JobType::Activate, JobState::Running) => "Activating system profile".to_string(),
|
||||||
(JobType::Activate, JobState::Failed) => format!("Activation failed: {}", message),
|
(JobType::Activate, JobState::Failed) => format!("Activation failed: {}", message),
|
||||||
|
|
||||||
|
(JobType::Reboot, JobState::Running) => "Rebooting".to_string(),
|
||||||
|
(JobType::Reboot, JobState::Succeeded) => "Rebooted".to_string(),
|
||||||
|
(JobType::Reboot, JobState::Failed) => format!("Reboot failed: {}", message),
|
||||||
|
|
||||||
(_, JobState::Failed) => format!("Failed: {}", message),
|
(_, JobState::Failed) => format!("Failed: {}", message),
|
||||||
(_, JobState::Succeeded) => "Succeeded".to_string(),
|
(_, JobState::Succeeded) => "Succeeded".to_string(),
|
||||||
_ => "".to_string(),
|
_ => "".to_string(),
|
||||||
|
@ -727,6 +734,7 @@ impl JobMetadata {
|
||||||
JobType::Push => format!("Failed to push system closure to {}", node_list),
|
JobType::Push => format!("Failed to push system closure to {}", node_list),
|
||||||
JobType::UploadKeys => format!("Failed to upload keys to {}", node_list),
|
JobType::UploadKeys => format!("Failed to upload keys to {}", node_list),
|
||||||
JobType::Activate => format!("Failed to deploy to {}", node_list),
|
JobType::Activate => format!("Failed to deploy to {}", node_list),
|
||||||
|
JobType::Reboot => format!("Failed to reboot {}", node_list),
|
||||||
JobType::Meta => "Failed to complete requested operation".to_string(),
|
JobType::Meta => "Failed to complete requested operation".to_string(),
|
||||||
_ => format!("Failed to complete job on {}", node_list),
|
_ => format!("Failed to complete job on {}", node_list),
|
||||||
}
|
}
|
||||||
|
|
|
@ -75,6 +75,11 @@ impl Goal {
|
||||||
!matches!(self, Build | UploadKeys | Push)
|
!matches!(self, Build | UploadKeys | Push)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn persists_after_reboot(&self) -> bool {
|
||||||
|
use Goal::*;
|
||||||
|
matches!(self, Switch | Boot)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn requires_target_host(&self) -> bool {
|
pub fn requires_target_host(&self) -> bool {
|
||||||
use Goal::*;
|
use Goal::*;
|
||||||
!matches!(self, Build)
|
!matches!(self, Build)
|
||||||
|
|
|
@ -34,6 +34,7 @@ use super::{
|
||||||
ProfileDerivation,
|
ProfileDerivation,
|
||||||
CopyDirection,
|
CopyDirection,
|
||||||
CopyOptions,
|
CopyOptions,
|
||||||
|
RebootOptions,
|
||||||
key::{Key, UploadAt as UploadKeyAt},
|
key::{Key, UploadAt as UploadKeyAt},
|
||||||
evaluator::{
|
evaluator::{
|
||||||
DrvSetEvaluator,
|
DrvSetEvaluator,
|
||||||
|
@ -585,7 +586,8 @@ impl Deployment {
|
||||||
};
|
};
|
||||||
|
|
||||||
// Create GC root
|
// Create GC root
|
||||||
if self.options.create_gc_roots {
|
let profile_r = profile.clone();
|
||||||
|
let mut target = if self.options.create_gc_roots {
|
||||||
let job = parent.create_job(JobType::CreateGcRoots, nodes.clone())?;
|
let job = parent.create_job(JobType::CreateGcRoots, nodes.clone())?;
|
||||||
let arc_self = self.clone();
|
let arc_self = self.clone();
|
||||||
job.run_waiting(|job| async move {
|
job.run_waiting(|job| async move {
|
||||||
|
@ -597,10 +599,36 @@ impl Deployment {
|
||||||
job.state(JobState::Running)?;
|
job.state(JobState::Running)?;
|
||||||
let path = dir.join(".gcroots").join(format!("node-{}", &*target_name));
|
let path = dir.join(".gcroots").join(format!("node-{}", &*target_name));
|
||||||
|
|
||||||
profile.create_gc_root(&path).await?;
|
profile_r.create_gc_root(&path).await?;
|
||||||
} else {
|
} else {
|
||||||
job.noop("No context directory to create GC roots in".to_string())?;
|
job.noop("No context directory to create GC roots in".to_string())?;
|
||||||
}
|
}
|
||||||
|
Ok(target)
|
||||||
|
}).await?
|
||||||
|
} else {
|
||||||
|
target
|
||||||
|
};
|
||||||
|
|
||||||
|
// Reboot
|
||||||
|
if self.options.reboot {
|
||||||
|
let job = parent.create_job(JobType::Reboot, nodes.clone())?;
|
||||||
|
let arc_self = self.clone();
|
||||||
|
job.run(|job| async move {
|
||||||
|
let host = target.host.as_mut().unwrap();
|
||||||
|
host.set_job(Some(job.clone()));
|
||||||
|
|
||||||
|
let new_profile = if arc_self.goal.persists_after_reboot() {
|
||||||
|
Some(profile)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let options = RebootOptions::default()
|
||||||
|
.wait_for_boot(true)
|
||||||
|
.new_profile(new_profile);
|
||||||
|
|
||||||
|
host.reboot(options).await?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}).await?;
|
}).await?;
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,6 +17,9 @@ pub struct Options {
|
||||||
/// Whether to upload keys when deploying.
|
/// Whether to upload keys when deploying.
|
||||||
pub(super) upload_keys: bool,
|
pub(super) upload_keys: bool,
|
||||||
|
|
||||||
|
/// Whether to reboot the hosts after activation.
|
||||||
|
pub(super) reboot: bool,
|
||||||
|
|
||||||
/// Whether to create GC roots for node profiles.
|
/// Whether to create GC roots for node profiles.
|
||||||
///
|
///
|
||||||
/// If true, .gc_roots will be created under the hive's context
|
/// If true, .gc_roots will be created under the hive's context
|
||||||
|
@ -53,6 +56,10 @@ impl Options {
|
||||||
self.upload_keys = enable;
|
self.upload_keys = enable;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Sets whether hosts are rebooted after activation.
///
/// `enable` is stored as-is in the `reboot` option.
pub fn set_reboot(&mut self, enable: bool) {
|
||||||
|
self.reboot = enable;
|
||||||
|
}
|
||||||
|
|
||||||
pub fn set_create_gc_roots(&mut self, enable: bool) {
|
pub fn set_create_gc_roots(&mut self, enable: bool) {
|
||||||
self.create_gc_roots = enable;
|
self.create_gc_roots = enable;
|
||||||
}
|
}
|
||||||
|
@ -84,6 +91,7 @@ impl Default for Options {
|
||||||
substituters_push: true,
|
substituters_push: true,
|
||||||
gzip: true,
|
gzip: true,
|
||||||
upload_keys: true,
|
upload_keys: true,
|
||||||
|
reboot: false,
|
||||||
create_gc_roots: false,
|
create_gc_roots: false,
|
||||||
force_build_on_target: None,
|
force_build_on_target: None,
|
||||||
force_replace_unknown_profiles: false,
|
force_replace_unknown_profiles: false,
|
||||||
|
|
|
@ -31,6 +31,15 @@ pub struct CopyOptions {
|
||||||
gzip: bool,
|
gzip: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Options for [`Host::reboot`].
#[derive(Clone, Debug)]
|
||||||
|
pub struct RebootOptions {
|
||||||
|
/// Whether to wait for host to boot back up.
|
||||||
|
wait_for_boot: bool,
|
||||||
|
|
||||||
|
/// New system profile to expect upon reboot.
|
||||||
|
new_profile: Option<Profile>,
|
||||||
|
}
|
||||||
|
|
||||||
impl Default for CopyOptions {
|
impl Default for CopyOptions {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self {
|
Self {
|
||||||
|
@ -58,6 +67,27 @@ impl CopyOptions {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Default for RebootOptions {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
wait_for_boot: true,
|
||||||
|
new_profile: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RebootOptions {
|
||||||
|
pub fn wait_for_boot(mut self, val: bool) -> Self {
|
||||||
|
self.wait_for_boot = val;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new_profile(mut self, profile: Option<Profile>) -> Self {
|
||||||
|
self.new_profile = profile;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A Nix(OS) host.
|
/// A Nix(OS) host.
|
||||||
///
|
///
|
||||||
/// The underlying implementation must be Send and Sync.
|
/// The underlying implementation must be Send and Sync.
|
||||||
|
@ -134,4 +164,10 @@ pub trait Host: Send + Sync + std::fmt::Debug {
|
||||||
async fn run_command(&mut self, command: &[&str]) -> ColmenaResult<()> {
|
async fn run_command(&mut self, command: &[&str]) -> ColmenaResult<()> {
|
||||||
Err(ColmenaError::Unsupported)
|
Err(ColmenaError::Unsupported)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reboots the host.
///
/// This default implementation performs no reboot and always returns
/// `ColmenaError::Unsupported`.
|
||||||
|
// `options` is not read by this default body.
#[allow(unused_variables)]
|
||||||
|
async fn reboot(&mut self, options: RebootOptions) -> ColmenaResult<()> {
|
||||||
|
Err(ColmenaError::Unsupported)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,15 +2,17 @@ use std::collections::HashMap;
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::process::Stdio;
|
use std::process::Stdio;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use tokio::process::Command;
|
use tokio::process::Command;
|
||||||
|
use tokio::time::sleep;
|
||||||
|
|
||||||
use crate::error::{ColmenaResult, ColmenaError};
|
use crate::error::{ColmenaResult, ColmenaError};
|
||||||
use crate::nix::{StorePath, Profile, Goal, Key, SYSTEM_PROFILE, CURRENT_PROFILE};
|
use crate::nix::{StorePath, Profile, Goal, Key, SYSTEM_PROFILE, CURRENT_PROFILE};
|
||||||
use crate::util::{CommandExecution, CommandExt};
|
use crate::util::{CommandExecution, CommandExt};
|
||||||
use crate::job::JobHandle;
|
use crate::job::JobHandle;
|
||||||
use super::{CopyDirection, CopyOptions, Host, key_uploader};
|
use super::{CopyDirection, CopyOptions, RebootOptions, Host, key_uploader};
|
||||||
|
|
||||||
/// A remote machine connected over SSH.
|
/// A remote machine connected over SSH.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
@ -33,6 +35,10 @@ pub struct Ssh {
|
||||||
job: Option<JobHandle>,
|
job: Option<JobHandle>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An opaque boot ID.
///
/// Wraps the string produced by `get_boot_id`. `reboot` only compares two
/// values for inequality to tell that the host has booted again.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
struct BootId(String);
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl Host for Ssh {
|
impl Host for Ssh {
|
||||||
async fn copy_closure(&mut self, closure: &StorePath, direction: CopyDirection, options: CopyOptions) -> ColmenaResult<()> {
|
async fn copy_closure(&mut self, closure: &StorePath, direction: CopyDirection, options: CopyOptions) -> ColmenaResult<()> {
|
||||||
|
@ -114,6 +120,49 @@ impl Host for Ssh {
|
||||||
let command = self.ssh(command);
|
let command = self.ssh(command);
|
||||||
self.run_command(command).await
|
self.run_command(command).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn reboot(&mut self, options: RebootOptions) -> ColmenaResult<()> {
|
||||||
|
if !options.wait_for_boot {
|
||||||
|
return self.initate_reboot().await;
|
||||||
|
}
|
||||||
|
|
||||||
|
let old_id = self.get_boot_id().await?;
|
||||||
|
|
||||||
|
self.initate_reboot().await?;
|
||||||
|
|
||||||
|
if let Some(job) = &self.job {
|
||||||
|
job.message("Waiting for reboot".to_string())?;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for node to come back up
|
||||||
|
loop {
|
||||||
|
match self.get_boot_id().await {
|
||||||
|
Ok(new_id) => {
|
||||||
|
if new_id != old_id {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
// Ignore errors while waiting
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sleep(Duration::from_secs(2)).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure node has correct system profile
|
||||||
|
if let Some(new_profile) = options.new_profile {
|
||||||
|
let profile = self.get_current_system_profile().await?;
|
||||||
|
|
||||||
|
if new_profile != profile {
|
||||||
|
return Err(ColmenaError::ActiveProfileUnexpected {
|
||||||
|
profile,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Ssh {
|
impl Ssh {
|
||||||
|
@ -248,4 +297,28 @@ impl Ssh {
|
||||||
let uploader = command.spawn()?;
|
let uploader = command.spawn()?;
|
||||||
key_uploader::feed_uploader(uploader, key, self.job.clone()).await
|
key_uploader::feed_uploader(uploader, key, self.job.clone()).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the current Boot ID.
|
||||||
|
async fn get_boot_id(&mut self) -> ColmenaResult<BootId> {
|
||||||
|
let boot_id = self.ssh(&["cat", "/proc/sys/kernel/random/boot_id"])
|
||||||
|
.capture_output()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(BootId(boot_id))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Initiates reboot.
|
||||||
|
async fn initate_reboot(&mut self) -> ColmenaResult<()> {
|
||||||
|
match self.run_command(self.ssh(&["reboot"])).await {
|
||||||
|
Ok(()) => Ok(()),
|
||||||
|
Err(e) => {
|
||||||
|
if let ColmenaError::ChildFailure { exit_code: 255 } = e {
|
||||||
|
// Assume it's "Connection closed by remote host"
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,7 @@ use validator::{Validate, ValidationError as ValidationErrorType};
|
||||||
use crate::error::{ColmenaResult, ColmenaError};
|
use crate::error::{ColmenaResult, ColmenaError};
|
||||||
|
|
||||||
pub mod host;
|
pub mod host;
|
||||||
pub use host::{Host, CopyDirection, CopyOptions};
|
pub use host::{Host, CopyDirection, CopyOptions, RebootOptions};
|
||||||
use host::Ssh;
|
use host::Ssh;
|
||||||
|
|
||||||
pub mod hive;
|
pub mod hive;
|
||||||
|
|
Loading…
Reference in a new issue