//! Deployment logic.
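//!
//! A rough usage sketch (constructing the `Hive` and the target map is
//! covered elsewhere in the crate):
//!
//! ```ignore
//! let mut deployment = Deployment::new(hive, targets, Goal::Switch, None);
//! deployment.set_options(options);
//! deployment.set_parallelism_limit(parallelism_limit);
//! deployment.execute().await?;
//! ```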

pub mod goal;
pub use goal::Goal;

pub mod limits;
pub use limits::{EvaluationNodeLimit, ParallelismLimit};

pub mod options;
pub use options::{Evaluator, Options};

use std::collections::HashMap;
use std::mem;
use std::sync::Arc;

use futures::future::join_all;
use itertools::Itertools;
use tokio_stream::StreamExt;

use super::NixOptions;
use crate::job::{JobHandle, JobMonitor, JobState, JobType};
use crate::progress::Sender as ProgressSender;
use crate::util;

use super::{
    evaluator::{DrvSetEvaluator, EvalError, NixEvalJobs},
    host::Local as LocalHost,
    key::{Key, UploadAt as UploadKeyAt},
    ColmenaError, ColmenaResult, CopyDirection, CopyOptions, Hive, Host, NodeConfig, NodeName,
    Profile, ProfileDerivation, RebootOptions,
};

/// A shared handle to a deployment.
pub type DeploymentHandle = Arc<Deployment>;

/// A map of target nodes.
pub type TargetNodeMap = HashMap<NodeName, TargetNode>;

/// A deployment.
#[derive(Debug)]
pub struct Deployment {
    /// The configuration.
    hive: Hive,

    /// The goal of this deployment.
    goal: Goal,

    /// Deployment options.
    options: Options,

    /// Options passed to Nix invocations.
    nix_options: NixOptions,

    /// Handle to send messages to the ProgressOutput.
    progress: Option<ProgressSender>,

    /// Handles to the deployment targets.
    targets: HashMap<NodeName, TargetNode>,

    /// Parallelism limit.
    parallelism_limit: ParallelismLimit,

    /// Evaluation limit.
    evaluation_node_limit: EvaluationNodeLimit,

    /// Whether the deployment has been executed.
    executed: bool,
}

/// Handle to a target node.
#[derive(Debug)]
pub struct TargetNode {
    /// Name of the node.
    name: NodeName,

    /// The host to deploy to.
    host: Option<Box<dyn Host>>,

    /// The config.deployment values of the node.
    config: NodeConfig,
}

impl TargetNode {
    pub fn new(name: NodeName, host: Option<Box<dyn Host>>, config: NodeConfig) -> Self {
        Self { name, host, config }
    }

    pub fn into_host(self) -> Option<Box<dyn Host>> {
        self.host
    }
}

impl Deployment {
    /// Creates a new deployment.
    pub fn new(
        hive: Hive,
        targets: TargetNodeMap,
        goal: Goal,
        progress: Option<ProgressSender>,
    ) -> Self {
        Self {
            hive,
            goal,
            options: Options::default(),
            nix_options: NixOptions::default(),
            progress,
            targets,
            parallelism_limit: ParallelismLimit::default(),
            evaluation_node_limit: EvaluationNodeLimit::default(),
            executed: false,
        }
    }

    /// Executes the deployment.
    ///
    /// If a ProgressSender is supplied, this future should be driven in
    /// parallel with the `run_until_completion()` future of the progress
    /// output that created the sender.
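    ///
    /// A rough sketch of the intended call pattern, assuming `progress` is
    /// the progress output that produced the ProgressSender (obtaining it is
    /// outside the scope of this module):
    ///
    /// ```ignore
    /// let (result, _) = tokio::join!(
    ///     deployment.execute(),
    ///     progress.run_until_completion(),
    /// );
    /// result?;
    /// ```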
    pub async fn execute(mut self) -> ColmenaResult<()> {
        if self.executed {
            return Err(ColmenaError::DeploymentAlreadyExecuted);
        }

        self.executed = true;

        let (mut monitor, meta) = JobMonitor::new(self.progress.clone());

        if let Some(width) = util::get_label_width(&self.targets) {
            monitor.set_label_width(width);
        }

        let nix_options = self.hive.nix_options_with_builders().await?;
        self.nix_options = nix_options;

        if self.goal == Goal::UploadKeys {
            // Just upload keys
            let targets = mem::take(&mut self.targets);
            let deployment = DeploymentHandle::new(self);
            let meta_future = meta.run(|meta| async move {
                let mut futures = Vec::new();

                for target in targets.into_values() {
                    futures.push(deployment.upload_keys_to_node(meta.clone(), target));
                }

                join_all(futures)
                    .await
                    .into_iter()
                    .collect::<ColmenaResult<Vec<()>>>()?;

                Ok(())
            });

            let (result, _) = tokio::join!(meta_future, monitor.run_until_completion());

            result?;

            Ok(())
        } else {
            // Do the whole eval-build-deploy flow
            let targets = mem::take(&mut self.targets);
            let deployment = DeploymentHandle::new(self);
            let meta_future = meta.run(|meta| async move {
                match deployment.options.evaluator {
                    Evaluator::Chunked => {
                        deployment.execute_chunked(meta.clone(), targets).await?;
                    }
                    Evaluator::Streaming => {
                        log::warn!("Streaming evaluation is an experimental feature");
                        deployment.execute_streaming(meta.clone(), targets).await?;
                    }
                }

                Ok(())
            });

            let (result, _) = tokio::join!(meta_future, monitor.run_until_completion());

            result?;

            Ok(())
        }
    }

    pub fn set_options(&mut self, options: Options) {
        self.options = options;
    }

    pub fn set_parallelism_limit(&mut self, limit: ParallelismLimit) {
        self.parallelism_limit = limit;
    }

    pub fn set_evaluation_node_limit(&mut self, limit: EvaluationNodeLimit) {
        self.evaluation_node_limit = limit;
    }

    /// Executes the deployment on selected nodes, evaluating a chunk at a time.
    async fn execute_chunked(
        self: &DeploymentHandle,
        parent: JobHandle,
        mut targets: TargetNodeMap,
    ) -> ColmenaResult<()> {
        // `self.targets` was already moved out in `execute()`, so fall back
        // to the size of the map passed in.
        let eval_limit = self
            .evaluation_node_limit
            .get_limit()
            .unwrap_or(targets.len());

        let mut futures = Vec::new();
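
        // For example, with 10 targets and an effective limit of 4, the
        // targets are split into chunks of sizes 4, 4, and 2; each chunk is
        // evaluated, built, and deployed independently of the others.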
        for chunk in targets.drain().chunks(eval_limit).into_iter() {
            let mut map = HashMap::new();
            for (name, host) in chunk {
                map.insert(name, host);
            }

            futures.push(self.execute_one_chunk(parent.clone(), map));
        }

        join_all(futures)
            .await
            .into_iter()
            .collect::<ColmenaResult<Vec<()>>>()?;

        Ok(())
    }

    /// Executes the deployment on selected nodes using a streaming evaluator.
    async fn execute_streaming(
        self: &DeploymentHandle,
        parent: JobHandle,
        mut targets: TargetNodeMap,
    ) -> ColmenaResult<()> {
        if self.goal == Goal::UploadKeys {
            unreachable!(); // Goal::UploadKeys is handled separately in execute()
        }

        let nodes: Vec<NodeName> = targets.keys().cloned().collect();
        let expr = self.hive.eval_selected_expr(&nodes)?;

        let job = parent.create_job(JobType::Evaluate, nodes.clone())?;

        let futures = job
            .run(|job| async move {
                let mut evaluator = NixEvalJobs::default();
                let eval_limit = self
                    .evaluation_node_limit
                    .get_limit()
                    .unwrap_or(targets.len());
                evaluator.set_eval_limit(eval_limit);
                evaluator.set_job(job.clone());

                // FIXME: nix-eval-jobs currently does not support IFD with builders
                let options = self.hive.nix_options();
                let mut stream = evaluator.evaluate(&expr, options).await?;

                let mut futures: Vec<tokio::task::JoinHandle<ColmenaResult<()>>> = Vec::new();
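
                // Results stream in per node: each attribute is handled as
                // soon as its evaluation finishes, rather than after the
                // whole set has been evaluated, and gets its own spawned
                // build/deploy task.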
                while let Some(item) = stream.next().await {
                    match item {
                        Ok(attr) => {
                            let node_name = NodeName::new(attr.attribute().to_owned())?;
                            let profile_drv: ProfileDerivation = attr.into_derivation()?;

                            // FIXME: Consolidate
                            let mut target = targets.remove(&node_name).unwrap();

                            if let Some(force_build_on_target) = self.options.force_build_on_target
                            {
                                target.config.set_build_on_target(force_build_on_target);
                            }

                            let job_handle = job.clone();
                            let arc_self = self.clone();
                            futures.push(tokio::spawn(async move {
                                let (target, profile) = {
                                    if target.config.build_on_target() {
                                        arc_self
                                            .build_on_node(
                                                job_handle.clone(),
                                                target,
                                                profile_drv.clone(),
                                            )
                                            .await?
                                    } else {
                                        arc_self
                                            .build_and_push_node(
                                                job_handle.clone(),
                                                target,
                                                profile_drv.clone(),
                                            )
                                            .await?
                                    }
                                };

                                if arc_self.goal.requires_activation() {
                                    arc_self.activate_node(job_handle, target, profile).await
                                } else {
                                    Ok(())
                                }
                            }));
                        }
                        Err(e) => {
                            match e {
                                EvalError::Global(e) => {
                                    // Global error - abort immediately
                                    return Err(e);
                                }
                                EvalError::Attribute(e) => {
                                    // Attribute-level error
                                    //
                                    // Here the eventual non-zero exit code of the evaluator
                                    // will translate into an `EvalError::Global`, causing
                                    // the entire future to resolve to an Err.

                                    let node_name =
                                        NodeName::new(e.attribute().to_string()).unwrap();
                                    let nodes = vec![node_name];
                                    let job = parent.create_job(JobType::Evaluate, nodes)?;

                                    job.state(JobState::Running)?;
                                    for line in e.error().lines() {
                                        job.stderr(line.to_string())?;
                                    }
                                    job.state(JobState::Failed)?;
                                }
                            }
                        }
                    }
                }

                Ok(futures)
            })
            .await?;

        join_all(futures)
            .await
            .into_iter()
            .map(|r| r.unwrap()) // panic on JoinError (the spawned future panicked)
            .collect::<ColmenaResult<Vec<()>>>()?;

        Ok(())
    }

    /// Executes the deployment against a portion of nodes.
    async fn execute_one_chunk(
        self: &DeploymentHandle,
        parent: JobHandle,
        mut chunk: TargetNodeMap,
    ) -> ColmenaResult<()> {
        if self.goal == Goal::UploadKeys {
            unreachable!(); // Goal::UploadKeys is handled separately in execute()
        }

        let nodes: Vec<NodeName> = chunk.keys().cloned().collect();
        let profile_drvs = self.evaluate_nodes(parent.clone(), nodes.clone()).await?;
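
        // The whole chunk was evaluated together in one `Evaluate` job; the
        // resulting profile derivations are now built and deployed
        // concurrently.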
        let mut futures = Vec::new();

        for (name, profile_drv) in profile_drvs.iter() {
            let mut target = chunk.remove(name).unwrap();

            if let Some(force_build_on_target) = self.options.force_build_on_target {
                target.config.set_build_on_target(force_build_on_target);
            }

            let job_handle = parent.clone();
            let arc_self = self.clone();
            futures.push(async move {
                let (target, profile) = {
                    if target.config.build_on_target() {
                        arc_self
                            .build_on_node(job_handle.clone(), target, profile_drv.clone())
                            .await?
                    } else {
                        arc_self
                            .build_and_push_node(job_handle.clone(), target, profile_drv.clone())
                            .await?
                    }
                };

                if arc_self.goal.requires_activation() {
                    arc_self.activate_node(job_handle, target, profile).await
                } else {
                    Ok(())
                }
            });
        }

        join_all(futures)
            .await
            .into_iter()
            .collect::<ColmenaResult<Vec<()>>>()?;

        Ok(())
    }

    /// Evaluates a set of nodes, returning their corresponding store derivations.
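    ///
    /// At most `parallelism_limit.evaluation` evaluations run at once; the
    /// job waits for a permit before starting.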
    async fn evaluate_nodes(
        self: &DeploymentHandle,
        parent: JobHandle,
        nodes: Vec<NodeName>,
    ) -> ColmenaResult<HashMap<NodeName, ProfileDerivation>> {
        let job = parent.create_job(JobType::Evaluate, nodes.clone())?;

        job.run_waiting(|job| async move {
            // Wait for eval limit
            let permit = self.parallelism_limit.evaluation.acquire().await.unwrap();
            job.state(JobState::Running)?;

            let result = self.hive.eval_selected(&nodes, Some(job.clone())).await;

            drop(permit);
            result
        })
        .await
    }

    /// Uploads keys to a node without deploying a new profile.
    async fn upload_keys_to_node(
        self: &DeploymentHandle,
        parent: JobHandle,
        mut target: TargetNode,
    ) -> ColmenaResult<()> {
        let nodes = vec![target.name.clone()];
        let job = parent.create_job(JobType::UploadKeys, nodes)?;
        job.run(|_| async move {
            if target.host.is_none() {
                return Err(ColmenaError::Unsupported);
            }

            let host = target.host.as_mut().unwrap();
            host.upload_keys(&target.config.keys, true).await?;

            Ok(())
        })
        .await
    }

    /// Builds a system profile directly on the node itself.
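    ///
    /// The profile derivation and its dependencies are copied to the node
    /// first, then realized (built) there.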
    async fn build_on_node(
        self: &DeploymentHandle,
        parent: JobHandle,
        mut target: TargetNode,
        profile_drv: ProfileDerivation,
    ) -> ColmenaResult<(TargetNode, Profile)> {
        let nodes = vec![target.name.clone()];

        let permit = self.parallelism_limit.apply.acquire().await.unwrap();

        let build_job = parent.create_job(JobType::Build, nodes.clone())?;
        let (target, profile) = build_job
            .run(|job| async move {
                if target.host.is_none() {
                    return Err(ColmenaError::Unsupported);
                }

                let host = target.host.as_mut().unwrap();
                host.set_job(Some(job.clone()));

                host.copy_closure(
                    profile_drv.as_store_path(),
                    CopyDirection::ToRemote,
                    CopyOptions::default().include_outputs(true),
                )
                .await?;

                let profile = profile_drv.realize_remote(host).await?;

                job.success_with_message(format!("Built {:?} on target node", profile.as_path()))?;
                Ok((target, profile))
            })
            .await?;

        drop(permit);

        Ok((target, profile))
    }

    /// Builds and pushes a system profile on a node.
    async fn build_and_push_node(
        self: &DeploymentHandle,
        parent: JobHandle,
        target: TargetNode,
        profile_drv: ProfileDerivation,
    ) -> ColmenaResult<(TargetNode, Profile)> {
        let nodes = vec![target.name.clone()];

        let permit = self.parallelism_limit.apply.acquire().await.unwrap();

        // Build system profile
        let build_job = parent.create_job(JobType::Build, nodes.clone())?;
        let arc_self = self.clone();
        let profile: Profile = build_job
            .run(|job| async move {
                // FIXME: Remote builder?
                let mut builder = LocalHost::new(arc_self.nix_options.clone()).upcast();
                builder.set_job(Some(job.clone()));

                let profile = profile_drv.realize(&mut builder).await?;

                job.success_with_message(format!("Built {:?}", profile.as_path()))?;
                Ok(profile)
            })
            .await?;

        // Create GC root
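        // The root is created under `<context dir>/.gcroots/node-<name>` and
        // keeps the built profile from being garbage-collected before it is
        // pushed and activated.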
        let profile_r = profile.clone();
        let mut target = if self.options.create_gc_roots {
            let job = parent.create_job(JobType::CreateGcRoots, nodes.clone())?;
            let arc_self = self.clone();
            job.run_waiting(|job| async move {
                if let Some(dir) = arc_self.hive.context_dir() {
                    job.state(JobState::Running)?;
                    let path = dir.join(".gcroots").join(format!("node-{}", &*target.name));

                    profile_r.create_gc_root(&path).await?;
                } else {
                    job.noop("No context directory to create GC roots in".to_string())?;
                }
                Ok(target)
            })
            .await?
        } else {
            target
        };

        if self.goal == Goal::Build {
            return Ok((target, profile));
        }

        // Push closure to remote
        let push_job = parent.create_job(JobType::Push, nodes.clone())?;
        let push_profile = profile.clone();
        let arc_self = self.clone();
        let target = push_job
            .run(|job| async move {
                if target.host.is_none() {
                    return Err(ColmenaError::Unsupported);
                }

                let host = target.host.as_mut().unwrap();
                host.set_job(Some(job.clone()));
                host.copy_closure(
                    push_profile.as_store_path(),
                    CopyDirection::ToRemote,
                    arc_self.options.to_copy_options(),
                )
                .await?;

                Ok(target)
            })
            .await?;

        drop(permit);

        Ok((target, profile))
    }

    /// Activates a system profile on a node.
    ///
    /// This will also upload keys to the node.
    async fn activate_node(
        self: DeploymentHandle,
        parent: JobHandle,
        mut target: TargetNode,
        profile: Profile,
    ) -> ColmenaResult<()> {
        let nodes = vec![target.name.clone()];

        let permit = self.parallelism_limit.apply.acquire().await.unwrap();

        // Upload pre-activation keys
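        // Keys marked `UploadKeyAt::PreActivation` must be in place before
        // the new profile is activated; post-activation keys are uploaded
        // further below.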
        let mut target = if self.options.upload_keys {
            let job = parent.create_job(JobType::UploadKeys, nodes.clone())?;
            job.run_waiting(|job| async move {
                let keys = target
                    .config
                    .keys
                    .iter()
                    .filter(|(_, v)| v.upload_at() == UploadKeyAt::PreActivation)
                    .map(|(k, v)| (k.clone(), v.clone()))
                    .collect::<HashMap<String, Key>>();

                if keys.is_empty() {
                    job.noop("No pre-activation keys to upload".to_string())?;
                    return Ok(target);
                }

                job.state(JobState::Running)?;
                job.message("Uploading pre-activation keys...".to_string())?;

                let host = target.host.as_mut().unwrap();
                host.set_job(Some(job.clone()));
                host.upload_keys(&keys, false).await?;

                job.success_with_message("Uploaded keys (pre-activation)".to_string())?;
                Ok(target)
            })
            .await?
        } else {
            target
        };

        // Activate profile
        let activation_job = parent.create_job(JobType::Activate, nodes.clone())?;
        let arc_self = self.clone();
        let profile_r = profile.clone();
        let mut target = activation_job
            .run(|job| async move {
                let host = target.host.as_mut().unwrap();
                host.set_job(Some(job.clone()));

                if !target.config.replace_unknown_profiles {
                    job.message("Checking remote profile...".to_string())?;

                    let profile = host.get_main_system_profile().await?;

                    if profile.as_store_path().exists() {
                        job.message("Remote profile known".to_string())?;
                    } else if arc_self.options.force_replace_unknown_profiles {
                        job.message(
                            "Warning: Remote profile is unknown, but unknown profiles are being ignored"
                                .to_string(),
                        )?;
                    } else {
                        return Err(ColmenaError::ActiveProfileUnknown { profile });
                    }
                }

                host.activate(&profile_r, arc_self.goal).await?;

                job.success_with_message(arc_self.goal.success_str().to_string())?;

                Ok(target)
            })
            .await?;

        // Upload post-activation keys
        let mut target = if self.options.upload_keys {
            let job = parent.create_job(JobType::UploadKeys, nodes.clone())?;
            job.run_waiting(|job| async move {
                let keys = target
                    .config
                    .keys
                    .iter()
                    .filter(|(_, v)| v.upload_at() == UploadKeyAt::PostActivation)
                    .map(|(k, v)| (k.clone(), v.clone()))
                    .collect::<HashMap<String, Key>>();

                if keys.is_empty() {
                    job.noop("No post-activation keys to upload".to_string())?;
                    return Ok(target);
                }

                job.state(JobState::Running)?;
                job.message("Uploading post-activation keys...".to_string())?;

                let host = target.host.as_mut().unwrap();
                host.set_job(Some(job.clone()));
                host.upload_keys(&keys, true).await?;

                job.success_with_message("Uploaded keys (post-activation)".to_string())?;
                Ok(target)
            })
            .await?
        } else {
            target
        };

        // Reboot
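        // Only goals that persist across reboots (e.g. switch or boot, but
        // not test) pass the new profile along; `wait_for_boot(true)` blocks
        // the job until the node is reachable again.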
        if self.options.reboot {
            let job = parent.create_job(JobType::Reboot, nodes.clone())?;
            let arc_self = self.clone();
            job.run(|job| async move {
                let host = target.host.as_mut().unwrap();
                host.set_job(Some(job.clone()));

                let new_profile = if arc_self.goal.persists_after_reboot() {
                    Some(profile)
                } else {
                    None
                };

                let options = RebootOptions::default()
                    .wait_for_boot(true)
                    .new_profile(new_profile);

                host.reboot(options).await?;

                Ok(())
            })
            .await?;
        }

        drop(permit);

        Ok(())
    }
}