colmena/src/nix/deployment/limits.rs
Zhaofeng Li 0cb3f8e968 Redesign deployment process (again)
We now ship Events from different parts of the deployment process
via a channel to a job monitor.
2021-11-21 00:11:37 -08:00

109 lines
3.1 KiB
Rust

//! Parallelism limits.
use tokio::sync::Semaphore;
/// Amount of RAM reserved for the system, in MB.
const EVAL_RESERVE_MB: u64 = 1024;
/// Estimated amount of RAM needed to evaluate one host, in MB.
const EVAL_PER_HOST_MB: u64 = 512;
/// The parallelism limit for a deployment.
#[derive(Debug)]
pub struct ParallelismLimit {
/// Limit of concurrent evaluation processes.
pub evaluation: Semaphore,
/// Limit of concurrent build processes.
pub build: Semaphore,
/// Limit of concurrent apply processes.
pub apply: Semaphore,
}
impl Default for ParallelismLimit {
fn default() -> Self {
Self {
evaluation: Semaphore::new(1),
build: Semaphore::new(2),
apply: Semaphore::new(10),
}
}
}
impl ParallelismLimit {
/// Sets the concurrent apply limit.
pub fn set_apply_limit(&mut self, limit: usize) {
self.apply = Semaphore::new(limit);
}
}
/// Limit of the number of nodes in each evaluation process.
///
/// The evaluation process is very RAM-intensive, with memory
/// consumption scaling linearly with the number of nodes
/// evaluated at the same time. This can be a problem if you
/// are deploying to a large number of nodes at the same time,
/// where `nix-instantiate` may consume too much RAM and get
/// killed by the OS (`NixKilled` error).
///
/// Evaluating each node on its own is not an efficient solution,
/// with total CPU time and memory consumption vastly exceeding the
/// case where we evaluate the same set of nodes at the same time
/// (TODO: Provide statistics).
///
/// To overcome this problem, we split the evaluation process into
/// chunks when necessary, with the maximum number of nodes in
/// each `nix-instantiate` invocation determined with:
///
/// - A simple heuristic based on remaining memory in the system
/// - A supplied number
/// - No limit at all
#[derive(Copy, Clone, Debug)]
pub enum EvaluationNodeLimit {
/// Use a naive heuristic based on available memory.
Heuristic,
/// Supply the maximum number of nodes.
Manual(usize),
/// Do not limit the number of nodes in each evaluation process
None,
}
impl Default for EvaluationNodeLimit {
fn default() -> Self {
Self::Heuristic
}
}
impl EvaluationNodeLimit {
/// Returns the maximum number of hosts in each evaluation.
///
/// The result should be cached.
pub fn get_limit(&self) -> Option<usize> {
match self {
EvaluationNodeLimit::Heuristic => {
if let Ok(mem_info) = sys_info::mem_info() {
let mut mb = mem_info.avail / 1024;
if mb >= EVAL_RESERVE_MB {
mb -= EVAL_RESERVE_MB;
}
let nodes = mb / EVAL_PER_HOST_MB;
if nodes == 0 {
Some(1)
} else {
Some(nodes as usize)
}
} else {
Some(10)
}
}
EvaluationNodeLimit::Manual(limit) => Some(*limit),
EvaluationNodeLimit::None => None,
}
}
}