forked from DGNum/colmena
We now ship Events from different parts of the deployment process via a channel to a job monitor.
109 lines
3.1 KiB
Rust
109 lines
3.1 KiB
Rust
//! Parallelism limits.
|
|
|
|
use tokio::sync::Semaphore;
|
|
|
|
/// Amount of RAM reserved for the system, in MB.
|
|
const EVAL_RESERVE_MB: u64 = 1024;
|
|
|
|
/// Estimated amount of RAM needed to evaluate one host, in MB.
|
|
const EVAL_PER_HOST_MB: u64 = 512;
|
|
|
|
/// The parallelism limit for a deployment.
|
|
#[derive(Debug)]
|
|
pub struct ParallelismLimit {
|
|
/// Limit of concurrent evaluation processes.
|
|
pub evaluation: Semaphore,
|
|
|
|
/// Limit of concurrent build processes.
|
|
pub build: Semaphore,
|
|
|
|
/// Limit of concurrent apply processes.
|
|
pub apply: Semaphore,
|
|
}
|
|
|
|
impl Default for ParallelismLimit {
|
|
fn default() -> Self {
|
|
Self {
|
|
evaluation: Semaphore::new(1),
|
|
build: Semaphore::new(2),
|
|
apply: Semaphore::new(10),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl ParallelismLimit {
|
|
/// Sets the concurrent apply limit.
|
|
pub fn set_apply_limit(&mut self, limit: usize) {
|
|
self.apply = Semaphore::new(limit);
|
|
}
|
|
}
|
|
|
|
/// Limit of the number of nodes in each evaluation process.
|
|
///
|
|
/// The evaluation process is very RAM-intensive, with memory
|
|
/// consumption scaling linearly with the number of nodes
|
|
/// evaluated at the same time. This can be a problem if you
|
|
/// are deploying to a large number of nodes at the same time,
|
|
/// where `nix-instantiate` may consume too much RAM and get
|
|
/// killed by the OS (`NixKilled` error).
|
|
///
|
|
/// Evaluating each node on its own is not an efficient solution,
|
|
/// with total CPU time and memory consumption vastly exceeding the
|
|
/// case where we evaluate the same set of nodes at the same time
|
|
/// (TODO: Provide statistics).
|
|
///
|
|
/// To overcome this problem, we split the evaluation process into
|
|
/// chunks when necessary, with the maximum number of nodes in
|
|
/// each `nix-instantiate` invocation determined with:
|
|
///
|
|
/// - A simple heuristic based on remaining memory in the system
|
|
/// - A supplied number
|
|
/// - No limit at all
|
|
#[derive(Copy, Clone, Debug)]
|
|
pub enum EvaluationNodeLimit {
|
|
/// Use a naive heuristic based on available memory.
|
|
Heuristic,
|
|
|
|
/// Supply the maximum number of nodes.
|
|
Manual(usize),
|
|
|
|
/// Do not limit the number of nodes in each evaluation process
|
|
None,
|
|
}
|
|
|
|
impl Default for EvaluationNodeLimit {
|
|
fn default() -> Self {
|
|
Self::Heuristic
|
|
}
|
|
}
|
|
|
|
impl EvaluationNodeLimit {
|
|
/// Returns the maximum number of hosts in each evaluation.
|
|
///
|
|
/// The result should be cached.
|
|
pub fn get_limit(&self) -> Option<usize> {
|
|
match self {
|
|
EvaluationNodeLimit::Heuristic => {
|
|
if let Ok(mem_info) = sys_info::mem_info() {
|
|
let mut mb = mem_info.avail / 1024;
|
|
|
|
if mb >= EVAL_RESERVE_MB {
|
|
mb -= EVAL_RESERVE_MB;
|
|
}
|
|
|
|
let nodes = mb / EVAL_PER_HOST_MB;
|
|
|
|
if nodes == 0 {
|
|
Some(1)
|
|
} else {
|
|
Some(nodes as usize)
|
|
}
|
|
} else {
|
|
Some(10)
|
|
}
|
|
}
|
|
EvaluationNodeLimit::Manual(limit) => Some(*limit),
|
|
EvaluationNodeLimit::None => None,
|
|
}
|
|
}
|
|
}
|