infrastructure/machines/vault01/ups.nix

98 lines
2.6 KiB
Nix

# NixOS module for this machine's UPS: NUT monitoring (`power.ups`), the
# state paths used by upssched, and the Prometheus NUT exporter.
{
# Evaluated system configuration; read for the age secret path.
config,
# nixpkgs library; `lib.getExe` locates the upssched command script.
lib,
# Package set; provides the script/text-file builders and runtime tools.
pkgs,
# Project-provided argument; only `meta.network.${name}.netbirdIp` is read here.
meta,
# Key into `meta.network` — presumably this machine's name (TODO confirm).
name,
...
}:
{
# NUT (Network UPS Tools) setup for the USB-attached "eaton" UPS: driver,
# monitoring account, upsmon monitor and the upssched rules reacting to events.
power.ups = {
  enable = true;

  # Single UPS named "eaton", driven over USB HID with port auto-detection.
  ups.eaton = {
    driver = "usbhid-ups";
    port = "auto";
  };

  # Account used by upsmon; its password is read from an age secret.
  users.eatonmon = {
    passwordFile = config.age.secrets."eatonmon-password_file".path;
    upsmon = "primary";
  };

  upsmon.monitor.eaton = {
    user = "eatonmon";
  };

  # Path to the generated upssched.conf.
  schedulerRules =
    let
      # Script run by upssched (CMDSCRIPT) with the event/timer name as $1:
      # it mails a notification, then powers the machine off on `shutdown-*`.
      cmdScript = pkgs.writeShellApplication {
        name = "upssched-cmd.sh";
        runtimeInputs = with pkgs; [
          systemd
          msmtp
        ];
        text = ''
          case $1 in
            shutdown-low) MEANING="Battery is low, shutting down.";;
            shutdown-batt) MEANING="On battery for 15min, shutting down.";;
            warn-batt) MEANING="Power line failure, going on battery.";;
            warn-comm) MEANING="Communication with the UPS was broken.";;
            warn-bypass) MEANING="The UPS is not protecting the server, power line failure would kill $HOSTNAME instantly.";;
            *) MEANING="Signal unknown, check configuration.";;
          esac

          # writeShellApplication enables errexit: a mail failure (likely during
          # a power outage) must never prevent the shutdown below from running.
          # The empty line after the headers is required to start the mail body.
          sendmail -i -t <<ERRMAIL || echo "upssched-cmd: could not send the notification mail" >&2
          To: fai+monitoring@dgnum.eu
          Subject: [$HOSTNAME] Battery signal: $1
          Content-Transfer-Encoding: 8bit
          Content-Type: text/plain; charset=UTF-8

          $MEANING
          ERRMAIL

          case $1 in
            shutdown-*)
              # shutdown(8) only accepts "now", "+minutes" or "hh:mm", so the
              # 20s grace period for the mail is implemented with sleep.
              sleep 20
              shutdown now
              ;;
          esac
        '';
      };
    in
    # upssched requires CMDSCRIPT, PIPEFN and LOCKFN to precede the AT rules.
    # ONBATT both warns immediately and arms a 900s timer that ONLINE cancels.
    (pkgs.writeTextFile {
      name = "upssched.conf";
      text = ''
        CMDSCRIPT ${lib.getExe cmdScript}
        PIPEFN /var/state/ups/upssched/upssched.pipe
        LOCKFN /var/state/ups/upssched/upssched.lock
        AT LOWBATT * EXECUTE shutdown-low
        AT ONBATT * EXECUTE warn-batt
        AT ONBATT * START-TIMER shutdown-batt 900
        AT ONLINE * CANCEL-TIMER shutdown-batt
        AT COMMBAD * EXECUTE warn-comm
        AT NOCOMM * EXECUTE warn-comm
        AT BYPASS * EXECUTE warn-bypass
      '';
    }).outPath;
};
# State directory and pipe path referenced by PIPEFN/LOCKFN in upssched.conf,
# both owned by root and inaccessible to other users.
systemd.tmpfiles.settings."10-upsmon" =
  let
    # Build a tmpfiles entry owned by root:root with the given mode.
    rootOwned = mode: {
      user = "root";
      group = "root";
      inherit mode;
    };
  in
  {
    "/var/state/ups/upssched".d = rootOwned "0700";
    "/var/state/ups/upssched/upssched.pipe".p = rootOwned "0600";
  };
# Prometheus exporter for NUT, listening only on the address taken from
# `meta.network.${name}.netbirdIp`.
services.prometheus.exporters.nut = {
  enable = true;
  listenAddress = meta.network.${name}.netbirdIp;
  port = 9199;
};

# Open the exporter port on wt0 only (presumably the NetBird interface —
# TODO confirm). The port is read back from the exporter option so the
# firewall rule cannot drift from the listening port.
networking.firewall.interfaces.wt0.allowedTCPPorts = [
  config.services.prometheus.exporters.nut.port
];
}