infrastructure/modules/nixos/dgn-monitoring/default.nix
sinavir aed96b16e1
All checks were successful
Check meta / check_dns (pull_request) Successful in 16s
Check meta / check_meta (pull_request) Successful in 16s
Check workflows / check_workflows (pull_request) Successful in 17s
Build all the nodes / ap01 (pull_request) Successful in 32s
Build all the nodes / netaccess01 (pull_request) Successful in 20s
Build all the nodes / netcore00 (pull_request) Successful in 20s
Build all the nodes / bridge01 (pull_request) Successful in 52s
Build all the nodes / build01 (pull_request) Successful in 56s
Build all the nodes / geo01 (pull_request) Successful in 54s
Build all the nodes / netcore01 (pull_request) Successful in 24s
Build all the nodes / geo02 (pull_request) Successful in 56s
Build all the nodes / cof02 (pull_request) Successful in 1m5s
Build all the nodes / hypervisor01 (pull_request) Successful in 58s
Build all the nodes / hypervisor02 (pull_request) Successful in 58s
Build all the nodes / netcore02 (pull_request) Successful in 28s
Build all the nodes / compute01 (pull_request) Successful in 1m25s
Run pre-commit on all files / pre-commit (pull_request) Successful in 26s
Build the shell / build-shell (pull_request) Successful in 30s
Build all the nodes / hypervisor03 (pull_request) Successful in 1m30s
Build all the nodes / rescue01 (pull_request) Successful in 1m2s
Build all the nodes / storage01 (pull_request) Successful in 1m3s
Build all the nodes / web03 (pull_request) Successful in 1m2s
Build all the nodes / vault01 (pull_request) Successful in 1m5s
Build all the nodes / tower01 (pull_request) Successful in 1m45s
Build all the nodes / web02 (pull_request) Successful in 1m45s
Build all the nodes / web01 (pull_request) Successful in 2m24s
Check meta / check_meta (push) Successful in 16s
Check meta / check_dns (push) Successful in 18s
Build all the nodes / netcore01 (push) Successful in 22s
Build all the nodes / netaccess01 (push) Successful in 22s
Build all the nodes / netcore00 (push) Successful in 21s
Build all the nodes / ap01 (push) Successful in 31s
Build all the nodes / netcore02 (push) Successful in 20s
Build all the nodes / bridge01 (push) Successful in 55s
Build all the nodes / build01 (push) Successful in 56s
Build all the nodes / hypervisor02 (push) Successful in 1m4s
Build all the nodes / hypervisor03 (push) Successful in 1m4s
Build all the nodes / hypervisor01 (push) Successful in 1m5s
Build all the nodes / geo02 (push) Successful in 1m5s
Build all the nodes / geo01 (push) Successful in 1m6s
Build all the nodes / storage01 (push) Successful in 55s
Build all the nodes / tower01 (push) Successful in 55s
Build all the nodes / compute01 (push) Successful in 1m31s
Run pre-commit on all files / pre-commit (push) Successful in 25s
Build the shell / build-shell (push) Successful in 31s
Build all the nodes / vault01 (push) Successful in 1m7s
Build all the nodes / cof02 (push) Successful in 1m40s
Build all the nodes / web02 (push) Successful in 56s
Build all the nodes / web03 (push) Successful in 58s
Build all the nodes / rescue01 (push) Successful in 1m40s
Build all the nodes / web01 (push) Successful in 1m32s
feat(monitoring): drop prometheus in favor of victorialogs
2025-04-01 17:04:54 +02:00

99 lines
2.2 KiB
Nix

# SPDX-FileCopyrightText: 2024 Tom Hubrecht <tom.hubrecht@dgnum.eu>
#
# SPDX-License-Identifier: EUPL-1.2
{
config,
lib,
pkgs,
meta,
name,
nodeMeta,
...
}:
let
inherit (lib)
filterAttrs
mapAttrs
mapAttrsToList
mkDefault
mkEnableOption
mkForce
mkIf
mkOption
;
inherit (lib.types) attrsOf;
cfg = config.dgn-monitoring;
in
{
imports = [ ./exporters.nix ];
# Option declarations for the monitoring module.
options.dgn-monitoring =
  let
    # YAML format helper; only its option type is needed here.
    yamlFormat = pkgs.formats.yaml { };
  in
  {
    # Same as a plain enable option, except that the default is flipped to
    # `true`: the module is active unless a node explicitly turns it off.
    enable = (mkEnableOption "the DGNum monitoring system") // {
      default = true;
    };

    # One entry per scrape job. The attribute name is used as the job name
    # when the final scrape configuration is assembled below.
    scrapeConfigs = mkOption {
      description = ''
        Specifications of `scrape_config` sections.
      '';
      type = attrsOf yamlFormat.type;
    };
  };
config = mkIf cfg.enable {
# Derive one scrape job per enabled entry of `services.prometheus.exporters`,
# each targeting the exporter's port on the loopback address.
dgn-monitoring.scrapeConfigs =
  let
    # Attribute names under `services.prometheus.exporters` that are never
    # turned into scrape jobs.
    # NOTE(review): presumably these are either not real exporters
    # ("assertions", "warnings") or cannot be scraped through a plain local
    # port — confirm before changing the list.
    ignoredExporters = [
      "assertions"
      "warnings"
      "blackbox"
      "unifi-poller"
      "domain"
      "minio"
      "idrac"
      "pve"
      "tor"
    ];

    # The name test is evaluated first, so `enable` is never forced on an
    # ignored entry.
    isScraped =
      exporterName: exporter: !(builtins.elem exporterName ignoredExporters) && exporter.enable;

    # `mkDefault` lets other definitions replace the generated target list.
    toScrapeConfig = _: exporter: {
      static_configs = mkDefault [ { targets = [ "127.0.0.1:${builtins.toString exporter.port}" ]; } ];
    };
  in
  mapAttrs toScrapeConfig (filterAttrs isScraped config.services.prometheus.exporters);
# vmagent scrapes the jobs declared in `dgn-monitoring.scrapeConfigs` and
# forwards the samples to storage01 over its netbird address.
services.vmagent =
  let
    storageHost = meta.network.storage01.netbirdIp;
  in
  {
    enable = true;

    flags = {
      "remoteWrite.url" = "http://${storageHost}:8428/api/v1/write";
      "remoteWrite.label" = "node=${name}";
    };

    prometheusConfig.global = {
      scrape_interval = "15s";
      external_labels.hostname = "${name}.${nodeMeta.site}.infra.dgnum.eu";
    };

    # Turn the attribute set of jobs into a list, injecting each attribute
    # name as the entry's `job_name`.
    prometheusConfig.scrape_configs = mapAttrsToList (
      jobName: jobSpec: jobSpec // { job_name = jobName; }
    ) cfg.scrapeConfigs;
  };
# Ship the local journal to storage01 over its netbird address.
# NOTE(review): port 9428 with `/insert/journald` is presumably the
# VictoriaLogs journald ingestion endpoint — confirm against the storage01
# configuration.
services.journald.upload = {
  enable = true;
  settings.Upload.URL = "http://${meta.network.storage01.netbirdIp}:9428/insert/journald";
};
# Wait 60 seconds between restarts of the journal uploader: when the network
# or the database is down, restarting less often reduces the number of
# e-mail notifications. `mkForce` overrides any other value set for
# RestartSec.
systemd.services.systemd-journal-upload.serviceConfig = {
  RestartSec = mkForce 60;
};
};
}