From 6b36215d61e3bece3622a1c258a4dcb57a409a71 Mon Sep 17 00:00:00 2001
From: Daniel Barlow
Date: Tue, 30 Jul 2024 22:37:43 +0100
Subject: [PATCH] add health check service and example that uses it

---
 examples/l2tp.nix                | 95 +++++++++++++++++++++++++++++++-
 modules/health-check/default.nix | 43 +++++++++++++++
 modules/health-check/service.nix | 45 ++++++++++++++++
 overlay.nix                      |  3 +-
 4 files changed, 184 insertions(+), 2 deletions(-)
 create mode 100644 modules/health-check/default.nix
 create mode 100644 modules/health-check/service.nix

diff --git a/examples/l2tp.nix b/examples/l2tp.nix
index 169e62f..7d41e92 100644
--- a/examples/l2tp.nix
+++ b/examples/l2tp.nix
@@ -28,6 +28,9 @@ in rec {
     ../modules/watchdog
     ../modules/mount
     ../modules/ppp
+    ../modules/round-robin
+    ../modules/health-check
+    ../modules/profiles/gateway.nix
   ];
   hostname = "thing";
 
@@ -38,7 +41,97 @@ in rec {
     authType = "chap";
   };
 
-  services.dhcpc = svc.network.dhcp.client.build {
+  profile.gateway = {
+    lan = {
+      interfaces = with config.hardware.networkInterfaces;
+        [
+          # EDIT: these are the interfaces exposed by the gl.inet gl-ar750:
+          # if your device has more or differently named lan interfaces,
+          # specify them here
+          wlan wlan5
+          lan
+        ];
+      inherit (rsecrets.lan) prefix;
+      address = {
+        family = "inet"; address ="${rsecrets.lan.prefix}.1"; prefixLength = 24;
+      };
+      dhcp = {
+        start = 10;
+        end = 240;
+        hosts = { } // lib.optionalAttrs (builtins.pathExists ./static-leases.nix) (import ./static-leases.nix);
+        localDomain = "lan";
+      };
+    };
+    wan = {
+      interface = let
+        pppoe = svc.pppoe.build {
+          interface = config.hardware.networkInterfaces.wan;
+          debug = true;
+          username = rsecrets.l2tp.name;
+          password = rsecrets.l2tp.password;
+        };
+
+        l2tp =
+          let
+            check-address = oneshot rec {
+              name = "check-lns-address";
+              up = "grep -Fx ${lns.address} $(output_path ${services.lns-address} addresses)";
+              dependencies = [ services.lns-address ];
+            };
+            route = svc.network.route.build {
+              via = "$(output ${services.bootstrap-dhcpc} router)";
+              target = lns.address;
+              dependencies = [services.bootstrap-dhcpc check-address];
+            };
+            l2tpd= svc.l2tp.build {
+              lns = lns.address;
+              ppp-options = [
+                "debug" "+ipv6" "noauth"
+                "name" rsecrets.l2tp.name
+                "password" rsecrets.l2tp.password
+              ];
+              dependencies = [config.services.lns-address route check-address];
+            };
+          in
+            svc.health-check.build {
+              service = l2tpd;
+              threshold = 3;
+              interval = 2;
+              # send a single probe and give up after 2 seconds: without
+              # -c ping keeps running, so the check would never finish
+              healthCheck = pkgs.writeAshScript "ping-check" {} "ping -c 1 -W 2 1.1.1.1";
+            };
+      in svc.round-robin.build {
+        name = "wan";
+        services = [
+          pppoe
+          l2tp
+        ];
+      };
+      dhcp6.enable = true;
+    };
+
+    wireless.networks = {
+      "${rsecrets.ssid}" = {
+        interface = config.hardware.networkInterfaces.wlan;
+        hw_mode = "g";
+        channel = "6";
+        ieee80211n = 1;
+      } // wirelessConfig;
+      "${rsecrets.ssid}5" = rec {
+        interface = config.hardware.networkInterfaces.wlan5;
+        hw_mode = "a";
+        channel = 36;
+        ht_capab = "[HT40+]";
+        vht_oper_chwidth = 1;
+        vht_oper_centr_freq_seg0_idx = channel + 6;
+        ieee80211n = 1;
+        ieee80211ac = 1;
+      } // wirelessConfig;
+    };
+  };
+
+  services.bootstrap-dhcpc = svc.network.dhcp.client.build {
     interface = config.services.wwan;
     dependencies = [ config.services.hostname ];
   };
diff --git a/modules/health-check/default.nix b/modules/health-check/default.nix
new file mode 100644
index 0000000..9ac8858
--- /dev/null
+++ b/modules/health-check/default.nix
@@ -0,0 +1,43 @@
+## Health check
+##
+## Runs a service and a separate periodic health process. When the
+## health check starts failing over a period of time, kill the service.
+## (Usually that means the supervisor will restart it, but you can
+## have other behaviours by e.g. combining this service with a round-robin
+## for failover)
+
+
+{ lib, pkgs, config, ...}:
+let
+  inherit (lib) mkOption types;
+  inherit (pkgs) liminix;
+#  inherit (pkgs.liminix.services) longrun;
+in {
+  options = {
+    system.service.health-check = mkOption {
+      description = "run a service while periodically checking it is healthy";
+      type = liminix.lib.types.serviceDefn;
+    };
+  };
+  config.system.service.health-check = config.system.callService ./service.nix {
+    service = mkOption {
+      type = liminix.lib.types.service;
+    };
+    interval = mkOption {
+      description = "interval between checks, in seconds";
+      type = types.int;
+      default = 10;
+      example = 10;
+    };
+    threshold = mkOption {
+      description = "number of consecutive failures required for the service to be kicked";
+      type = types.int;
+      example = 3;
+    };
+    healthCheck = mkOption {
+      description = "health check command or script. Expected to exit 0 if the service is healthy or any other exit status otherwise";
+      type = types.path;
+    };
+  };
+  config.programs.busybox.applets = ["expr"];
+}
diff --git a/modules/health-check/service.nix b/modules/health-check/service.nix
new file mode 100644
index 0000000..80b3e34
--- /dev/null
+++ b/modules/health-check/service.nix
@@ -0,0 +1,45 @@
+# Wrap `service` so that a companion longrun ("check-<name>") runs
+# `healthCheck` every `interval` seconds and bounces the service with
+# s6-svc -r once `threshold` consecutive checks have failed.
+{
+  liminix, lib, lim, s6
+}:
+{ service, interval, threshold, healthCheck } :
+let
+  inherit (liminix.services) oneshot longrun;
+  inherit (builtins) toString;
+  inherit (service) name;
+  checker = let name' = "check-${name}"; in longrun {
+    name = name';
+    run = ''
+      fails=0
+      echo waiting for /run/service/${name}
+      ${s6}/bin/s6-svwait -U /run/service/${name} || exit
+      while sleep ${toString interval} ; do
+        ${healthCheck}
+        if test $? -gt 0; then
+          fails=$(expr $fails + 1)
+        else
+          fails=0
+        fi
+        echo fails $fails/${toString threshold} for ${name}
+        # -ge, not -gt: the option is documented as the number of
+        # consecutive failures required, so act on failure number
+        # "threshold" rather than one check later
+        if test "$fails" -ge "${toString threshold}" ; then
+          echo time to die
+          ${s6}/bin/s6-svc -r /run/service/${name}
+          echo bounced
+          fails=0
+          echo waiting for /run/service/${name}
+          ${s6}/bin/s6-svwait -U /run/service/${name}
+        fi
+      done
+    '';
+  };
+# the checker is appended to the wrapped service's buildInputs and
+# dependencies; no other attribute of the service is overridden here
+in service.overrideAttrs(o: {
+  buildInputs = (lim.orEmpty o.buildInputs) ++ [ checker ];
+  dependencies = (lim.orEmpty o.dependencies) ++ [ checker ];
+})
diff --git a/overlay.nix b/overlay.nix
index dda4ddc..52b5872 100644
--- a/overlay.nix
+++ b/overlay.nix
@@ -47,7 +47,8 @@ in extraPkgs // {
 
   # liminix library functions
   lim = {
-    parseInt = s : (builtins.fromTOML "r=${s}").r;
+    parseInt = s: (builtins.fromTOML "r=${s}").r;
+    orEmpty = x: if x != null then x else [];
   };
 
   # keep these alphabetical