infrastructure/scripts/check-deployment.sh
sinavir d55faceefb
All checks were successful
build configuration / build_web02 (push) Successful in 1m5s
build configuration / build_rescue01 (push) Successful in 1m5s
build configuration / build_vault01 (push) Successful in 1m7s
build configuration / build_storage01 (push) Successful in 1m11s
build configuration / build_compute01 (push) Successful in 1m16s
lint / check (push) Successful in 21s
build configuration / build_web01 (push) Successful in 1m31s
fix(check-deployment): add some error handling and fix the "one-node" mode
2024-02-23 17:35:25 +01:00

126 lines
2.8 KiB
Bash

#!/usr/bin/env bash
#!@bash@/bin/bash
# shellcheck shell=bash

# Strict mode: abort on an unhandled command failure (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Run the last stage of a pipeline in the current shell, so that variables
# assigned inside a `... | while read` loop are still set after the loop.
shopt -s lastpipe
# Help text printed by -h/--help and after a usage error.
usage="$(basename "$0") [-h] [--diff] [NODE]
Check if deployed config is actually the one on main
By default check all nodes
where:
-h Show this help text
--diff Show diff with nvd
Exemple:
check-deployment web01"

#######################################
# Parse the command line.
# Globals:
#   usage (read)
#   node  (written) - name of the single node to check; empty means all nodes
#   diff  (written) - "y" when --diff was given, empty otherwise
# Arguments:
#   The script's command-line arguments.
# Outputs:
#   Help text on stdout for -h/--help; usage errors on stderr.
# Returns:
#   Does not return on -h/--help (exit 0) or on a usage error (exit 1).
#######################################
parse_args() {
  # Start from a known state so that a `node` or `diff` variable inherited
  # from the caller's environment is never mistaken for command-line input.
  node=''
  diff=''
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --help | -h)
        echo "$usage"
        exit 0
        ;;
      --diff)
        diff=y
        ;;
      -*)
        # Reject unknown options instead of silently using them as NODE,
        # which would skip every machine and still exit successfully.
        >&2 echo "Unknown option: $1. Help:"
        >&2 echo "$usage"
        exit 1
        ;;
      *)
        if [[ -z $node ]]; then
          node="$1"
        else
          >&2 echo "Too many arguments. Help:"
          >&2 echo "$usage"
          exit 1
        fi
        ;;
    esac
    shift
  done
}
parse_args "$@"
#############
# go to tmp #
#############
# Work in a throw-away clone of the local `main` branch so the evaluation is
# independent of the state of the working tree.
TMP=$(mktemp -d)
# Remove the temporary clone on every exit path (failed clone, colmena
# failure, errexit abort, ...), not only when the script runs to completion.
# -f keeps the removal quiet and harmless if the directory is already gone.
trap 'rm -rf -- "$TMP"' EXIT
GIT_TOP_LEVEL=$(git rev-parse --show-toplevel)
echo "Cloning local main..."
git clone -q --branch main --single-branch "$GIT_TOP_LEVEL" "$TMP"
pushd "$TMP" > /dev/null || exit 2
####################
# Evaluate configs #
####################
#######################################
# Report a failed colmena evaluation and abort.
# Globals:
#   COLMENA_LOGS (read) - file holding colmena's captured stderr
# Outputs:
#   An error banner followed by the captured logs, on stderr.
# Returns:
#   Never returns; exits with status 3.
#######################################
colmena_failed() {
  >&2 echo "Colmena failed. Check your config. Logs:"
  >&2 cat "$COLMENA_LOGS"
  # The logs have been shown; do not leave the temporary file behind.
  rm -f -- "$COLMENA_LOGS"
  exit 3
}
# Colmena's stderr is captured in this file and only shown if evaluation fails
COLMENA_LOGS=$(mktemp)
printf '%s\n' "Evaluating configs..."
# The expression is Nix code: single quotes keep its '${}' away from the shell
# shellcheck disable=SC2016
colmena_eval_expr='{ nodes, lib, ...}: lib.mapAttrsToList (k: v: { machine = k; path = v.config.system.build.toplevel; drv = v.config.system.build.toplevel.drvPath; domain = "${v.config.networking.hostName}.${v.config.networking.domain}"; }) nodes'
# RESULTS receives colmena's stdout: one entry per node with the keys
# machine, path, drv and domain.
RESULTS=$(colmena eval -E "$colmena_eval_expr" 2> "$COLMENA_LOGS" || colmena_failed)
rm "$COLMENA_LOGS"
printf '%s\n' "Evaluation finished"
#####################################
# retrieve and check current-system #
#####################################
#######################################
# Print the resolved target of /run/current-system on a remote host.
# Arguments:
#   $1 - host to contact; the connection is made as root
# Outputs:
#   The output of `readlink -f /run/current-system` run on the host.
# Returns:
#   The exit status of ssh.
#######################################
retrieve_current_system() {
  local remote="root@$1"
  # TODO implement a less invasive method
  # -n keeps ssh away from stdin, which the calling loop is reading from
  ssh -n "$remote" "readlink -f /run/current-system"
}
# Exit status of the whole check: 0 only if every inspected node runs the
# expected system.
return_status=0
# Becomes "y" once a machine passing the optional NODE filter has been seen
node_found=''
# Feed the loop one compact JSON object per node. With lastpipe enabled the
# loop runs in the current shell, so return_status is still set afterwards.
echo "$RESULTS" | @jq@/bin/jq -c '.[]' |
  while IFS= read -r c; do
    machine=$(echo "$c" | @jq@/bin/jq -r '.machine')
    if [[ -n ${node-} && "$machine" != "$node" ]]; then
      echo "Skipping ${machine}"
      continue
    fi
    node_found=y
    expected_path=$(echo "$c" | @jq@/bin/jq -r '.path')
    domain=$(echo "$c" | @jq@/bin/jq -r '.domain')
    drv_path=$(echo "$c" | @jq@/bin/jq -r '.drv')
    # Testing the assignment in `if` keeps errexit from aborting the whole
    # run when a single host is unreachable.
    if ! current_path=$(retrieve_current_system "$domain"); then
      echo "❌ failed to contact $domain !"
      # A node that could not be checked must not be reported as a success
      return_status=1
      continue
    fi
    if [[ "$expected_path" == "$current_path" ]]; then
      echo "$machine -> OK"
    elif [[ -n ${diff-} ]]; then
      # Bring both closures into the local store so nvd can compare them:
      # the deployed one from the host, the expected one by realising its drv.
      nix-copy-closure --from "root@$domain" "$current_path"
      nix-store -r "$drv_path"
      echo "$machine -> error. nvd output:"
      @nvd@/bin/nvd diff "$expected_path" "$current_path"
      return_status=1
    else
      echo "☠️ $machine -> error:"
      echo " - Expected system: $expected_path"
      echo " - Current system: $current_path"
      return_status=1
    fi
  done
# A NODE filter that matched nothing means nothing was checked: report it
# instead of exiting successfully.
if [[ -n ${node-} && -z $node_found ]]; then
  >&2 echo "Unknown node: $node"
  return_status=1
fi
# Leave the temporary clone before deleting it
popd > /dev/null || exit 2
# -f: the clone contains git's read-only object files, which a plain `rm -r`
# asks about one by one when the script is run from a terminal.
rm -rf -- "$TMP"
exit "$return_status"