{ config, lib, pkgs, ... }:
lib.extra.mkConfig {
  enabledModules = [
    # INFO: This list needs to stay sorted alphabetically
  ];

  enabledServices = [
    # INFO: This list needs to stay sorted alphabetically
    # Machine learning API machine
    "microvm-ml01"
    "microvm-router01"
    "nvidia-tesla-k80"
    "proxmox"
  ];

  extraConfig = {
    microvm = {
      host.enable = true;
    };

    dgn-hardware = {
      useZfs = true;
      zfsPools = [
        "dpool"
        "ppool0"
      ];
    };

    services.netbird.enable = true;

    # We are going to use CUDA here.
    nixpkgs.config.cudaSupport = true;
    hardware.graphics.enable = true;

    environment.systemPackages = [
      # The Tesla K80 is Compute Capability 3.7 (sm_37), which is supported up
      # to CUDA 11 only, hence cudaPackages_11 here and for ollama below.
      ((pkgs.openai-whisper-cpp.override { cudaPackages = pkgs.cudaPackages_11; }).overrideAttrs (old: {
        src = pkgs.fetchFromGitHub {
          owner = "ggerganov";
          repo = "whisper.cpp";
          rev = "v1.7.1";
          hash = "sha256-EDFUVjud79ZRCzGbOh9L9NcXfN3ikvsqkVSOME9F9oo=";
        };
        # GGML_CUDA replaces the WHISPER_CUBLAS flag, deprecated in whisper.cpp v1.7.
        env = {
          WHISPER_CUBLAS = "";
          GGML_CUDA = "1";
        };
        # We only need Compute Capability 3.7.
        CUDA_ARCH_FLAGS = [ "sm_37" ];
        # We are GPU-only anyway.
        patches = (old.patches or [ ]) ++ [
          ./no-weird-microarch.patch
          ./all-nvcc-arch.patch
        ];
      }))
    ];

    services = {
      nginx = {
        enable = true;
        recommendedProxySettings = true;
        virtualHosts."ollama01.beta.dgnum.eu" = {
          enableACME = true;
          forceSSL = true;
          locations."/" = {
            proxyPass = "http://${config.services.ollama.host}:${toString config.services.ollama.port}";
            basicAuthFile = pkgs.writeText "ollama-htpasswd" ''
              raito:$y$j9T$UDEHpLtM52hRGK0I4qT6M0$N75AhENLqgtJnTGaPzq51imhjZvuPr.ow81Co1ZTcX2
            '';
          };
        };
      };

      ollama = {
        enable = true;
        package = pkgs.callPackage ./ollama.nix {
          cudaPackages = pkgs.cudaPackages_11;
          # We need to thread our NVIDIA X11 driver through for CUDA.
          extraLibraries = [ config.hardware.nvidia.package ];
        };
      };
    };

    networking.firewall.allowedTCPPorts = [ 80 443 ];
  };

  root = ./.;
}