{ config, lib, pkgs, meta, name, ... }:

lib.extra.mkConfig {
  enabledModules = [
    # INFO: This list needs to stay sorted alphabetically
  ];

  enabledServices = [
    # INFO: This list needs to stay sorted alphabetically
    # Machine learning API machine
    "microvm-ml01"
    "microvm-router01"
    "nvidia-tesla-k80"
    "proxmox"
  ];

  extraConfig = {
    microvm = {
      host.enable = true;
    };

    dgn-hardware = {
      useZfs = true;
      zfsPools = [
        "dpool"
        "ppool0"
      ];
    };

    services.netbird.enable = true;

    # We are going to use CUDA here. The Tesla K80 tops out at Compute
    # Capability 3.7, which is only supported up to CUDA 11, hence the
    # cudaPackages_11 pins below.
    nixpkgs.config.cudaSupport = true;
    hardware.graphics.enable = true;

    environment.systemPackages = [
      (
        (pkgs.openai-whisper-cpp.override { cudaPackages = pkgs.cudaPackages_11; }).overrideAttrs
          (old: {
            src = pkgs.fetchFromGitHub {
              owner = "ggerganov";
              repo = "whisper.cpp";
              rev = "v1.7.1";
              hash = "sha256-EDFUVjud79ZRCzGbOh9L9NcXfN3ikvsqkVSOME9F9oo=";
            };

            env = {
              # whisper.cpp renamed its CUDA switch: clear the old
              # WHISPER_CUBLAS toggle and enable the newer GGML_CUDA one.
              WHISPER_CUBLAS = "";
              GGML_CUDA = "1";
            };

            # We only need Compute Capability 3.7.
            CUDA_ARCH_FLAGS = [ "sm_37" ];

            # We are GPU-only anyway.
            patches = (old.patches or [ ]) ++ [
              ./no-weird-microarch.patch
              ./all-nvcc-arch.patch
            ];
          })
      )
    ];

    services = {
      ollama = {
        enable = true;
        host = meta.network.${name}.netbirdIp;
        package = pkgs.callPackage ./ollama.nix {
          cudaPackages = pkgs.cudaPackages_11;
          # We need to thread our NVIDIA X11 driver through for CUDA.
          extraLibraries = [ config.hardware.nvidia.package ];
        };
      };
    };

    # Expose ollama only on the netbird (wt0) interface.
    networking.firewall.interfaces.wt0.allowedTCPPorts = [ config.services.ollama.port ];
  };

  root = ./.;
}
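
# Quick smoke test for this setup: the ollama NixOS module listens on port
# 11434 by default and is bound to the machine's netbird IP above, so from
# another netbird peer a request like the one below should list the installed
# models (the IP is a placeholder; substitute the actual value of
# meta.network.<name>.netbirdIp):
#
#   curl http://100.64.0.1:11434/api/tags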