From 1a05ea3a9ab322c15d805aa86199646b7a6ba858 Mon Sep 17 00:00:00 2001
From: sinavir
Date: Fri, 18 Oct 2024 11:34:46 +0200
Subject: [PATCH] feat(krz01): Move to lab-infra repo

---
 .forgejo/workflows/eval.yaml | 19 --
 keys/default.nix | 1 -
 machines/compute01/ollama-proxy.nix | 9 +-
 machines/krz01/_configuration.nix | 41 ---
 machines/krz01/_hardware-configuration.nix | 50 ----
 machines/krz01/microvm-ml01.nix | 22 --
 machines/krz01/microvm-router01.nix | 16 --
 machines/krz01/networking.nix | 22 --
 machines/krz01/nvidia-tesla-k80.nix | 8 -
 machines/krz01/ollama/K80-support.patch | 179 -------------
 machines/krz01/ollama/all-nvcc-arch.patch | 26 --
 machines/krz01/ollama/default.nix | 20 --
 machines/krz01/ollama/disable-git.patch | 20 --
 .../krz01/ollama/no-weird-microarch.patch | 34 ---
 machines/krz01/ollama/package.nix | 243 ------------------
 machines/krz01/proxmox/default.nix | 12 -
 machines/krz01/secrets/secrets.nix | 3 -
 machines/krz01/whisper/all-nvcc-arch.patch | 26 --
 machines/krz01/whisper/default.nix | 25 --
 .../krz01/whisper/no-weird-microarch.patch | 34 ---
 meta/network.nix | 24 --
 meta/nodes.nix | 11 -
 22 files changed, 2 insertions(+), 843 deletions(-)
 delete mode 100644 machines/krz01/_configuration.nix
 delete mode 100644 machines/krz01/_hardware-configuration.nix
 delete mode 100644 machines/krz01/microvm-ml01.nix
 delete mode 100644 machines/krz01/microvm-router01.nix
 delete mode 100644 machines/krz01/networking.nix
 delete mode 100644 machines/krz01/nvidia-tesla-k80.nix
 delete mode 100644 machines/krz01/ollama/K80-support.patch
 delete mode 100644 machines/krz01/ollama/all-nvcc-arch.patch
 delete mode 100644 machines/krz01/ollama/default.nix
 delete mode 100644 machines/krz01/ollama/disable-git.patch
 delete mode 100644 machines/krz01/ollama/no-weird-microarch.patch
 delete mode 100644 machines/krz01/ollama/package.nix
 delete mode 100644 machines/krz01/proxmox/default.nix
 delete mode 100644 machines/krz01/secrets/secrets.nix
 delete mode 100644 machines/krz01/whisper/all-nvcc-arch.patch
 delete mode 100644 machines/krz01/whisper/default.nix
 delete mode 100644 machines/krz01/whisper/no-weird-microarch.patch

diff --git a/.forgejo/workflows/eval.yaml b/.forgejo/workflows/eval.yaml
index e12ac67..cadb56d 100644
--- a/.forgejo/workflows/eval.yaml
+++ b/.forgejo/workflows/eval.yaml
@@ -9,25 +9,6 @@ on:
       - main
 
 jobs:
-  build_and_cache_krz01:
-    runs-on: nix
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Build and cache the node
-        run: nix-shell --run cache-node
-        env:
-          STORE_ENDPOINT: "https://tvix-store.dgnum.eu/infra-signing/"
-          STORE_USER: "admin"
-          STORE_PASSWORD: ${{ secrets.STORE_PASSWORD }}
-          BUILD_NODE: "krz01"
-
-      - uses: actions/upload-artifact@v3
-        if: always()
-        with:
-          name: outputs_krz01
-          path: paths.txt
-
   build_and_cache_compute01:
     runs-on: nix
     steps:
diff --git a/keys/default.nix b/keys/default.nix
index d0c1cf9..1d00a18 100644
--- a/keys/default.nix
+++ b/keys/default.nix
@@ -17,7 +17,6 @@ rec {
   compute01 = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE/YluSVS+4h3oV8CIUj0OmquyJXju8aEQy0Jz210vTu" ];
   geo01 = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIEl6Pubbau+usQkemymoSKrTBbrX8JU5m5qpZbhNx8p4" ];
   geo02 = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFNXaCS0/Nsu5npqQk1TP6wMHCVIOaj4pblp2tIg6Ket" ];
-  krz01 = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIP4o65gWOgNrxbSd3kiQIGZUM+YD6kuZOQtblvzUGsfB" ];
   rescue01 = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIEJa02Annu8o7ggPjTH/9ttotdNGyghlWfU9E8pnuLUf" ];
   storage01 = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIA0s+rPcEcfWCqZ4B2oJiWT/60awOI8ijL1rtDM2glXZ" ];
   vault01 = [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAJA6VA7LENvTRlKdcrqt8DxDOPvX3bg3Gjy9mNkdFEW" ];
diff --git a/machines/compute01/ollama-proxy.nix b/machines/compute01/ollama-proxy.nix
index 31edbbf..b06561d 100644
--- a/machines/compute01/ollama-proxy.nix
+++ b/machines/compute01/ollama-proxy.nix
@@ -1,16 +1,11 @@
-{
-  pkgs,
-  nodes,
-  meta,
-  ...
-}:
+{ pkgs, ... }:
 {
   services.nginx = {
     virtualHosts."ollama01.beta.dgnum.eu" = {
       enableACME = true;
       forceSSL = true;
       locations."/" = {
-        proxyPass = "http://${meta.network.krz01.netbirdIp}:${toString nodes.krz01.config.services.ollama.port}";
+        proxyPass = "http://krz01.dgnum:11434";
         basicAuthFile = pkgs.writeText "ollama-htpasswd" ''
           raito:$y$j9T$UDEHpLtM52hRGK0I4qT6M0$N75AhENLqgtJnTGaPzq51imhjZvuPr.ow81Co1ZTcX2
         '';
diff --git a/machines/krz01/_configuration.nix b/machines/krz01/_configuration.nix
deleted file mode 100644
index 2323df4..0000000
--- a/machines/krz01/_configuration.nix
+++ /dev/null
@@ -1,41 +0,0 @@
-{ config, lib, ... }:
-
-lib.extra.mkConfig {
-  enabledModules = [
-    # INFO: This list needs to stay sorted alphabetically
-  ];
-
-  enabledServices = [
-    # INFO: This list needs to stay sorted alphabetically
-    # Machine learning API machine
-    # "microvm-ml01"
-    # "microvm-router01"
-    "nvidia-tesla-k80"
-    "ollama"
-    "whisper"
-    "proxmox"
-    "networking"
-  ];
-
-  extraConfig = {
-    microvm = {
-      host.enable = true;
-    };
-    dgn-hardware = {
-      useZfs = true;
-      zfsPools = [
-        "dpool"
-        "ppool0"
-      ];
-    };
-
-    # We are going to use CUDA here.
-    nixpkgs.config.cudaSupport = true;
-    hardware.graphics.enable = true;
-
-    services.netbird.enable = true;
-    networking.firewall.trustedInterfaces = [ "wt0" ];
-  };
-
-  root = ./.;
-}
diff --git a/machines/krz01/_hardware-configuration.nix b/machines/krz01/_hardware-configuration.nix
deleted file mode 100644
index 6b39e1a..0000000
--- a/machines/krz01/_hardware-configuration.nix
+++ /dev/null
@@ -1,50 +0,0 @@
-{
-  config,
-  lib,
-  modulesPath,
-  ...
-}:
-
-{
-  imports = [ (modulesPath + "/installer/scan/not-detected.nix") ];
-
-  boot = {
-    initrd = {
-      availableKernelModules = [
-        "ehci_pci"
-        "ahci"
-        "mpt3sas"
-        "usbhid"
-        "sd_mod"
-      ];
-      kernelModules = [ ];
-    };
-    kernelModules = [ "kvm-intel" ];
-    extraModulePackages = [ ];
-  };
-
-  fileSystems."/" = {
-    device = "/dev/disk/by-uuid/92bf4d66-2693-4eca-9b26-f86ae09d468d";
-    fsType = "ext4";
-  };
-
-  boot.initrd.luks.devices."mainfs" = {
-    device = "/dev/disk/by-uuid/26f9737b-28aa-4c3f-bd3b-b028283cef88";
-    keyFileSize = 1;
-    keyFile = "/dev/zero";
-  };
-
-  fileSystems."/boot" = {
-    device = "/dev/disk/by-uuid/280C-8844";
-    fsType = "vfat";
-    options = [
-      "fmask=0022"
-      "dmask=0022"
-    ];
-  };
-
-  swapDevices = [ ];
-
-  nixpkgs.hostPlatform = lib.mkDefault "x86_64-linux";
-  hardware.cpu.intel.updateMicrocode = lib.mkDefault config.hardware.enableRedistributableFirmware;
-}
diff --git a/machines/krz01/microvm-ml01.nix b/machines/krz01/microvm-ml01.nix
deleted file mode 100644
index b0a8be8..0000000
--- a/machines/krz01/microvm-ml01.nix
+++ /dev/null
@@ -1,22 +0,0 @@
-_: {
-  microvm.autostart = [ "ml01" ];
-  microvm.vms.ml01 = {
-    config = {
-      networking.hostName = "ml01";
-      microvm = {
-        hypervisor = "cloud-hypervisor";
-        vcpu = 4;
-        mem = 4096;
-        balloonMem = 2048;
-        shares = [
-          {
-            source = "/nix/store";
-            mountPoint = "/nix/.ro-store";
-            tag = "ro-store";
-            proto = "virtiofs";
-          }
-        ];
-      };
-    };
-  };
-}
diff --git a/machines/krz01/microvm-router01.nix b/machines/krz01/microvm-router01.nix
deleted file mode 100644
index 9af9bdf..0000000
--- a/machines/krz01/microvm-router01.nix
+++ /dev/null
@@ -1,16 +0,0 @@
-_: {
-  microvm.autostart = [ "router01" ];
-  microvm.vms.router01 = {
-    config = {
-      networking.hostName = "router01";
-      microvm.shares = [
-        {
-          source = "/nix/store";
-          mountPoint = "/nix/.ro-store";
-          tag = "ro-store";
-          proto = "virtiofs";
-        }
-      ];
-    };
-  };
-}
diff --git a/machines/krz01/networking.nix b/machines/krz01/networking.nix
deleted file mode 100644
index cc41276..0000000
--- a/machines/krz01/networking.nix
+++ /dev/null
@@ -1,22 +0,0 @@
-{
-  systemd.network.networks = {
-    "10-eno1" = {
-      matchConfig.Name = [ "eno1" ];
-      networkConfig = {
-        Bridge = "vmbr0";
-      };
-    };
-
-    "10-vmbr0" = {
-      matchConfig.Name = "vmbr0";
-      linkConfig.RequiredForOnline = "routable";
-    };
-  };
-
-  systemd.network.netdevs."vmbr0" = {
-    netdevConfig = {
-      Name = "vmbr0";
-      Kind = "bridge";
-    };
-  };
-}
diff --git a/machines/krz01/nvidia-tesla-k80.nix b/machines/krz01/nvidia-tesla-k80.nix
deleted file mode 100644
index 3d7f6ba..0000000
--- a/machines/krz01/nvidia-tesla-k80.nix
+++ /dev/null
@@ -1,8 +0,0 @@
-{ config, ... }:
-{
-  nixpkgs.config.nvidia.acceptLicense = true;
-  # Tesla K80 is not supported by the latest driver.
-  hardware.nvidia.package = config.boot.kernelPackages.nvidia_x11_legacy470;
-  # Don't ask.
-  services.xserver.videoDrivers = [ "nvidia" ];
-}
diff --git a/machines/krz01/ollama/K80-support.patch b/machines/krz01/ollama/K80-support.patch
deleted file mode 100644
index d8f0c3a..0000000
--- a/machines/krz01/ollama/K80-support.patch
+++ /dev/null
@@ -1,179 +0,0 @@
-From 2abd226ff3093c5a9e18a618fba466853e7ebaf7 Mon Sep 17 00:00:00 2001
-From: Raito Bezarius
-Date: Tue, 8 Oct 2024 18:27:41 +0200
-Subject: [PATCH] K80 support
-
-Signed-off-by: Raito Bezarius
----
- docs/development.md | 6 +++-
- docs/gpu.md | 1 +
- gpu/amd_linux.go | 6 +++-
- gpu/gpu.go | 63 ++++++++++++++++++++++++++++++++++++-----
- scripts/build_docker.sh | 2 +-
- scripts/build_linux.sh | 2 +-
- 6 files changed, 69 insertions(+), 11 deletions(-)
-
-diff --git a/docs/development.md b/docs/development.md
-index 2f7b9ecf..9da35931 100644
---- a/docs/development.md
-+++ b/docs/development.md
-@@ -51,7 +51,11 @@ Typically the build scripts will auto-detect CUDA, however, if your Linux distro
- or installation approach uses unusual paths, you can specify the location by
- specifying an environment variable `CUDA_LIB_DIR` to the location of the shared
- libraries, and `CUDACXX` to the location of the nvcc compiler. You can customize
--a set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "50;60;70")
-+a set of target CUDA architectures by setting `CMAKE_CUDA_ARCHITECTURES` (e.g. "35;37;50;60;70")
-+
-+To support GPUs older than Compute Capability 5.0, you will need to use an older version of
-+the Driver from [Unix Driver Archive](https://www.nvidia.com/en-us/drivers/unix/) (tested with 470) and [CUDA Toolkit Archive](https://developer.nvidia.com/cuda-toolkit-archive) (tested with cuda V11). When you build Ollama, you will need to set two environment variable to adjust the minimum compute capability Ollama supports via `export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/gpu.CudaComputeMajorMin=3\" \"-X=github.com/ollama/ollama/gpu.CudaComputeMinorMin=5\"'"` and the `CMAKE_CUDA_ARCHITECTURES`. To find the Compute Capability of your older GPU, refer to [GPU Compute Capability](https://developer.nvidia.com/cuda-gpus).
-+
- 
- Then generate dependencies:
- 
-diff --git a/docs/gpu.md b/docs/gpu.md
-index a6b559f0..66627611 100644
---- a/docs/gpu.md
-+++ b/docs/gpu.md
-@@ -28,6 +28,7 @@ Check your compute compatibility to see if your card is supported:
- | 5.0 | GeForce GTX | `GTX 750 Ti` `GTX 750` `NVS 810` |
- | | Quadro | `K2200` `K1200` `K620` `M1200` `M520` `M5000M` `M4000M` `M3000M` `M2000M` `M1000M` `K620M` `M600M` `M500M` |
- 
-+For building locally to support older GPUs, see [developer.md](./development.md#linux-cuda-nvidia)
- 
- ### GPU Selection
- 
-diff --git a/gpu/amd_linux.go b/gpu/amd_linux.go
-index 6b08ac2e..768fb97a 100644
---- a/gpu/amd_linux.go
-+++ b/gpu/amd_linux.go
-@@ -159,7 +159,11 @@ func AMDGetGPUInfo() []GpuInfo {
- 			return []GpuInfo{}
- 		}
- 
--		if int(major) < RocmComputeMin {
-+		minVer, err := strconv.Atoi(RocmComputeMajorMin)
-+		if err != nil {
-+			slog.Error("invalid RocmComputeMajorMin setting", "value", RocmComputeMajorMin, "error", err)
-+		}
-+		if int(major) < minVer {
- 			slog.Warn(fmt.Sprintf("amdgpu too old gfx%d%x%x", major, minor, patch), "gpu", gpuID)
- 			continue
- 		}
-diff --git a/gpu/gpu.go b/gpu/gpu.go
-index 781e23df..60d68c33 100644
---- a/gpu/gpu.go
-+++ b/gpu/gpu.go
-@@ -16,6 +16,7 @@ import (
- 	"os"
- 	"path/filepath"
- 	"runtime"
-+	"strconv"
- 	"strings"
- 	"sync"
- 	"unsafe"
-@@ -38,9 +39,11 @@ const (
- var gpuMutex sync.Mutex
- 
- // With our current CUDA compile flags, older than 5.0 will not work properly
--var CudaComputeMin = [2]C.int{5, 0}
-+// (string values used to allow ldflags overrides at build time)
-+var CudaComputeMajorMin = "5"
-+var CudaComputeMinorMin = "0"
- 
--var RocmComputeMin = 9
-+var RocmComputeMajorMin = "9"
- 
- // TODO find a better way to detect iGPU instead of minimum memory
- const IGPUMemLimit = 1 * format.GibiByte // 512G is what they typically report, so anything less than 1G must be iGPU
-@@ -175,11 +178,57 @@ func GetGPUInfo() GpuInfoList {
- 	var memInfo C.mem_info_t
- 	resp := []GpuInfo{}
- 
--	// NVIDIA first
--	for i := 0; i < gpuHandles.deviceCount; i++ {
--		// TODO once we support CPU compilation variants of GPU libraries refine this...
--		if cpuVariant == "" && runtime.GOARCH == "amd64" {
--			continue
-+	// Load ALL libraries
-+	cHandles = initCudaHandles()
-+	minMajorVer, err := strconv.Atoi(CudaComputeMajorMin)
-+	if err != nil {
-+		slog.Error("invalid CudaComputeMajorMin setting", "value", CudaComputeMajorMin, "error", err)
-+	}
-+	minMinorVer, err := strconv.Atoi(CudaComputeMinorMin)
-+	if err != nil {
-+		slog.Error("invalid CudaComputeMinorMin setting", "value", CudaComputeMinorMin, "error", err)
-+	}
-+
-+	// NVIDIA
-+	for i := range cHandles.deviceCount {
-+		if cHandles.cudart != nil || cHandles.nvcuda != nil {
-+			gpuInfo := CudaGPUInfo{
-+				GpuInfo: GpuInfo{
-+					Library: "cuda",
-+				},
-+				index: i,
-+			}
-+			var driverMajor int
-+			var driverMinor int
-+			if cHandles.cudart != nil {
-+				C.cudart_bootstrap(*cHandles.cudart, C.int(i), &memInfo)
-+			} else {
-+				C.nvcuda_bootstrap(*cHandles.nvcuda, C.int(i), &memInfo)
-+				driverMajor = int(cHandles.nvcuda.driver_major)
-+				driverMinor = int(cHandles.nvcuda.driver_minor)
-+			}
-+			if memInfo.err != nil {
-+				slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
-+				C.free(unsafe.Pointer(memInfo.err))
-+				continue
-+			}
-+
-+			if int(memInfo.major) < minMajorVer || (int(memInfo.major) == minMajorVer && int(memInfo.minor) < minMinorVer) {
-+				slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
-+				continue
-+			}
-+			gpuInfo.TotalMemory = uint64(memInfo.total)
-+			gpuInfo.FreeMemory = uint64(memInfo.free)
-+			gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
-+			gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
-+			gpuInfo.MinimumMemory = cudaMinimumMemory
-+			gpuInfo.DependencyPath = depPath
-+			gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
-+			gpuInfo.DriverMajor = driverMajor
-+			gpuInfo.DriverMinor = driverMinor
-+
-+			// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
-+			cudaGPUs = append(cudaGPUs, gpuInfo)
- 		}
- 		gpuInfo := GpuInfo{
- 			Library: "cuda",
-diff --git a/scripts/build_docker.sh b/scripts/build_docker.sh
-index e91c56ed..c03bc25f 100755
---- a/scripts/build_docker.sh
-+++ b/scripts/build_docker.sh
-@@ -3,7 +3,7 @@
- set -eu
- 
- export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
--export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
-+export GOFLAGS=${GOFLAGS:-"'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"}
- 
- # We use 2 different image repositories to handle combining architecture images into multiarch manifest
- # (The ROCm image is x86 only and is not a multiarch manifest)
-diff --git a/scripts/build_linux.sh b/scripts/build_linux.sh
-index 27c4ff1f..e7e6d0dd 100755
---- a/scripts/build_linux.sh
-+++ b/scripts/build_linux.sh
-@@ -3,7 +3,7 @@
- set -eu
- 
- export VERSION=${VERSION:-$(git describe --tags --first-parent --abbrev=7 --long --dirty --always | sed -e "s/^v//g")}
--export GOFLAGS="'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"
-+export GOFLAGS=${GOFLAGS:-"'-ldflags=-w -s \"-X=github.com/ollama/ollama/version.Version=$VERSION\" \"-X=github.com/ollama/ollama/server.mode=release\"'"}
- 
- BUILD_ARCH=${BUILD_ARCH:-"amd64 arm64"}
- export AMDGPU_TARGETS=${AMDGPU_TARGETS:=""}
--- 
-2.46.0
-
diff --git a/machines/krz01/ollama/all-nvcc-arch.patch b/machines/krz01/ollama/all-nvcc-arch.patch
deleted file mode 100644
index 6696836..0000000
--- a/machines/krz01/ollama/all-nvcc-arch.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 2278389ef9ac9231349440aa68f9544ddc69cdc7 Mon Sep 17 00:00:00 2001
-From: Raito Bezarius
-Date: Wed, 9 Oct 2024 13:37:08 +0200
-Subject: [PATCH] fix: sm_37 for nvcc
-
-Signed-off-by: Raito Bezarius
----
- Makefile | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/Makefile b/Makefile
-index 2ccb750..70dfd9b 100644
---- a/Makefile
-+++ b/Makefile
-@@ -537,7 +537,7 @@ endif #GGML_CUDA_NVCC
- ifdef CUDA_DOCKER_ARCH
- 	MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
- else ifndef CUDA_POWER_ARCH
--	MK_NVCCFLAGS += -arch=native
-+	MK_NVCCFLAGS += -arch=sm_37
- endif # CUDA_DOCKER_ARCH
- 
- ifdef GGML_CUDA_FORCE_DMMV
--- 
-2.46.0
-
diff --git a/machines/krz01/ollama/default.nix b/machines/krz01/ollama/default.nix
deleted file mode 100644
index b0fead5..0000000
--- a/machines/krz01/ollama/default.nix
+++ /dev/null
@@ -1,20 +0,0 @@
-{
-  config,
-  pkgs,
-  meta,
-  name,
-  ...
-}:
-{
-  services = {
-    ollama = {
-      enable = true;
-      host = meta.network.${name}.netbirdIp;
-      package = pkgs.callPackage ./package.nix {
-        cudaPackages = pkgs.cudaPackages_11;
-        # We need to thread our nvidia x11 driver for CUDA.
-        extraLibraries = [ config.hardware.nvidia.package ];
-      };
-    };
-  };
-}
diff --git a/machines/krz01/ollama/disable-git.patch b/machines/krz01/ollama/disable-git.patch
deleted file mode 100644
index c305c48..0000000
--- a/machines/krz01/ollama/disable-git.patch
+++ /dev/null
@@ -1,20 +0,0 @@
-diff --git c/llm/generate/gen_common.sh i/llm/generate/gen_common.sh
-index 3825c155..238a74a7 100644
---- c/llm/generate/gen_common.sh
-+++ i/llm/generate/gen_common.sh
-@@ -69,6 +69,7 @@ git_module_setup() {
- }
- 
- apply_patches() {
-+    return
-     # apply temporary patches until fix is upstream
-     for patch in ../patches/*.patch; do
-         git -c 'user.name=nobody' -c 'user.email=<>' -C ${LLAMACPP_DIR} am ${patch}
-@@ -133,6 +134,7 @@ install() {
- 
- # Keep the local tree clean after we're done with the build
- cleanup() {
-+    return
-     (cd ${LLAMACPP_DIR}/ && git checkout CMakeLists.txt)
- 
-     if [ -n "$(ls -A ../patches/*.diff)" ]; then
diff --git a/machines/krz01/ollama/no-weird-microarch.patch b/machines/krz01/ollama/no-weird-microarch.patch
deleted file mode 100644
index 7a93b53..0000000
--- a/machines/krz01/ollama/no-weird-microarch.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 51568b61ef63ecd97867562571411082c32751d3 Mon Sep 17 00:00:00 2001
-From: Raito Bezarius
-Date: Wed, 9 Oct 2024 13:36:51 +0200
-Subject: [PATCH] fix: avx & f16c in Makefile
-
-Signed-off-by: Raito Bezarius
----
- Makefile | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/Makefile b/Makefile
-index 32b7cbb..2ccb750 100644
---- a/Makefile
-+++ b/Makefile
-@@ -361,12 +361,12 @@ ifndef RISCV
- 
- ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
- 	# Use all CPU extensions that are available:
--	MK_CFLAGS += -march=native -mtune=native
--	HOST_CXXFLAGS += -march=native -mtune=native
-+	# MK_CFLAGS += -march=native -mtune=native
-+	# HOST_CXXFLAGS += -march=native -mtune=native
- 
- 	# Usage AVX-only
--	#MK_CFLAGS += -mfma -mf16c -mavx
--	#MK_CXXFLAGS += -mfma -mf16c -mavx
-+	MK_CFLAGS += -mf16c -mavx
-+	MK_CXXFLAGS += -mf16c -mavx
- 
- 	# Usage SSSE3-only (Not is SSE3!)
- 	#MK_CFLAGS += -mssse3
--- 
-2.46.0
-
diff --git a/machines/krz01/ollama/package.nix b/machines/krz01/ollama/package.nix
deleted file mode 100644
index 6e252c1..0000000
--- a/machines/krz01/ollama/package.nix
+++ /dev/null
@@ -1,243 +0,0 @@
-{
-  lib,
-  buildGoModule,
-  fetchFromGitHub,
-  buildEnv,
-  linkFarm,
-  overrideCC,
-  makeWrapper,
-  stdenv,
-  addDriverRunpath,
-  nix-update-script,
-
-  cmake,
-  gcc11,
-  clblast,
-  libdrm,
-  rocmPackages,
-  cudaPackages,
-  darwin,
-  autoAddDriverRunpath,
-  extraLibraries ? [ ],
-
-  nixosTests,
-  testers,
-  ollama,
-  ollama-rocm,
-  ollama-cuda,
-
-  config,
-  # one of `[ null false "rocm" "cuda" ]`
-  acceleration ? null,
-}:
-
-assert builtins.elem acceleration [
-  null
-  false
-  "rocm"
-  "cuda"
-];
-
-let
-  pname = "ollama";
-  version = "2024-09-10-cc35";
-
-  src = fetchFromGitHub {
-    owner = "aliotard";
-    repo = "ollama";
-    rev = "34827c01f7723c7f5f9f5e392fe85f5a4a5d5fc0";
-    hash = "sha256-xFNuqcW7YWeyCyw5QLBnCHHTSMITR6LJkJT0CXZC+Y8=";
-    fetchSubmodules = true;
-  };
-
-  vendorHash = "sha256-hSxcREAujhvzHVNwnRTfhi0MKI3s8HNavER2VLz6SYk=";
-
-  validateFallback = lib.warnIf (config.rocmSupport && config.cudaSupport) (lib.concatStrings [
-    "both `nixpkgs.config.rocmSupport` and `nixpkgs.config.cudaSupport` are enabled, "
-    "but they are mutually exclusive; falling back to cpu"
-  ]) (!(config.rocmSupport && config.cudaSupport));
-  shouldEnable =
-    mode: fallback: (acceleration == mode) || (fallback && acceleration == null && validateFallback);
-
-  rocmRequested = shouldEnable "rocm" config.rocmSupport;
-  cudaRequested = shouldEnable "cuda" config.cudaSupport;
-
-  enableRocm = rocmRequested && stdenv.isLinux;
-  enableCuda = cudaRequested && stdenv.isLinux;
-
-  rocmLibs = [
-    rocmPackages.clr
-    rocmPackages.hipblas
-    rocmPackages.rocblas
-    rocmPackages.rocsolver
-    rocmPackages.rocsparse
-    rocmPackages.rocm-device-libs
-    rocmPackages.rocm-smi
-  ];
-  rocmClang = linkFarm "rocm-clang" { llvm = rocmPackages.llvm.clang; };
-  rocmPath = buildEnv {
-    name = "rocm-path";
-    paths = rocmLibs ++ [ rocmClang ];
-  };
-
-  cudaLibs = [
-    cudaPackages.cuda_cudart
-    cudaPackages.libcublas
-    cudaPackages.cuda_cccl
-  ];
-  cudaToolkit = buildEnv {
-    name = "cuda-merged";
-    paths = map lib.getLib cudaLibs ++ [
-      (lib.getOutput "static" cudaPackages.cuda_cudart)
-      (lib.getBin (cudaPackages.cuda_nvcc.__spliced.buildHost or cudaPackages.cuda_nvcc))
-    ];
-  };
-
-  metalFrameworks = with darwin.apple_sdk_11_0.frameworks; [
-    Accelerate
-    Metal
-    MetalKit
-    MetalPerformanceShaders
-  ];
-
-  wrapperOptions =
-    [
-      # ollama embeds llama-cpp binaries which actually run the ai models
-      # these llama-cpp binaries are unaffected by the ollama binary's DT_RUNPATH
-      # LD_LIBRARY_PATH is temporarily required to use the gpu
-      # until these llama-cpp binaries can have their runpath patched
-      "--suffix LD_LIBRARY_PATH : '${addDriverRunpath.driverLink}/lib'"
-      "--suffix LD_LIBRARY_PATH : '${lib.makeLibraryPath (map lib.getLib extraLibraries)}'"
-    ]
-    ++ lib.optionals enableRocm [
-      "--suffix LD_LIBRARY_PATH : '${rocmPath}/lib'"
-      "--set-default HIP_PATH '${rocmPath}'"
-    ]
-    ++ lib.optionals enableCuda [
-      "--suffix LD_LIBRARY_PATH : '${lib.makeLibraryPath (map lib.getLib cudaLibs)}'"
-    ];
-  wrapperArgs = builtins.concatStringsSep " " wrapperOptions;
-
-  goBuild =
-    if enableCuda then buildGoModule.override { stdenv = overrideCC stdenv gcc11; } else buildGoModule;
-  inherit (lib) licenses platforms maintainers;
-in
-goBuild {
-  inherit
-    pname
-    version
-    src
-    vendorHash
-    ;
-
-  env =
-    lib.optionalAttrs enableRocm {
-      ROCM_PATH = rocmPath;
-      CLBlast_DIR = "${clblast}/lib/cmake/CLBlast";
-    }
-    // lib.optionalAttrs enableCuda { CUDA_LIB_DIR = "${cudaToolkit}/lib"; }
-    // {
-      CMAKE_CUDA_ARCHITECTURES = "35;37";
-    };
-
-  nativeBuildInputs =
-    [ cmake ]
-    ++ lib.optionals enableRocm [ rocmPackages.llvm.bintools ]
-    ++ lib.optionals enableCuda [ cudaPackages.cuda_nvcc ]
-    ++ lib.optionals (enableRocm || enableCuda) [
-      makeWrapper
-      autoAddDriverRunpath
-    ]
-    ++ lib.optionals stdenv.isDarwin metalFrameworks;
-
-  buildInputs =
-    lib.optionals enableRocm (rocmLibs ++ [ libdrm ])
-    ++ lib.optionals enableCuda cudaLibs
-    ++ lib.optionals stdenv.isDarwin metalFrameworks;
-
-  patches = [
-    # disable uses of `git` in the `go generate` script
-    # ollama's build script assumes the source is a git repo, but nix removes the git directory
-    # this also disables necessary patches contained in `ollama/llm/patches/`
-    # those patches are applied in `postPatch`
-    ./disable-git.patch
-  ];
-
-  postPatch = ''
-    # replace inaccurate version number with actual release version
-    substituteInPlace version/version.go --replace-fail 0.0.0 '${version}'
-
-    # apply ollama's patches to `llama.cpp` submodule
-    for diff in llm/patches/*; do
-      patch -p1 -d llm/llama.cpp < $diff
-    done
-  '';
-
-  overrideModAttrs = _: _: {
-    # don't run llama.cpp build in the module fetch phase
-    preBuild = "";
-  };
-
-  preBuild = ''
-    # disable uses of `git`, since nix removes the git directory
-    export OLLAMA_SKIP_PATCHING=true
-    # build llama.cpp libraries for ollama
-    go generate ./...
-  '';
-
-  postFixup =
-    ''
-      # the app doesn't appear functional at the moment, so hide it
-      mv "$out/bin/app" "$out/bin/.ollama-app"
-    ''
-    + lib.optionalString (enableRocm || enableCuda) ''
-      # expose runtime libraries necessary to use the gpu
-      wrapProgram "$out/bin/ollama" ${wrapperArgs}
-    '';
-
-  ldflags = [
-    "-s"
-    "-w"
-    "-X=github.com/ollama/ollama/version.Version=${version}"
-    "-X=github.com/ollama/ollama/server.mode=release"
-    "-X=github.com/ollama/ollama/gpu.CudaComputeMajorMin=3"
-    "-X=github.com/ollama/ollama/gpu.CudaComputeMinorMin=5"
-  ];
-
-  passthru = {
-    tests =
-      {
-        inherit ollama;
-        version = testers.testVersion {
-          inherit version;
-          package = ollama;
-        };
-      }
-      // lib.optionalAttrs stdenv.isLinux {
-        inherit ollama-rocm ollama-cuda;
-        service = nixosTests.ollama;
-        service-cuda = nixosTests.ollama-cuda;
-        service-rocm = nixosTests.ollama-rocm;
-      };
-
-    updateScript = nix-update-script { };
-  };
-
-  meta = {
-    description =
-      "Get up and running with large language models locally"
-      + lib.optionalString rocmRequested ", using ROCm for AMD GPU acceleration"
-      + lib.optionalString cudaRequested ", using CUDA for NVIDIA GPU acceleration";
-    homepage = "https://github.com/ollama/ollama";
-    changelog = "https://github.com/ollama/ollama/releases/tag/v${version}";
-    license = licenses.mit;
-    platforms = if (rocmRequested || cudaRequested) then platforms.linux else platforms.unix;
-    mainProgram = "ollama";
-    maintainers = with maintainers; [
-      abysssol
-      dit7ya
-      elohmeier
-      roydubnium
-    ];
-  };
-}
diff --git a/machines/krz01/proxmox/default.nix b/machines/krz01/proxmox/default.nix
deleted file mode 100644
index 650163f..0000000
--- a/machines/krz01/proxmox/default.nix
+++ /dev/null
@@ -1,12 +0,0 @@
-{ sources, ... }:
-let
-  proxmox-nixos = import sources.proxmox-nixos;
-in
-{
-  imports = [ proxmox-nixos.nixosModules.proxmox-ve ];
-  services.proxmox-ve = {
-    enable = true;
-    openFirewall = false;
-  };
-  nixpkgs.overlays = [ proxmox-nixos.overlays.x86_64-linux ];
-}
diff --git a/machines/krz01/secrets/secrets.nix b/machines/krz01/secrets/secrets.nix
deleted file mode 100644
index 45004b9..0000000
--- a/machines/krz01/secrets/secrets.nix
+++ /dev/null
@@ -1,3 +0,0 @@
-(import ../../../keys).mkSecrets [ "krz01" ] [
-  # List of secrets for krz01
-]
diff --git a/machines/krz01/whisper/all-nvcc-arch.patch b/machines/krz01/whisper/all-nvcc-arch.patch
deleted file mode 100644
index 6696836..0000000
--- a/machines/krz01/whisper/all-nvcc-arch.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 2278389ef9ac9231349440aa68f9544ddc69cdc7 Mon Sep 17 00:00:00 2001
-From: Raito Bezarius
-Date: Wed, 9 Oct 2024 13:37:08 +0200
-Subject: [PATCH] fix: sm_37 for nvcc
-
-Signed-off-by: Raito Bezarius
----
- Makefile | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/Makefile b/Makefile
-index 2ccb750..70dfd9b 100644
---- a/Makefile
-+++ b/Makefile
-@@ -537,7 +537,7 @@ endif #GGML_CUDA_NVCC
- ifdef CUDA_DOCKER_ARCH
- 	MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
- else ifndef CUDA_POWER_ARCH
--	MK_NVCCFLAGS += -arch=native
-+	MK_NVCCFLAGS += -arch=sm_37
- endif # CUDA_DOCKER_ARCH
- 
- ifdef GGML_CUDA_FORCE_DMMV
--- 
-2.46.0
-
diff --git a/machines/krz01/whisper/default.nix b/machines/krz01/whisper/default.nix
deleted file mode 100644
index 689b60f..0000000
--- a/machines/krz01/whisper/default.nix
+++ /dev/null
@@ -1,25 +0,0 @@
-{ pkgs, ... }:
-{
-  environment.systemPackages = [
-    ((pkgs.openai-whisper-cpp.override { cudaPackages = pkgs.cudaPackages_11; }).overrideAttrs (old: {
-      src = pkgs.fetchFromGitHub {
-        owner = "ggerganov";
-        repo = "whisper.cpp";
-        rev = "v1.7.1";
-        hash = "sha256-EDFUVjud79ZRCzGbOh9L9NcXfN3ikvsqkVSOME9F9oo=";
-      };
-      env = {
-        WHISPER_CUBLAS = "";
-        GGML_CUDA = "1";
-      };
-      # We only need Compute Capability 3.7.
-      CUDA_ARCH_FLAGS = [ "sm_37" ];
-      # We are GPU-only anyway.
-      patches = (old.patches or [ ]) ++ [
-        ./no-weird-microarch.patch
-        ./all-nvcc-arch.patch
-      ];
-    }))
-  ];
-
-}
diff --git a/machines/krz01/whisper/no-weird-microarch.patch b/machines/krz01/whisper/no-weird-microarch.patch
deleted file mode 100644
index 7a93b53..0000000
--- a/machines/krz01/whisper/no-weird-microarch.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 51568b61ef63ecd97867562571411082c32751d3 Mon Sep 17 00:00:00 2001
-From: Raito Bezarius
-Date: Wed, 9 Oct 2024 13:36:51 +0200
-Subject: [PATCH] fix: avx & f16c in Makefile
-
-Signed-off-by: Raito Bezarius
----
- Makefile | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
-diff --git a/Makefile b/Makefile
-index 32b7cbb..2ccb750 100644
---- a/Makefile
-+++ b/Makefile
-@@ -361,12 +361,12 @@ ifndef RISCV
- 
- ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
- 	# Use all CPU extensions that are available:
--	MK_CFLAGS += -march=native -mtune=native
--	HOST_CXXFLAGS += -march=native -mtune=native
-+	# MK_CFLAGS += -march=native -mtune=native
-+	# HOST_CXXFLAGS += -march=native -mtune=native
- 
- 	# Usage AVX-only
--	#MK_CFLAGS += -mfma -mf16c -mavx
--	#MK_CXXFLAGS += -mfma -mf16c -mavx
-+	MK_CFLAGS += -mf16c -mavx
-+	MK_CXXFLAGS += -mf16c -mavx
- 
- 	# Usage SSSE3-only (Not is SSE3!)
- 	#MK_CFLAGS += -mssse3
--- 
-2.46.0
-
diff --git a/meta/network.nix b/meta/network.nix
index b72f4d5..ffb3638 100644
--- a/meta/network.nix
+++ b/meta/network.nix
@@ -29,30 +29,6 @@
     netbirdIp = "100.80.75.197";
   };
 
-  krz01 = {
-    interfaces = {
-      # see also machines/krz01/networking.nix
-      vmbr0 = {
-        ipv4 = [
-          {
-            address = "129.199.146.21";
-            prefixLength = 24;
-          }
-          {
-            address = "192.168.1.145";
-            prefixLength = 24;
-          }
-        ];
-
-        gateways = [ "129.199.146.254" ];
-        enableDefaultDNS = true;
-      };
-    };
-
-    hostId = "bd11e8fc";
-    netbirdIp = "100.80.103.206";
-  };
-
   geo01 = {
     interfaces = {
       eno1 = {
diff --git a/meta/nodes.nix b/meta/nodes.nix
index 4d2b679..d0ef82d 100644
--- a/meta/nodes.nix
+++ b/meta/nodes.nix
@@ -80,17 +80,6 @@
     nixpkgs = "24.05";
   };
 
-  krz01 = {
-    site = "pav01";
-
-    hashedPassword = "$y$j9T$eNZQgDN.J5y7KTG2hXgat1$J1i5tjx5dnSZu.C9B7swXi5zMFIkUnmRrnmyLHFAt8/";
-
-    stateVersion = "24.05";
-    nixpkgs = "unstable";
-
-    adminGroups = [ "lab" ];
-  };
-
   storage01 = {
     site = "pav01";