From e8838713df29d94529ea4d7865180e936393340b Mon Sep 17 00:00:00 2001 From: aszlig Date: Fri, 11 Nov 2016 04:59:48 +0100 Subject: [PATCH 1/6] Run builds as root in user namespace again This reverts commit ff0c0b645cc1448959126185bb2fafe41cf0bddf. We're going to use seccomp to allow "cp -p" and force chown-related syscalls to always return 0. Signed-off-by: aszlig --- src/libstore/build.cc | 45 +++++++++++++++---------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 61286ceac..ed9d48378 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -811,9 +811,6 @@ private: result. */ ValidPathInfos prevInfos; - const uid_t sandboxUid = 1000; - const gid_t sandboxGid = 100; - public: DerivationGoal(const Path & drvPath, const StringSet & wantedOutputs, Worker & worker, BuildMode buildMode = bmNormal); @@ -1956,18 +1953,14 @@ void DerivationGoal::startBuilder() createDirs(chrootRootDir + "/etc"); writeFile(chrootRootDir + "/etc/passwd", - (format( - "root:x:0:0:Nix build user:/:/noshell\n" - "nixbld:x:%1%:%2%:Nix build user:/:/noshell\n" - "nobody:x:65534:65534:Nobody:/:/noshell\n") % sandboxUid % sandboxGid).str()); + "root:x:0:0:Nix build user:/:/noshell\n" + "nobody:x:65534:65534:Nobody:/:/noshell\n"); /* Declare the build user's group so that programs get a consistent view of the system (e.g., "id -gn"). */ writeFile(chrootRootDir + "/etc/group", - (format( - "root:x:0:\n" - "nixbld:!:%1%:\n" - "nogroup:x:65534:\n") % sandboxGid).str()); + "root:x:0:\n" + "nobody:x:65534:\n"); /* Create /etc/hosts with localhost entry. */ if (!fixedOutput) @@ -2149,12 +2142,7 @@ void DerivationGoal::startBuilder() Pid helper = startProcess([&]() { /* Drop additional groups here because we can't do it - after we've created the new user namespace. FIXME: - this means that if we're not root in the parent - namespace, we can't drop additional groups; they will - be mapped to nogroup in the child namespace. There does - not seem to be a workaround for this. (But who can tell - from reading user_namespaces(7)?)*/ + after we've created the new user namespace. */ if (getuid() == 0 && setgroups(0, 0) == -1) throw SysError("setgroups failed"); @@ -2187,19 +2175,19 @@ void DerivationGoal::startBuilder() if (!string2Int(readLine(builderOut.readSide.get()), tmp)) abort(); pid = tmp; - /* Set the UID/GID mapping of the builder's user namespace - such that the sandbox user maps to the build user, or to - the calling user (if build users are disabled). */ - uid_t hostUid = buildUser.enabled() ? buildUser.getUID() : getuid(); - uid_t hostGid = buildUser.enabled() ? buildUser.getGID() : getgid(); + /* Set the UID/GID mapping of the builder's user + namespace such that root maps to the build user, or to the + calling user (if build users are disabled). */ + uid_t targetUid = buildUser.enabled() ? buildUser.getUID() : getuid(); + uid_t targetGid = buildUser.enabled() ? buildUser.getGID() : getgid(); writeFile("/proc/" + std::to_string(pid) + "/uid_map", - (format("%d %d 1") % sandboxUid % hostUid).str()); + (format("0 %d 1") % targetUid).str()); writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny"); writeFile("/proc/" + std::to_string(pid) + "/gid_map", - (format("%d %d 1") % sandboxGid % hostGid).str()); + (format("0 %d 1") % targetGid).str()); /* Signal the builder that we've updated its user namespace. */ @@ -2409,12 +2397,11 @@ void DerivationGoal::runChild() if (rmdir("real-root") == -1) throw SysError("cannot remove real-root directory"); - /* Switch to the sandbox uid/gid in the user namespace, - which corresponds to the build user or calling user in - the parent namespace. */ - if (setgid(sandboxGid) == -1) + /* Become root in the user namespace, which corresponds to + the build user or calling user in the parent namespace. */ + if (setgid(0) == -1) throw SysError("setgid failed"); - if (setuid(sandboxUid) == -1) + if (setuid(0) == -1) throw SysError("setuid failed"); setUser = false; From 1c52e344c48e9cb8cf2b332201d5c96c06e4cf3e Mon Sep 17 00:00:00 2001 From: aszlig Date: Wed, 16 Nov 2016 12:30:11 +0100 Subject: [PATCH 2/6] Add build dependency for libseccomp We're going to use libseccomp instead of creating the raw BPF program, because we have different syscall numbers on different architectures. Although our initial seccomp rules will be quite small it really doesn't make sense to generate the raw BPF program because we need to duplicate it and/or make branches on every single architecture we want to suuport. Signed-off-by: aszlig --- Makefile.config.in | 1 + configure.ac | 9 +++++++++ release.nix | 3 ++- src/libstore/local.mk | 4 ++++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/Makefile.config.in b/Makefile.config.in index 2db7172b1..57f1f3e77 100644 --- a/Makefile.config.in +++ b/Makefile.config.in @@ -10,6 +10,7 @@ OPENSSL_LIBS = @OPENSSL_LIBS@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_VERSION = @PACKAGE_VERSION@ SODIUM_LIBS = @SODIUM_LIBS@ +LIBSECCOMP_LIBS = @LIBSECCOMP_LIBS@ LIBLZMA_LIBS = @LIBLZMA_LIBS@ SQLITE3_LIBS = @SQLITE3_LIBS@ bash = @bash@ diff --git a/configure.ac b/configure.ac index 91ed9947a..1a5ad660a 100644 --- a/configure.ac +++ b/configure.ac @@ -194,6 +194,15 @@ AC_SUBST(HAVE_SODIUM, [$have_sodium]) PKG_CHECK_MODULES([LIBLZMA], [liblzma], [CXXFLAGS="$LIBLZMA_CFLAGS $CXXFLAGS"]) +# Look for libseccomp, required for Linux sandboxing. +if test "$sys_name" = linux; then + PKG_CHECK_MODULES([LIBSECCOMP], [libseccomp], + [CXXFLAGS="$LIBSECCOMP_CFLAGS $CXXFLAGS"]) +# AC_CHECK_LIB([seccomp], [seccomp_init], [true], +# [AC_MSG_ERROR([Nix requires libseccomp for sandboxing. See https://github.com/seccomp/libseccomp.])]) +fi + + # Look for aws-cpp-sdk-s3. AC_LANG_PUSH(C++) AC_CHECK_HEADERS([aws/s3/S3Client.h], diff --git a/release.nix b/release.nix index 6b16bc718..fbed401df 100644 --- a/release.nix +++ b/release.nix @@ -25,7 +25,7 @@ let buildInputs = [ curl bison flex perl libxml2 libxslt bzip2 xz - pkgconfig sqlite libsodium boehmgc + pkgconfig sqlite libsodium libseccomp boehmgc docbook5 docbook5_xsl autoconf-archive ] ++ lib.optional (!lib.inNixShell) git; @@ -75,6 +75,7 @@ let buildInputs = [ curl perl bzip2 xz openssl pkgconfig sqlite boehmgc ] ++ lib.optional stdenv.isLinux libsodium + ++ lib.optional stdenv.isLinux libseccomp ++ lib.optional stdenv.isLinux (aws-sdk-cpp.override { apis = ["s3"]; diff --git a/src/libstore/local.mk b/src/libstore/local.mk index 9d5c04dca..a8222025c 100644 --- a/src/libstore/local.mk +++ b/src/libstore/local.mk @@ -18,6 +18,10 @@ ifeq ($(OS), SunOS) libstore_LDFLAGS += -lsocket endif +ifeq ($(OS), Linux) + libstore_LDFLAGS += -lseccomp +endif + libstore_CXXFLAGS = \ -DNIX_PREFIX=\"$(prefix)\" \ -DNIX_STORE_DIR=\"$(storedir)\" \ From b90a43533249a50f238a5e6cc9d77edb0fe6d748 Mon Sep 17 00:00:00 2001 From: aszlig Date: Wed, 16 Nov 2016 12:33:42 +0100 Subject: [PATCH 3/6] libstore/build: Forge chown() to return success What we basically want is a seccomp mode 2 BPF program like this but for every architecture: BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_chown, 4, 0), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_fchown, 3, 0), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_fchownat, 2, 0), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, __NR_lchown, 1, 0), BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO) However, on 32 bit architectures we do have chown32, lchown32 and fchown32, so we'd need to add all the architecture blurb which libseccomp handles for us. So we only need to make sure that we add the 32bit seccomp arch while we're on x86_64 and otherwise we just stay at the native architecture which was set during seccomp_init(), which more or less replicates setting 32bit personality during runChild(). The FORCE_SUCCESS() macro here could be a bit less ugly but I think repeating the seccomp_rule_add() all over the place is way uglier. Another way would have been to create a vector of syscalls to iterate over, but that would make error messages uglier because we can either only print the (libseccomp-internal) syscall number or use seccomp_syscall_resolve_num_arch() to get the name or even make the vector a pair number/name, essentially duplicating everything again. Signed-off-by: aszlig --- src/libstore/build.cc | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index ed9d48378..6c6d0dee3 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -54,6 +54,7 @@ #include #include #include +#include #define pivot_root(new_root, put_old) (syscall(SYS_pivot_root, new_root, put_old)) #endif @@ -1632,8 +1633,48 @@ void chmod_(const Path & path, mode_t mode) } +#if __linux__ + +#define FORCE_SUCCESS(syscall) \ + if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(0), SCMP_SYS(syscall), 0) != 0) { \ + seccomp_release(ctx); \ + throw SysError("unable to add seccomp rule for " #syscall); \ + } + +void setupSeccomp(void) { + scmp_filter_ctx ctx; + + if ((ctx = seccomp_init(SCMP_ACT_ALLOW)) == NULL) + throw SysError("unable to initialize seccomp mode 2"); + +#if defined(__x86_64__) + if (seccomp_arch_add(ctx, SCMP_ARCH_X86) != 0) { + seccomp_release(ctx); + throw SysError("unable to add 32bit seccomp architecture"); + } +#endif + + FORCE_SUCCESS(chown); + FORCE_SUCCESS(fchown); + FORCE_SUCCESS(fchownat); + FORCE_SUCCESS(lchown); + + if (seccomp_load(ctx) != 0) { + seccomp_release(ctx); + throw SysError("unable to load seccomp BPF program"); + } + + seccomp_release(ctx); +} + +#undef FORCE_SUCCESS + +#endif + + int childEntry(void * arg) { + setupSeccomp(); ((DerivationGoal *) arg)->runChild(); return 1; } From 651a18dd2466662e7027e4dc04147e4f38c7bbf8 Mon Sep 17 00:00:00 2001 From: aszlig Date: Wed, 16 Nov 2016 12:46:43 +0100 Subject: [PATCH 4/6] release.nix: Add a test for sandboxing Right now it only tests whether seccomp correctly forges the return value of chown, but the long-term goal is to test the full sandboxing functionality at some point in the future. Signed-off-by: aszlig --- release.nix | 4 ++++ tests/sandbox.nix | 53 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 tests/sandbox.nix diff --git a/release.nix b/release.nix index fbed401df..958460d6f 100644 --- a/release.nix +++ b/release.nix @@ -200,6 +200,10 @@ let nix = build.x86_64-linux; system = "x86_64-linux"; }); + tests.sandbox = (import ./tests/sandbox.nix rec { + nix = build.x86_64-linux; system = "x86_64-linux"; + }); + tests.binaryTarball = with import { system = "x86_64-linux"; }; vmTools.runInLinuxImage (runCommand "nix-binary-tarball-test" diff --git a/tests/sandbox.nix b/tests/sandbox.nix new file mode 100644 index 000000000..7e2055038 --- /dev/null +++ b/tests/sandbox.nix @@ -0,0 +1,53 @@ +# Test Nix builder sandbox. + +{ system, nix }: + +with import { inherit system; }; + +let + mkUtils = pkgs: pkgs.buildEnv { + name = "sandbox-utils"; + paths = [ pkgs.coreutils pkgs.utillinux pkgs.bash ]; + pathsToLink = [ "/bin" "/sbin" ]; + }; + + utils32 = mkUtils pkgs.pkgsi686Linux; + utils64 = mkUtils pkgs; + + sandboxTestScript = pkgs.writeText "sandbox-testscript.sh" '' + [ $(id -u) -eq 0 ] + touch foo + chown 1024:1024 foo + touch "$out" + ''; + + testExpr = arch: pkgs.writeText "sandbox-test.nix" '' + let + utils = builtins.storePath + ${if arch == "i686-linux" then utils32 else utils64}; + in derivation { + name = "sandbox-test"; + system = "${arch}"; + builder = "''${utils}/bin/bash"; + args = ["-e" ${sandboxTestScript}]; + PATH = "''${utils}/bin"; + } + ''; + +in makeTest { + name = "nix-sandbox"; + + machine = { pkgs, ... }: { + nix.package = nix; + nix.useSandbox = true; + nix.binaryCaches = []; + virtualisation.writableStore = true; + virtualisation.pathsInNixDB = [ utils32 utils64 ]; + }; + + testScript = '' + $machine->waitForUnit("multi-user.target"); + $machine->succeed("nix-build ${testExpr "x86_64-linux"}"); + $machine->succeed("nix-build ${testExpr "i686-linux"}"); + ''; +} From ed64976cec43f9f067a40fc6921b5513a19fd757 Mon Sep 17 00:00:00 2001 From: aszlig Date: Wed, 16 Nov 2016 17:25:00 +0100 Subject: [PATCH 5/6] seccomp: Forge return codes for POSIX ACL syscalls Commands such as "cp -p" also use fsetxattr() in addition to fchown(), so we need to make sure these syscalls always return successful as well in order to avoid nasty "Invalid value" errors. Signed-off-by: aszlig --- src/libstore/build.cc | 4 ++++ tests/sandbox.nix | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 6c6d0dee3..6fc6220e0 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -1659,6 +1659,10 @@ void setupSeccomp(void) { FORCE_SUCCESS(fchownat); FORCE_SUCCESS(lchown); + FORCE_SUCCESS(setxattr); + FORCE_SUCCESS(lsetxattr); + FORCE_SUCCESS(fsetxattr); + if (seccomp_load(ctx) != 0) { seccomp_release(ctx); throw SysError("unable to load seccomp BPF program"); diff --git a/tests/sandbox.nix b/tests/sandbox.nix index 7e2055038..dc72a5985 100644 --- a/tests/sandbox.nix +++ b/tests/sandbox.nix @@ -16,7 +16,7 @@ let sandboxTestScript = pkgs.writeText "sandbox-testscript.sh" '' [ $(id -u) -eq 0 ] - touch foo + cp -p "$testfile" foo chown 1024:1024 foo touch "$out" ''; @@ -31,6 +31,7 @@ let builder = "''${utils}/bin/bash"; args = ["-e" ${sandboxTestScript}]; PATH = "''${utils}/bin"; + testfile = builtins.toFile "test" "i am a test file"; } ''; From 4e1a2cd537b5b910937499c544043ddac291843e Mon Sep 17 00:00:00 2001 From: aszlig Date: Wed, 16 Nov 2016 17:27:20 +0100 Subject: [PATCH 6/6] seccomp: Forge return values for *chown32 These syscalls are only available in 32bit architectures, but libseccomp should handle them correctly even if we're on native architectures that do not have these syscalls. Signed-off-by: aszlig --- src/libstore/build.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index 6fc6220e0..b411b7de7 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -1654,6 +1654,10 @@ void setupSeccomp(void) { } #endif + FORCE_SUCCESS(chown32); + FORCE_SUCCESS(fchown32); + FORCE_SUCCESS(lchown32); + FORCE_SUCCESS(chown); FORCE_SUCCESS(fchown); FORCE_SUCCESS(fchownat);