refactor(tvix): completely remove boehm gc

We have decided that leaking memory is a better fate than random,
non-debuggable memory corruption. Future CLs will begin changing
various fields to std::unique_ptr and std::shared_ptr.

It turns out that disabling the GC does not have a disastrous impact.
The Nix evaluator only runs on the client CLI, never in any long-
running process. Even the REPL does not leak too badly under this
change, because it uses one EvalState for the duration of the REPL.

Building an explicitly tracing garbage collector is likely in the
future of this project, but that giant amount of work cannot be
done under a nix evaluator that is constantly crashing. We need to
restore development velocity here, and this is the best way we've
figured out to do it.

Change-Id: I2fcda8fcee853c15a9a5e22eca7c5a784bc2bf76
Reviewed-on: https://cl.tvl.fyi/c/depot/+/1720
Reviewed-by: glittershark <grfn@gws.fyi>
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
This commit is contained in:
Kane York 2020-08-13 15:18:40 -07:00 committed by kanepyork
parent d4f5fcef66
commit 72e61aa584
15 changed files with 51 additions and 170 deletions

View file

@ -12,12 +12,6 @@ let
customMemoryManagement = false;
};
# TODO(tazjin): this is copied from the original derivation, but what
# is it for?
largeBoehm = pkgs.boehmgc.override {
enableLargeConfig = true;
};
src = ./.;
# Proto generation in CMake is theoretically possible, but that is
@ -84,7 +78,6 @@ in lib.fix (self: pkgs.llvmPackages.libcxxStdenv.mkDerivation {
propagatedBuildInputs = with pkgs; [
boost
largeBoehm
];
configurePhase = ''

View file

@ -75,8 +75,6 @@ target_link_libraries(nixexpr
absl::flat_hash_set
absl::node_hash_set
absl::strings
gc
gccpp
)
configure_file("nix-expr.pc.in" "${PROJECT_BINARY_DIR}/nix-expr.pc" @ONLY)

View file

@ -3,7 +3,6 @@
#include <new>
#include <absl/container/btree_map.h>
#include <gc/gc_cpp.h>
#include <glog/logging.h>
#include "libexpr/eval-inline.hh"
@ -61,7 +60,7 @@ Bindings* Bindings::NewGC(size_t capacity) {
return &ZERO_BINDINGS;
}
return new (GC) Bindings;
return new Bindings;
}
Bindings* Bindings::Merge(const Bindings& lhs, const Bindings& rhs) {

View file

@ -2,7 +2,6 @@
#pragma once
#include <absl/container/btree_map.h>
#include <gc/gc_allocator.h>
#include "libexpr/nixexpr.hh"
#include "libexpr/symbol-table.hh"
@ -14,7 +13,7 @@ class EvalState;
struct Value;
/* Map one attribute name to its value. */
struct Attr : public gc {
struct Attr {
Symbol name;
Value* value; // TODO(tazjin): Who owns this?
Pos* pos; // TODO(tazjin): Who owns this?
@ -22,13 +21,9 @@ struct Attr : public gc {
: name(name), value(value), pos(pos){};
};
// Convenience alias for the backing map, with the garbage-collecting
// allocator explicitly specified.
using AttributeMap =
absl::btree_map<Symbol, Attr, std::less<Symbol>,
traceable_allocator<std::pair<const Symbol, Attr>>>;
using AttributeMap = absl::btree_map<Symbol, Attr>;
class Bindings : public gc {
class Bindings {
public:
typedef AttributeMap::iterator iterator;

View file

@ -11,13 +11,9 @@
#include <optional>
#include <variant>
#define GC_INCLUDE_NEW
#include <absl/base/call_once.h>
#include <absl/container/flat_hash_set.h>
#include <absl/strings/match.h>
#include <gc/gc.h>
#include <gc/gc_cpp.h>
#include <glog/logging.h>
#include <sys/resource.h>
#include <sys/time.h>
@ -38,52 +34,7 @@
namespace nix {
namespace {
// Called when the Boehm GC runs out of memory.
static void* BoehmOomHandler(size_t requested) {
/* Convert this to a proper C++ exception. */
LOG(FATAL) << "Garbage collector ran out of memory; requested " << requested
<< " bytes";
throw std::bad_alloc();
}
void ConfigureBoehmGc() {
/* Don't look for interior pointers. This reduces the odds of
misdetection a bit. */
GC_set_all_interior_pointers(0);
/* We don't have any roots in data segments, so don't scan from
there. */
GC_set_no_dls(1);
GC_INIT();
GC_set_oom_fn(BoehmOomHandler);
/* Set the initial heap size to something fairly big (25% of
physical RAM, up to a maximum of 384 MiB) so that in most cases
we don't need to garbage collect at all. (Collection has a
fairly significant overhead.) The heap size can be overridden
through libgc's GC_INITIAL_HEAP_SIZE environment variable. We
should probably also provide a nix.conf setting for this. Note
that GC_expand_hp() causes a lot of virtual, but not physical
(resident) memory to be allocated. This might be a problem on
systems that don't overcommit. */
if (getenv("GC_INITIAL_HEAP_SIZE") == nullptr) {
size_t size = 32 * 1024 * 1024;
#if HAVE_SYSCONF && defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES)
size_t maxSize = 384 * 1024 * 1024;
long pageSize = sysconf(_SC_PAGESIZE);
long pages = sysconf(_SC_PHYS_PAGES);
if (pageSize != -1) {
size = (pageSize * pages) / 4;
} // 25% of RAM
if (size > maxSize) {
size = maxSize;
}
#endif
DLOG(INFO) << "setting initial heap size to " << size << " bytes";
GC_expand_hp(size);
}
void ConfigureGc() { /* This function intentionally left blank. */
}
} // namespace
@ -92,13 +43,13 @@ namespace expr {
absl::once_flag gc_flag;
void InitGC() { absl::call_once(gc_flag, &ConfigureBoehmGc); }
void InitGC() { absl::call_once(gc_flag, &ConfigureGc); }
} // namespace expr
static char* dupString(const char* s) {
char* t;
t = GC_STRDUP(s);
t = strdup(s);
if (t == nullptr) {
throw std::bad_alloc();
}
@ -106,7 +57,7 @@ static char* dupString(const char* s) {
}
std::shared_ptr<Value*> allocRootValue(Value* v) {
return std::allocate_shared<Value*>(traceable_allocator<Value*>(), v);
return std::make_shared<Value*>(v);
}
static void printValue(std::ostream& str, std::set<const Value*>& active,
@ -489,7 +440,7 @@ Value* EvalState::addPrimOp(const std::string& name, size_t arity,
Symbol sym = symbols.Create(name2);
// Even though PrimOp doesn't need tracing, it needs to be collected.
v->type = tPrimOp;
v->primOp = new (GC) PrimOp(primOp, arity, sym);
v->primOp = new PrimOp(primOp, arity, sym);
staticBaseEnv.vars[symbols.Create(name)] = baseEnvDispl;
baseEnv.values[baseEnvDispl++] = v;
baseEnv.values[0]->attrs->push_back(Attr(sym, v));
@ -631,7 +582,7 @@ inline Value* EvalState::lookupVar(Env* env, const ExprVar& var, bool noEval) {
Value* EvalState::allocValue() {
nrValues++;
return new (GC) Value;
return new Value;
}
Env& EvalState::allocEnv(size_t size) {
@ -655,7 +606,7 @@ void EvalState::mkList(Value& v, NixList* list) {
}
void EvalState::mkList(Value& v, size_t size) {
EvalState::mkList(v, new (GC) NixList(size));
EvalState::mkList(v, new NixList(size));
}
unsigned long nrThunks = 0;
@ -1262,7 +1213,7 @@ void ExprOpConcatLists::eval(EvalState& state, Env& env, Value& v) {
void EvalState::concatLists(Value& v, const NixList& lists, const Pos& pos) {
nrListConcats++;
auto outlist = new (GC) NixList();
auto outlist = new NixList();
for (Value* list : lists) {
forceList(*list, pos);
@ -1344,10 +1295,7 @@ void ExprPos::eval(EvalState& state, Env& env, Value& v) {
}
template <typename T>
using traceable_flat_hash_set =
absl::flat_hash_set<T, absl::container_internal::hash_default_hash<T>,
absl::container_internal::hash_default_eq<T>,
traceable_allocator<T>>;
using traceable_flat_hash_set = absl::flat_hash_set<T>;
void EvalState::forceValueDeep(Value& v) {
traceable_flat_hash_set<const Value*> seen;
@ -1713,11 +1661,6 @@ void EvalState::printStats() {
uint64_t bAttrsets =
nrAttrsets * sizeof(Bindings) + nrAttrsInAttrsets * sizeof(Attr);
#if HAVE_BOEHMGC
GC_word heapSize;
GC_word totalBytes;
GC_get_heap_usage_safe(&heapSize, nullptr, nullptr, nullptr, &totalBytes);
#endif
if (showStats) {
auto outPath = getEnv("NIX_SHOW_STATS_PATH", "-");
std::fstream fs;
@ -1768,13 +1711,6 @@ void EvalState::printStats() {
topObj.attr("nrLookups", nrLookups);
topObj.attr("nrPrimOpCalls", nrPrimOpCalls);
topObj.attr("nrFunctionCalls", nrFunctionCalls);
#if HAVE_BOEHMGC
{
auto gc = topObj.object("gc");
gc.attr("heapSize", heapSize);
gc.attr("totalBytes", totalBytes);
}
#endif
if (countCalls) {
{

View file

@ -5,9 +5,6 @@
#include <unordered_map>
#include <vector>
#include <gc/gc_allocator.h>
#include <gc/gc_cpp.h>
#include "libexpr/attr-set.hh"
#include "libexpr/nixexpr.hh"
#include "libexpr/symbol-table.hh"
@ -39,16 +36,14 @@ struct PrimOp {
: fun(fun), arity(arity), name(name) {}
};
struct Env : public gc {
Env(unsigned short size) : size(size) {
values = std::vector<Value*, traceable_allocator<Value*>>(size);
}
struct Env {
Env(unsigned short size) : size(size) { values = std::vector<Value*>(size); }
Env* up;
unsigned short size; // used by valueSize
unsigned short prevWith : 14; // nr of levels up to next `with' environment
enum { Plain = 0, HasWithExpr, HasWithAttrs } type : 2;
std::vector<Value*, traceable_allocator<Value*>> values;
std::vector<Value*> values;
Expr* withAttrsExpr = nullptr;
};
@ -63,14 +58,12 @@ typedef std::map<Path, Path> SrcToStore;
std::ostream& operator<<(std::ostream& str, const Value& v);
typedef std::pair<std::string, std::string> SearchPathElem;
typedef std::list<SearchPathElem> SearchPath;
using SearchPathElem = std::pair<std::string, std::string>;
using SearchPath = std::list<SearchPathElem>;
typedef std::map<Path, Expr*, std::less<Path>,
traceable_allocator<std::pair<const Path, Expr*>>>
FileParseCache;
using FileParseCache = std::map<Path, Expr*>;
class EvalState : public gc {
class EvalState {
public:
SymbolTable symbols;
@ -100,9 +93,7 @@ class EvalState : public gc {
FileParseCache fileParseCache;
/* A cache from path names to values. */
typedef std::map<Path, Value, std::less<Path>,
traceable_allocator<std::pair<const Path, Value>>>
FileEvalCache;
using FileEvalCache = std::map<Path, Value>;
FileEvalCache fileEvalCache;
SearchPath searchPath;

View file

@ -7,7 +7,7 @@
namespace nix {
struct DrvInfo : public gc {
struct DrvInfo {
public:
typedef std::map<std::string, Path> Outputs;
@ -67,11 +67,7 @@ struct DrvInfo : public gc {
bool hasFailed() { return failed; };
};
#if HAVE_BOEHMGC
typedef std::list<DrvInfo, traceable_allocator<DrvInfo> > DrvInfos;
#else
typedef std::list<DrvInfo> DrvInfos;
#endif
/* If value `v' denotes a derivation, return a DrvInfo object
describing it. Otherwise return nothing. */

View file

@ -73,19 +73,16 @@ struct StaticEnv;
/* An attribute path is a sequence of attribute names. */
using AttrName = std::variant<Symbol, Expr*>;
using AttrPath = std::vector<AttrName>;
using AttrNameVector = std::vector<AttrName>;
typedef std::vector<AttrName, traceable_allocator<AttrName>> AttrPath;
using AttrNameVector =
std::vector<nix::AttrName, traceable_allocator<nix::AttrName>>;
using VectorExprs = std::vector<nix::Expr*, traceable_allocator<nix::Expr*>>;
using VectorExprs = std::vector<nix::Expr*>;
std::string showAttrPath(const AttrPath& attrPath);
/* Abstract syntax of Nix expressions. */
struct Expr : public gc {
struct Expr {
virtual ~Expr(){};
virtual void show(std::ostream& str) const;
virtual void bindVars(const StaticEnv& env);
@ -191,7 +188,7 @@ struct ExprOpHasAttr : Expr {
struct ExprAttrs : Expr {
bool recursive;
struct AttrDef : public gc {
struct AttrDef {
bool inherited;
Expr* e;
Pos pos;
@ -201,22 +198,17 @@ struct ExprAttrs : Expr {
AttrDef(){};
};
typedef absl::flat_hash_map<
Symbol, AttrDef, absl::container_internal::hash_default_hash<Symbol>,
absl::container_internal::hash_default_eq<Symbol>,
traceable_allocator<std::pair<const Symbol, AttrDef>>>
AttrDefs;
using AttrDefs = absl::flat_hash_map<Symbol, AttrDef>;
AttrDefs attrs;
struct DynamicAttrDef : public gc {
struct DynamicAttrDef {
Expr *nameExpr, *valueExpr;
Pos pos;
DynamicAttrDef(Expr* nameExpr, Expr* valueExpr, const Pos& pos)
: nameExpr(nameExpr), valueExpr(valueExpr), pos(pos){};
};
typedef std::vector<DynamicAttrDef, traceable_allocator<DynamicAttrDef>>
DynamicAttrDefs;
using DynamicAttrDefs = std::vector<DynamicAttrDef>;
DynamicAttrDefs dynamicAttrs;
ExprAttrs() : recursive(false){};
@ -229,15 +221,15 @@ struct ExprList : Expr {
COMMON_METHODS
};
struct Formal : public gc {
struct Formal {
Symbol name;
Expr* def; // def = default, not definition
Formal(const Symbol& name, Expr* def) : name(name), def(def){};
};
// Describes structured function arguments (e.g. `{ a }: ...`)
struct Formals : public gc {
typedef std::list<Formal, traceable_allocator<Formal>> Formals_;
struct Formals {
using Formals_ = std::list<Formal>;
Formals_ formals;
std::set<Symbol> argNames; // used during parsing
bool ellipsis;

View file

@ -5,7 +5,6 @@
#include <optional>
#include <variant>
#include <gc/gc.h>
#include <glog/logging.h>
#include "libexpr/eval.hh"
@ -20,7 +19,7 @@
namespace nix {
struct ParseData : public gc {
struct ParseData {
EvalState& state;
SymbolTable& symbols;
Expr* result;

View file

@ -15,12 +15,7 @@
#define YY_NO_INPUT 1 // disable unused yyinput features
#include "libexpr/parser.hh"
// Allow GC tracing of YY-allocated structures
#define YYMALLOC GC_MALLOC_UNCOLLECTABLE
#define YYFREE GC_FREE
#define YYREALLOC GC_REALLOC
struct YYSTYPE : public gc {
struct YYSTYPE {
union {
nix::Expr * e;
nix::ExprList * list;

View file

@ -290,11 +290,7 @@ struct CompareValues {
}
};
#if HAVE_BOEHMGC
typedef std::list<Value*, gc_allocator<Value*>> ValueList;
#else
typedef std::list<Value*> ValueList;
#endif
static void prim_genericClosure(EvalState& state, const Pos& pos, Value** args,
Value& v) {
@ -1603,7 +1599,7 @@ static void prim_sort(EvalState& state, const Pos& pos, Value** args,
state.forceList(*args[1], pos);
// Copy of the input list which can be sorted in place.
auto outlist = new (GC) NixList(*args[1]->list);
auto outlist = new NixList(*args[1]->list);
std::for_each(outlist->begin(), outlist->end(),
[&](Value* val) { state.forceValue(*val); });
@ -1633,8 +1629,8 @@ static void prim_partition(EvalState& state, const Pos& pos, Value** args,
state.forceFunction(*args[0], pos);
state.forceList(*args[1], pos);
NixList* right = new (GC) NixList();
NixList* wrong = new (GC) NixList();
NixList* right = new NixList();
NixList* wrong = new NixList();
for (Value* elem : *args[1]->list) {
state.forceValue(*elem, pos);
@ -1664,7 +1660,7 @@ static void prim_concatMap(EvalState& state, const Pos& pos, Value** args,
state.forceFunction(*args[0], pos);
state.forceList(*args[1], pos);
NixList* outlist = new (GC) NixList;
NixList* outlist = new NixList;
for (Value* elem : *args[1]->list) {
auto out = state.allocValue();

View file

@ -3,9 +3,6 @@
#include <tuple>
#include <vector>
#include <gc/gc_allocator.h>
#include <gc/gc_cpp.h>
#include "libexpr/symbol-table.hh"
#include "libutil/types.hh"
@ -66,32 +63,32 @@ struct Value;
the inputSrcs of the derivations.
For canonicity, the store paths should be in sorted order. */
struct NixString : public gc {
struct NixString {
const char* s;
const char** context; // must be in sorted order
};
struct NixThunk : public gc {
struct NixThunk {
Env* env;
Expr* expr;
};
struct NixApp : public gc {
struct NixApp {
Value *left, *right;
};
struct NixLambda : public gc {
struct NixLambda {
Env* env;
ExprLambda* fun;
};
struct NixPrimOpApp : public gc {
struct NixPrimOpApp {
Value *left, *right;
};
using NixList = std::vector<Value*, traceable_allocator<Value*>>;
using NixList = std::vector<Value*>;
struct Value : public gc {
struct Value {
ValueType type;
union { // TODO(tazjin): std::variant
NixInt integer;
@ -177,9 +174,7 @@ void mkPath(Value& v, const char* s);
not included. */
size_t valueSize(const Value& v);
typedef std::map<Symbol, Value*, std::less<Symbol>,
traceable_allocator<std::pair<const Symbol, Value*>>>
ValueMap;
using ValueMap = std::map<Symbol, Value*>;
std::shared_ptr<Value*> allocRootValue(Value* v);

View file

@ -48,7 +48,7 @@ struct InstallSourceInfo {
Bindings* autoArgs;
};
struct Globals : public gc {
struct Globals {
InstallSourceInfo instSource;
Path profile;
std::shared_ptr<EvalState> state;

View file

@ -22,9 +22,6 @@
#include "libutil/finally.hh"
#include "nix/command.hh"
#define GC_INCLUDE_NEW
#include <gc/gc_cpp.h>
namespace nix {
#define ESC_RED "\033[31m"
@ -35,7 +32,7 @@ namespace nix {
#define ESC_CYA "\033[36m"
#define ESC_END "\033[0m"
struct NixRepl : gc {
struct NixRepl {
std::string curDir;
EvalState state;
Bindings* autoArgs;

View file

@ -7,7 +7,6 @@
#include <absl/container/btree_map.h>
#include <bits/stdint-intn.h>
#include <gc/gc_cpp.h>
#include <gtest/gtest.h>
#include <rapidcheck.h>
#include <rapidcheck/Assertions.h>
@ -55,7 +54,7 @@ struct Arbitrary<Value*> {
static Gen<nix::Value*> arbitrary() {
return gen::apply(
[](nix::ValueType typ, int i) {
auto ret = new (GC) Value();
auto ret = new Value();
ret->type = typ;
ret->integer = i;
return ret;
@ -78,7 +77,7 @@ struct Arbitrary<nix::Pos*> {
static Gen<nix::Pos*> arbitrary() {
return gen::apply(
[](unsigned int line, unsigned int column) {
return new (GC) Pos({}, line, column);
return new Pos({}, line, column);
},
gen::arbitrary<unsigned int>(), gen::arbitrary<unsigned int>());
}