feat(tvix/eval): builtins.import without RefCell

CL/6867 added support for builtins.import, which required a cyclic
reference import->globals->builtins->import.  This was implemented
using a RefCell, which makes it possible to mutate the builtins
during evaluation.  The commit message for CL/6867 expressed a
desire to eliminate this possibility:

  This opens up a potentially dangerous footgun in which we could
  mutate the builtins at runtime leading to different compiler
  invocations seeing different builtins, so it'd be nice to have
  some kind of "finalised" status for them or some such, but I'm not
  sure how to represent that atm.

This CL replaces the RefCell with Rc::new_cyclic(), making the
globals/builtins immutable once again.  At VM runtime (once opcodes
start executing) everything is the same as before this CL, except
that the Rc<RefCell<>> introduced by CL/6867 is turned into an
rc::Weak<>.

The function passed to Rc::new_cyclic works very similarly to
overlays in nixpkgs: a function takes its own result as an argument.
However instead of laziness "breaking the cycle", Rust's
Rc::new_cyclic() instead uses an rc::Weak.  This is done to prevent
memory leaks rather than divergence.

This CL also resolves the following TODO from CL/6867:

  // TODO: encapsulate this import weirdness in builtins

The main disadvantage of this CL is the fact that the VM now must
ensure that it holds a strong reference to the globals while a
program is executing; failure to do so will cause a panic when the
weak reference in the builtins is upgrade()d.

In theory it should be possible to create strong reference cycles
the same way Rc::new_cyclic() creates weak cycles, but these cycles
would cause a permanent memory leak -- without either an rc::Weak or
RefCell there is no way to break the cycle.  At some point we will
have to implement some form of cycle collection; whatever library we
choose for that purpose is likely to provide an "immutable strong
reference cycle" primitive similar to Rc::new_cyclic(), and we
should be able to simply drop it in.

Signed-off-by: Adam Joseph <adam@westernsemico.com>
Change-Id: I34bb5821628eb97e426bdb880b02e2097402adb7
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7097
Tested-by: BuildkiteCI
Reviewed-by: tazjin <tazjin@tvl.su>
This commit is contained in:
Adam Joseph 2022-10-26 05:16:04 -07:00
parent 79ef6ee283
commit 8616f13a71
6 changed files with 156 additions and 139 deletions

View file

@ -313,7 +313,7 @@ fn eval(model: &Model) -> Output {
&root_expr, &root_expr,
Some("/nixbolt".into()), Some("/nixbolt".into()),
file.clone(), file.clone(),
Rc::new(RefCell::new(tvix_eval::global_builtins())), tvix_eval::prepare_globals(Box::new(tvix_eval::global_builtins(source.clone()))),
&mut compilation_observer, &mut compilation_observer,
) )
.unwrap(); .unwrap();

View file

@ -1,17 +1,17 @@
use std::{ use std::{
cell::RefCell, collections::BTreeMap,
collections::{BTreeMap, HashMap},
env, env,
fs::File, fs::File,
io::{self, Read}, io::{self, Read},
rc::Rc, rc::{Rc, Weak},
time::{SystemTime, UNIX_EPOCH}, time::{SystemTime, UNIX_EPOCH},
}; };
use crate::{ use crate::{
compiler::GlobalsMap,
errors::ErrorKind, errors::ErrorKind,
observer::NoOpObserver, observer::NoOpObserver,
value::{Builtin, NixAttrs, NixString, Thunk}, value::{Builtin, NixAttrs, Thunk},
vm::VM, vm::VM,
SourceCode, Value, SourceCode, Value,
}; };
@ -69,10 +69,10 @@ fn impure_builtins() -> Vec<Builtin> {
/// Return all impure builtins, that is all builtins which may perform I/O /// Return all impure builtins, that is all builtins which may perform I/O
/// outside of the VM and so cannot be used in all contexts (e.g. WASM). /// outside of the VM and so cannot be used in all contexts (e.g. WASM).
pub(super) fn builtins() -> BTreeMap<NixString, Value> { pub(super) fn builtins() -> BTreeMap<&'static str, Value> {
let mut map: BTreeMap<NixString, Value> = impure_builtins() let mut map: BTreeMap<&'static str, Value> = impure_builtins()
.into_iter() .into_iter()
.map(|b| (b.name().into(), Value::Builtin(b))) .map(|b| (b.name(), Value::Builtin(b)))
.collect(); .collect();
// currentTime pins the time at which evaluation was started // currentTime pins the time at which evaluation was started
@ -84,7 +84,7 @@ pub(super) fn builtins() -> BTreeMap<NixString, Value> {
Err(err) => -(err.duration().as_secs() as i64), Err(err) => -(err.duration().as_secs() as i64),
}; };
map.insert(NixString::from("currentTime"), Value::Integer(seconds)); map.insert("currentTime", Value::Integer(seconds));
} }
map map
@ -94,10 +94,13 @@ pub(super) fn builtins() -> BTreeMap<NixString, Value> {
/// it needs to capture the [crate::SourceCode] structure to correctly track /// it needs to capture the [crate::SourceCode] structure to correctly track
/// source code locations while invoking a compiler. /// source code locations while invoking a compiler.
// TODO: need to be able to pass through a CompilationObserver, too. // TODO: need to be able to pass through a CompilationObserver, too.
pub fn builtins_import( pub fn builtins_import(globals: &Weak<GlobalsMap>, source: SourceCode) -> Builtin {
globals: Rc<RefCell<HashMap<&'static str, Value>>>, // This (very cheap, once-per-compiler-startup) clone exists
source: SourceCode, // solely in order to keep the borrow checker happy. It
) -> Builtin { // resolves the tension between the requirements of
// Rc::new_cyclic() and Builtin::new()
let globals = globals.clone();
Builtin::new( Builtin::new(
"import", "import",
&[true], &[true],
@ -126,11 +129,18 @@ pub fn builtins_import(
}); });
} }
let result = crate::compile( let result = crate::compiler::compile(
&parsed.tree().expr().unwrap(), &parsed.tree().expr().unwrap(),
Some(path.clone()), Some(path.clone()),
file, file,
globals.clone(), // The VM must ensure that a strong reference to the
// globals outlives any self-references (which are
// weak) embedded within the globals. If the
// expect() below panics, it means that did not
// happen.
globals
.upgrade()
.expect("globals dropped while still in use"),
&mut NoOpObserver::default(), &mut NoOpObserver::default(),
) )
.map_err(|err| ErrorKind::ImportCompilerError { .map_err(|err| ErrorKind::ImportCompilerError {

View file

@ -3,9 +3,12 @@
//! See //tvix/eval/docs/builtins.md for a some context on the //! See //tvix/eval/docs/builtins.md for a some context on the
//! available builtins in Nix. //! available builtins in Nix.
use crate::compiler::{GlobalsMap, GlobalsMapFunc};
use crate::source::SourceCode;
use std::cmp::{self, Ordering}; use std::cmp::{self, Ordering};
use std::collections::{BTreeMap, HashMap, HashSet}; use std::collections::{BTreeMap, HashMap, HashSet};
use std::path::PathBuf; use std::path::PathBuf;
use std::rc::Rc;
use regex::Regex; use regex::Regex;
@ -690,70 +693,84 @@ fn placeholders() -> Vec<Builtin> {
// we set TVIX_CURRENT_SYSTEM in build.rs // we set TVIX_CURRENT_SYSTEM in build.rs
pub const CURRENT_PLATFORM: &str = env!("TVIX_CURRENT_SYSTEM"); pub const CURRENT_PLATFORM: &str = env!("TVIX_CURRENT_SYSTEM");
fn builtins_set() -> NixAttrs {
let mut map: BTreeMap<NixString, Value> = BTreeMap::new();
// Pure-value builtins
map.insert(
"nixVersion".into(),
Value::String("2.3-compat-tvix-0.1".into()),
);
map.insert("langVersion".into(), Value::Integer(6));
map.insert(
"currentSystem".into(),
crate::systems::llvm_triple_to_nix_double(CURRENT_PLATFORM).into(),
);
let mut add_builtins = |builtins: Vec<Builtin>| {
for builtin in builtins {
map.insert(builtin.name().into(), Value::Builtin(builtin));
}
};
add_builtins(pure_builtins());
add_builtins(placeholders());
#[cfg(feature = "impure")]
{
map.extend(impure::builtins());
}
NixAttrs::from_map(map)
}
/// Set of Nix builtins that are globally available. /// Set of Nix builtins that are globally available.
pub fn global_builtins() -> HashMap<&'static str, Value> { pub fn global_builtins(source: SourceCode) -> GlobalsMapFunc {
let builtins = builtins_set(); Box::new(move |globals: &std::rc::Weak<GlobalsMap>| {
let mut globals: HashMap<&'static str, Value> = HashMap::new(); let mut map: BTreeMap<&'static str, Value> = BTreeMap::new();
// known global builtins from the builtins set. // Pure-value builtins
for global in &[ map.insert("nixVersion", Value::String("2.3-compat-tvix-0.1".into()));
"abort",
"baseNameOf", map.insert("langVersion", Value::Integer(6));
"derivation",
"derivationStrict", map.insert(
"dirOf", "currentSystem",
"fetchGit", crate::systems::llvm_triple_to_nix_double(CURRENT_PLATFORM).into(),
"fetchMercurial", );
"fetchTarball",
"fromTOML", let mut add_builtins = |builtins: Vec<Builtin>| {
"import", for builtin in builtins {
"isNull", map.insert(builtin.name(), Value::Builtin(builtin));
"map", }
"placeholder", };
"removeAttrs",
"scopedImport", add_builtins(pure_builtins());
"throw", add_builtins(placeholders());
"toString",
] { #[cfg(feature = "impure")]
if let Some(builtin) = builtins.select(global) { {
globals.insert(global, builtin.clone()); map.extend(impure::builtins());
// We need to insert import into the builtins, but the
// builtins passed to import must have import *in it*.
let import = Value::Builtin(crate::builtins::impure::builtins_import(
globals,
source.clone(),
));
map.insert("import", import);
};
let mut globals: GlobalsMap = HashMap::new();
let builtins = Rc::new(NixAttrs::from_map(
map.into_iter().map(|(k, v)| (k.into(), v)).collect(),
));
// known global builtins from the builtins set.
for global in &[
"abort",
"baseNameOf",
"derivation",
"derivationStrict",
"dirOf",
"fetchGit",
"fetchMercurial",
"fetchTarball",
"fromTOML",
"import",
"isNull",
"map",
"placeholder",
"removeAttrs",
"scopedImport",
"throw",
"toString",
] {
if let Some(builtin) = builtins.select(global) {
let builtin = builtin.clone();
globals.insert(
global,
Rc::new(move |c, s| c.emit_constant(builtin.clone(), &s)),
);
}
} }
}
globals.insert("builtins", Value::attrs(builtins)); globals.insert(
"builtins",
Rc::new(move |c, s| c.emit_constant(Value::attrs(builtins.as_ref().clone()), &s)),
);
globals globals
})
} }

View file

@ -19,10 +19,9 @@ mod scope;
use codemap::Span; use codemap::Span;
use rnix::ast::{self, AstToken}; use rnix::ast::{self, AstToken};
use smol_str::SmolStr; use smol_str::SmolStr;
use std::cell::RefCell;
use std::collections::HashMap; use std::collections::HashMap;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::rc::Rc; use std::rc::{Rc, Weak};
use std::sync::Arc; use std::sync::Arc;
use crate::chunk::Chunk; use crate::chunk::Chunk;
@ -42,6 +41,10 @@ pub struct CompilationOutput {
pub lambda: Rc<Lambda>, pub lambda: Rc<Lambda>,
pub warnings: Vec<EvalWarning>, pub warnings: Vec<EvalWarning>,
pub errors: Vec<Error>, pub errors: Vec<Error>,
// This field must outlive the rc::Weak reference which breaks
// the builtins -> import -> builtins reference cycle.
pub globals: Rc<GlobalsMap>,
} }
/// Represents the lambda currently being compiled. /// Represents the lambda currently being compiled.
@ -69,11 +72,19 @@ impl LambdaCtx {
} }
} }
/// Alias for the map of globally available functions that should /// The map of globally available functions that should implicitly
/// implicitly be resolvable in the global scope. /// be resolvable in the global scope.
type GlobalsMap = HashMap<&'static str, Rc<dyn Fn(&mut Compiler, Span)>>; pub type GlobalsMap = HashMap<&'static str, Rc<dyn Fn(&mut Compiler, Span)>>;
struct Compiler<'observer> { /// Functions with this type are used to construct a
/// self-referential `builtins` object; it takes a weak reference to
/// its own result, similar to how nixpkgs' overlays work.
/// Rc::new_cyclic() is what "ties the knot". The heap allocation
/// (Box) and vtable (dyn) do not impair runtime or compile-time
/// performance; they exist only during compiler startup.
pub type GlobalsMapFunc = Box<dyn FnOnce(&Weak<GlobalsMap>) -> GlobalsMap>;
pub struct Compiler<'observer> {
contexts: Vec<LambdaCtx>, contexts: Vec<LambdaCtx>,
warnings: Vec<EvalWarning>, warnings: Vec<EvalWarning>,
errors: Vec<Error>, errors: Vec<Error>,
@ -85,7 +96,7 @@ struct Compiler<'observer> {
/// Each global has an associated token, which when encountered as /// Each global has an associated token, which when encountered as
/// an identifier is resolved against the scope poisoning logic, /// an identifier is resolved against the scope poisoning logic,
/// and a function that should emit code for the token. /// and a function that should emit code for the token.
globals: GlobalsMap, globals: Rc<GlobalsMap>,
/// File reference in the codemap contains all known source code /// File reference in the codemap contains all known source code
/// and is used to track the spans from which instructions where /// and is used to track the spans from which instructions where
@ -108,7 +119,7 @@ impl<'observer> Compiler<'observer> {
pub(crate) fn new( pub(crate) fn new(
location: Option<PathBuf>, location: Option<PathBuf>,
file: Arc<codemap::File>, file: Arc<codemap::File>,
globals: Rc<RefCell<HashMap<&'static str, Value>>>, globals: Rc<GlobalsMap>,
observer: &'observer mut dyn CompilerObserver, observer: &'observer mut dyn CompilerObserver,
) -> EvalResult<Self> { ) -> EvalResult<Self> {
let mut root_dir = match location { let mut root_dir = match location {
@ -136,8 +147,6 @@ impl<'observer> Compiler<'observer> {
root_dir.pop(); root_dir.pop();
} }
let globals = globals.borrow();
#[cfg(not(target_arch = "wasm32"))] #[cfg(not(target_arch = "wasm32"))]
debug_assert!(root_dir.is_absolute()); debug_assert!(root_dir.is_absolute());
@ -145,7 +154,7 @@ impl<'observer> Compiler<'observer> {
root_dir, root_dir,
file, file,
observer, observer,
globals: prepare_globals(&globals), globals,
contexts: vec![LambdaCtx::new()], contexts: vec![LambdaCtx::new()],
warnings: vec![], warnings: vec![],
errors: vec![], errors: vec![],
@ -186,7 +195,7 @@ impl Compiler<'_> {
/// Emit a single constant to the current bytecode chunk and track /// Emit a single constant to the current bytecode chunk and track
/// the source span from which it was compiled. /// the source span from which it was compiled.
fn emit_constant<T: ToSpan>(&mut self, value: Value, node: &T) { pub(super) fn emit_constant<T: ToSpan>(&mut self, value: Value, node: &T) {
let idx = self.chunk().push_constant(value); let idx = self.chunk().push_constant(value);
self.push_op(OpCode::OpConstant(idx), node); self.push_op(OpCode::OpConstant(idx), node);
} }
@ -1174,54 +1183,50 @@ fn optimise_tail_call(chunk: &mut Chunk) {
/// Prepare the full set of globals from additional globals supplied /// Prepare the full set of globals from additional globals supplied
/// by the caller of the compiler, as well as the built-in globals /// by the caller of the compiler, as well as the built-in globals
/// that are always part of the language. /// that are always part of the language. This also "ties the knot"
/// required in order for import to have a reference cycle back to
/// the globals.
/// ///
/// Note that all builtin functions are *not* considered part of the /// Note that all builtin functions are *not* considered part of the
/// language in this sense and MUST be supplied as additional global /// language in this sense and MUST be supplied as additional global
/// values, including the `builtins` set itself. /// values, including the `builtins` set itself.
fn prepare_globals(additional: &HashMap<&'static str, Value>) -> GlobalsMap { pub fn prepare_globals(additional: GlobalsMapFunc) -> Rc<GlobalsMap> {
let mut globals: GlobalsMap = HashMap::new(); Rc::new_cyclic(Box::new(|weak: &Weak<GlobalsMap>| {
let mut globals = additional(weak);
globals.insert(
"true",
Rc::new(|compiler, span| {
compiler.push_op(OpCode::OpTrue, &span);
}),
);
globals.insert(
"false",
Rc::new(|compiler, span| {
compiler.push_op(OpCode::OpFalse, &span);
}),
);
globals.insert(
"null",
Rc::new(|compiler, span| {
compiler.push_op(OpCode::OpNull, &span);
}),
);
for (ident, value) in additional.iter() {
let value: Value = value.clone();
globals.insert( globals.insert(
ident, "true",
Rc::new(move |compiler, span| compiler.emit_constant(value.clone(), &span)), Rc::new(|compiler, span| {
compiler.push_op(OpCode::OpTrue, &span);
}),
); );
}
globals globals.insert(
"false",
Rc::new(|compiler, span| {
compiler.push_op(OpCode::OpFalse, &span);
}),
);
globals.insert(
"null",
Rc::new(|compiler, span| {
compiler.push_op(OpCode::OpNull, &span);
}),
);
globals
}))
} }
pub fn compile( pub fn compile(
expr: &ast::Expr, expr: &ast::Expr,
location: Option<PathBuf>, location: Option<PathBuf>,
file: Arc<codemap::File>, file: Arc<codemap::File>,
globals: Rc<RefCell<HashMap<&'static str, Value>>>, globals: Rc<GlobalsMap>,
observer: &mut dyn CompilerObserver, observer: &mut dyn CompilerObserver,
) -> EvalResult<CompilationOutput> { ) -> EvalResult<CompilationOutput> {
let mut c = Compiler::new(location, file, globals, observer)?; let mut c = Compiler::new(location, file, globals.clone(), observer)?;
let root_span = c.span_for(expr); let root_span = c.span_for(expr);
let root_slot = c.scope_mut().declare_phantom(root_span, false); let root_slot = c.scope_mut().declare_phantom(root_span, false);
@ -1240,5 +1245,6 @@ pub fn compile(
lambda, lambda,
warnings: c.warnings, warnings: c.warnings,
errors: c.errors, errors: c.errors,
globals: globals,
}) })
} }

View file

@ -1,4 +1,4 @@
use std::{cell::RefCell, path::PathBuf, rc::Rc}; use std::path::PathBuf;
use crate::{ use crate::{
builtins::global_builtins, builtins::global_builtins,
@ -80,23 +80,7 @@ pub fn interpret(code: &str, location: Option<PathBuf>, options: Options) -> Eva
println!("{}", pretty_print_expr(&root_expr)); println!("{}", pretty_print_expr(&root_expr));
} }
// TODO: encapsulate this import weirdness in builtins let builtins = crate::compiler::prepare_globals(Box::new(global_builtins(source.clone())));
let builtins = Rc::new(RefCell::new(global_builtins()));
#[cfg(feature = "impure")]
{
// We need to insert import into the builtins, but the
// builtins passed to import must have import *in it*.
let import = Value::Builtin(crate::builtins::impure::builtins_import(
builtins.clone(),
source.clone(),
));
builtins.borrow_mut().insert("import", import);
// TODO: also add it into the inner builtins set
};
let result = if options.dump_bytecode { let result = if options.dump_bytecode {
crate::compiler::compile( crate::compiler::compile(
&root_expr, &root_expr,

View file

@ -24,7 +24,7 @@ mod tests;
// Re-export the public interface used by other crates. // Re-export the public interface used by other crates.
pub use crate::builtins::global_builtins; pub use crate::builtins::global_builtins;
pub use crate::compiler::compile; pub use crate::compiler::{compile, prepare_globals};
pub use crate::errors::EvalResult; pub use crate::errors::EvalResult;
pub use crate::eval::{interpret, Options}; pub use crate::eval::{interpret, Options};
pub use crate::pretty_ast::pretty_print_expr; pub use crate::pretty_ast::pretty_print_expr;