diff --git a/corp/tvixbolt/src/main.rs b/corp/tvixbolt/src/main.rs index eaeffe38c..3aeb40348 100644 --- a/corp/tvixbolt/src/main.rs +++ b/corp/tvixbolt/src/main.rs @@ -313,7 +313,7 @@ fn eval(model: &Model) -> Output { &root_expr, Some("/nixbolt".into()), file.clone(), - Rc::new(RefCell::new(tvix_eval::global_builtins())), + tvix_eval::prepare_globals(Box::new(tvix_eval::global_builtins(source.clone()))), &mut compilation_observer, ) .unwrap(); diff --git a/tvix/eval/src/builtins/impure.rs b/tvix/eval/src/builtins/impure.rs index 5edd81999..8433c04c4 100644 --- a/tvix/eval/src/builtins/impure.rs +++ b/tvix/eval/src/builtins/impure.rs @@ -1,17 +1,17 @@ use std::{ - cell::RefCell, - collections::{BTreeMap, HashMap}, + collections::BTreeMap, env, fs::File, io::{self, Read}, - rc::Rc, + rc::{Rc, Weak}, time::{SystemTime, UNIX_EPOCH}, }; use crate::{ + compiler::GlobalsMap, errors::ErrorKind, observer::NoOpObserver, - value::{Builtin, NixAttrs, NixString, Thunk}, + value::{Builtin, NixAttrs, Thunk}, vm::VM, SourceCode, Value, }; @@ -69,10 +69,10 @@ fn impure_builtins() -> Vec { /// Return all impure builtins, that is all builtins which may perform I/O /// outside of the VM and so cannot be used in all contexts (e.g. WASM). -pub(super) fn builtins() -> BTreeMap { - let mut map: BTreeMap = impure_builtins() +pub(super) fn builtins() -> BTreeMap<&'static str, Value> { + let mut map: BTreeMap<&'static str, Value> = impure_builtins() .into_iter() - .map(|b| (b.name().into(), Value::Builtin(b))) + .map(|b| (b.name(), Value::Builtin(b))) .collect(); // currentTime pins the time at which evaluation was started @@ -84,7 +84,7 @@ pub(super) fn builtins() -> BTreeMap { Err(err) => -(err.duration().as_secs() as i64), }; - map.insert(NixString::from("currentTime"), Value::Integer(seconds)); + map.insert("currentTime", Value::Integer(seconds)); } map @@ -94,10 +94,13 @@ pub(super) fn builtins() -> BTreeMap { /// it needs to capture the [crate::SourceCode] structure to correctly track /// source code locations while invoking a compiler. // TODO: need to be able to pass through a CompilationObserver, too. -pub fn builtins_import( - globals: Rc>>, - source: SourceCode, -) -> Builtin { +pub fn builtins_import(globals: &Weak, source: SourceCode) -> Builtin { + // This (very cheap, once-per-compiler-startup) clone exists + // solely in order to keep the borrow checker happy. It + // resolves the tension between the requirements of + // Rc::new_cyclic() and Builtin::new() + let globals = globals.clone(); + Builtin::new( "import", &[true], @@ -126,11 +129,18 @@ pub fn builtins_import( }); } - let result = crate::compile( + let result = crate::compiler::compile( &parsed.tree().expr().unwrap(), Some(path.clone()), file, - globals.clone(), + // The VM must ensure that a strong reference to the + // globals outlives any self-references (which are + // weak) embedded within the globals. If the + // expect() below panics, it means that did not + // happen. + globals + .upgrade() + .expect("globals dropped while still in use"), &mut NoOpObserver::default(), ) .map_err(|err| ErrorKind::ImportCompilerError { diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs index 91fe27aa4..7dd7ee946 100644 --- a/tvix/eval/src/builtins/mod.rs +++ b/tvix/eval/src/builtins/mod.rs @@ -3,9 +3,12 @@ //! See //tvix/eval/docs/builtins.md for a some context on the //! available builtins in Nix. +use crate::compiler::{GlobalsMap, GlobalsMapFunc}; +use crate::source::SourceCode; use std::cmp::{self, Ordering}; use std::collections::{BTreeMap, HashMap, HashSet}; use std::path::PathBuf; +use std::rc::Rc; use regex::Regex; @@ -690,70 +693,84 @@ fn placeholders() -> Vec { // we set TVIX_CURRENT_SYSTEM in build.rs pub const CURRENT_PLATFORM: &str = env!("TVIX_CURRENT_SYSTEM"); -fn builtins_set() -> NixAttrs { - let mut map: BTreeMap = BTreeMap::new(); - - // Pure-value builtins - map.insert( - "nixVersion".into(), - Value::String("2.3-compat-tvix-0.1".into()), - ); - - map.insert("langVersion".into(), Value::Integer(6)); - - map.insert( - "currentSystem".into(), - crate::systems::llvm_triple_to_nix_double(CURRENT_PLATFORM).into(), - ); - - let mut add_builtins = |builtins: Vec| { - for builtin in builtins { - map.insert(builtin.name().into(), Value::Builtin(builtin)); - } - }; - - add_builtins(pure_builtins()); - add_builtins(placeholders()); - - #[cfg(feature = "impure")] - { - map.extend(impure::builtins()); - } - - NixAttrs::from_map(map) -} - /// Set of Nix builtins that are globally available. -pub fn global_builtins() -> HashMap<&'static str, Value> { - let builtins = builtins_set(); - let mut globals: HashMap<&'static str, Value> = HashMap::new(); +pub fn global_builtins(source: SourceCode) -> GlobalsMapFunc { + Box::new(move |globals: &std::rc::Weak| { + let mut map: BTreeMap<&'static str, Value> = BTreeMap::new(); - // known global builtins from the builtins set. - for global in &[ - "abort", - "baseNameOf", - "derivation", - "derivationStrict", - "dirOf", - "fetchGit", - "fetchMercurial", - "fetchTarball", - "fromTOML", - "import", - "isNull", - "map", - "placeholder", - "removeAttrs", - "scopedImport", - "throw", - "toString", - ] { - if let Some(builtin) = builtins.select(global) { - globals.insert(global, builtin.clone()); + // Pure-value builtins + map.insert("nixVersion", Value::String("2.3-compat-tvix-0.1".into())); + + map.insert("langVersion", Value::Integer(6)); + + map.insert( + "currentSystem", + crate::systems::llvm_triple_to_nix_double(CURRENT_PLATFORM).into(), + ); + + let mut add_builtins = |builtins: Vec| { + for builtin in builtins { + map.insert(builtin.name(), Value::Builtin(builtin)); + } + }; + + add_builtins(pure_builtins()); + add_builtins(placeholders()); + + #[cfg(feature = "impure")] + { + map.extend(impure::builtins()); + + // We need to insert import into the builtins, but the + // builtins passed to import must have import *in it*. + let import = Value::Builtin(crate::builtins::impure::builtins_import( + globals, + source.clone(), + )); + + map.insert("import", import); + }; + + let mut globals: GlobalsMap = HashMap::new(); + + let builtins = Rc::new(NixAttrs::from_map( + map.into_iter().map(|(k, v)| (k.into(), v)).collect(), + )); + + // known global builtins from the builtins set. + for global in &[ + "abort", + "baseNameOf", + "derivation", + "derivationStrict", + "dirOf", + "fetchGit", + "fetchMercurial", + "fetchTarball", + "fromTOML", + "import", + "isNull", + "map", + "placeholder", + "removeAttrs", + "scopedImport", + "throw", + "toString", + ] { + if let Some(builtin) = builtins.select(global) { + let builtin = builtin.clone(); + globals.insert( + global, + Rc::new(move |c, s| c.emit_constant(builtin.clone(), &s)), + ); + } } - } - globals.insert("builtins", Value::attrs(builtins)); + globals.insert( + "builtins", + Rc::new(move |c, s| c.emit_constant(Value::attrs(builtins.as_ref().clone()), &s)), + ); - globals + globals + }) } diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs index 2985c7e90..34a15330c 100644 --- a/tvix/eval/src/compiler/mod.rs +++ b/tvix/eval/src/compiler/mod.rs @@ -19,10 +19,9 @@ mod scope; use codemap::Span; use rnix::ast::{self, AstToken}; use smol_str::SmolStr; -use std::cell::RefCell; use std::collections::HashMap; use std::path::{Path, PathBuf}; -use std::rc::Rc; +use std::rc::{Rc, Weak}; use std::sync::Arc; use crate::chunk::Chunk; @@ -42,6 +41,10 @@ pub struct CompilationOutput { pub lambda: Rc, pub warnings: Vec, pub errors: Vec, + + // This field must outlive the rc::Weak reference which breaks + // the builtins -> import -> builtins reference cycle. + pub globals: Rc, } /// Represents the lambda currently being compiled. @@ -69,11 +72,19 @@ impl LambdaCtx { } } -/// Alias for the map of globally available functions that should -/// implicitly be resolvable in the global scope. -type GlobalsMap = HashMap<&'static str, Rc>; +/// The map of globally available functions that should implicitly +/// be resolvable in the global scope. +pub type GlobalsMap = HashMap<&'static str, Rc>; -struct Compiler<'observer> { +/// Functions with this type are used to construct a +/// self-referential `builtins` object; it takes a weak reference to +/// its own result, similar to how nixpkgs' overlays work. +/// Rc::new_cyclic() is what "ties the knot". The heap allocation +/// (Box) and vtable (dyn) do not impair runtime or compile-time +/// performance; they exist only during compiler startup. +pub type GlobalsMapFunc = Box) -> GlobalsMap>; + +pub struct Compiler<'observer> { contexts: Vec, warnings: Vec, errors: Vec, @@ -85,7 +96,7 @@ struct Compiler<'observer> { /// Each global has an associated token, which when encountered as /// an identifier is resolved against the scope poisoning logic, /// and a function that should emit code for the token. - globals: GlobalsMap, + globals: Rc, /// File reference in the codemap contains all known source code /// and is used to track the spans from which instructions where @@ -108,7 +119,7 @@ impl<'observer> Compiler<'observer> { pub(crate) fn new( location: Option, file: Arc, - globals: Rc>>, + globals: Rc, observer: &'observer mut dyn CompilerObserver, ) -> EvalResult { let mut root_dir = match location { @@ -136,8 +147,6 @@ impl<'observer> Compiler<'observer> { root_dir.pop(); } - let globals = globals.borrow(); - #[cfg(not(target_arch = "wasm32"))] debug_assert!(root_dir.is_absolute()); @@ -145,7 +154,7 @@ impl<'observer> Compiler<'observer> { root_dir, file, observer, - globals: prepare_globals(&globals), + globals, contexts: vec![LambdaCtx::new()], warnings: vec![], errors: vec![], @@ -186,7 +195,7 @@ impl Compiler<'_> { /// Emit a single constant to the current bytecode chunk and track /// the source span from which it was compiled. - fn emit_constant(&mut self, value: Value, node: &T) { + pub(super) fn emit_constant(&mut self, value: Value, node: &T) { let idx = self.chunk().push_constant(value); self.push_op(OpCode::OpConstant(idx), node); } @@ -1174,54 +1183,50 @@ fn optimise_tail_call(chunk: &mut Chunk) { /// Prepare the full set of globals from additional globals supplied /// by the caller of the compiler, as well as the built-in globals -/// that are always part of the language. +/// that are always part of the language. This also "ties the knot" +/// required in order for import to have a reference cycle back to +/// the globals. /// /// Note that all builtin functions are *not* considered part of the /// language in this sense and MUST be supplied as additional global /// values, including the `builtins` set itself. -fn prepare_globals(additional: &HashMap<&'static str, Value>) -> GlobalsMap { - let mut globals: GlobalsMap = HashMap::new(); +pub fn prepare_globals(additional: GlobalsMapFunc) -> Rc { + Rc::new_cyclic(Box::new(|weak: &Weak| { + let mut globals = additional(weak); - globals.insert( - "true", - Rc::new(|compiler, span| { - compiler.push_op(OpCode::OpTrue, &span); - }), - ); - - globals.insert( - "false", - Rc::new(|compiler, span| { - compiler.push_op(OpCode::OpFalse, &span); - }), - ); - - globals.insert( - "null", - Rc::new(|compiler, span| { - compiler.push_op(OpCode::OpNull, &span); - }), - ); - - for (ident, value) in additional.iter() { - let value: Value = value.clone(); globals.insert( - ident, - Rc::new(move |compiler, span| compiler.emit_constant(value.clone(), &span)), + "true", + Rc::new(|compiler, span| { + compiler.push_op(OpCode::OpTrue, &span); + }), ); - } - globals + globals.insert( + "false", + Rc::new(|compiler, span| { + compiler.push_op(OpCode::OpFalse, &span); + }), + ); + + globals.insert( + "null", + Rc::new(|compiler, span| { + compiler.push_op(OpCode::OpNull, &span); + }), + ); + + globals + })) } pub fn compile( expr: &ast::Expr, location: Option, file: Arc, - globals: Rc>>, + globals: Rc, observer: &mut dyn CompilerObserver, ) -> EvalResult { - let mut c = Compiler::new(location, file, globals, observer)?; + let mut c = Compiler::new(location, file, globals.clone(), observer)?; let root_span = c.span_for(expr); let root_slot = c.scope_mut().declare_phantom(root_span, false); @@ -1240,5 +1245,6 @@ pub fn compile( lambda, warnings: c.warnings, errors: c.errors, + globals: globals, }) } diff --git a/tvix/eval/src/eval.rs b/tvix/eval/src/eval.rs index df2c562cf..6a35630b3 100644 --- a/tvix/eval/src/eval.rs +++ b/tvix/eval/src/eval.rs @@ -1,4 +1,4 @@ -use std::{cell::RefCell, path::PathBuf, rc::Rc}; +use std::path::PathBuf; use crate::{ builtins::global_builtins, @@ -80,23 +80,7 @@ pub fn interpret(code: &str, location: Option, options: Options) -> Eva println!("{}", pretty_print_expr(&root_expr)); } - // TODO: encapsulate this import weirdness in builtins - - let builtins = Rc::new(RefCell::new(global_builtins())); - - #[cfg(feature = "impure")] - { - // We need to insert import into the builtins, but the - // builtins passed to import must have import *in it*. - let import = Value::Builtin(crate::builtins::impure::builtins_import( - builtins.clone(), - source.clone(), - )); - - builtins.borrow_mut().insert("import", import); - // TODO: also add it into the inner builtins set - }; - + let builtins = crate::compiler::prepare_globals(Box::new(global_builtins(source.clone()))); let result = if options.dump_bytecode { crate::compiler::compile( &root_expr, diff --git a/tvix/eval/src/lib.rs b/tvix/eval/src/lib.rs index 70d2dbbe3..6cf3aa212 100644 --- a/tvix/eval/src/lib.rs +++ b/tvix/eval/src/lib.rs @@ -24,7 +24,7 @@ mod tests; // Re-export the public interface used by other crates. pub use crate::builtins::global_builtins; -pub use crate::compiler::compile; +pub use crate::compiler::{compile, prepare_globals}; pub use crate::errors::EvalResult; pub use crate::eval::{interpret, Options}; pub use crate::pretty_ast::pretty_print_expr;