refactor(tvix/eval): handle scope poisoning & globals dynamically

Previously, the tokens that could poison a scope (`true`, `false`,
`null`) had individual fields in the scope to track whether or not
they were poisoned.

This commit sets up new machinery that instead tracks scope poisoning
dynamically using a HashMap, and which makes it possible to introduce
additional tokens to the top-level ("global") scope that are directly
resolved by the compiler by passing a map of runtime values to be
used.

With this solution, the compiler now contains all machinery required
for wiring up builtins resolution.

The set of builtins to be exposed at runtime must, however, be
constructed *outside* of the compiler and passed in. Everything is
prepared for this, but it is not yet wired up (so the only existing
builtins are the ones we already had before).

Note that this technically opens up an optimisation potential when
compiling selection operations, where the attribute set being selected
from is `builtins`. The compiler could directly resolve the builtins
and place the right values on the stack.

Change-Id: Ia7dad3c2a98703e7ea0c6ace1a722d57cc70a65c
Reviewed-on: https://cl.tvl.fyi/c/depot/+/6253
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
This commit is contained in:
Vincent Ambo 2022-08-24 15:37:09 +03:00 committed by tazjin
parent c48aa058a7
commit ca90c0f45a

View file

@ -16,7 +16,9 @@
use path_clean::PathClean; use path_clean::PathClean;
use rnix::ast::{self, AstToken, HasEntry}; use rnix::ast::{self, AstToken, HasEntry};
use rowan::ast::AstNode; use rowan::ast::AstNode;
use std::collections::{hash_map, HashMap};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::rc::Rc;
use crate::chunk::Chunk; use crate::chunk::Chunk;
use crate::errors::{Error, ErrorKind, EvalResult}; use crate::errors::{Error, ErrorKind, EvalResult};
@ -78,15 +80,40 @@ struct Scope {
// `with`. // `with`.
with_stack: Vec<With>, with_stack: Vec<With>,
// Certain symbols are considered to be "poisoning" the scope when // Users are allowed to override globally defined symbols like
// defined. This is because users are allowed to override symbols // `true`, `false` or `null` in scopes. We call this "scope
// like 'true' or 'null'. // poisoning", as it requires runtime resolution of those tokens.
// //
// To support this efficiently, the depth at which a poisoning // To support this efficiently, the depth at which a poisoning
// occured is tracked here. // occured is tracked here.
poisoned_true: usize, poisoned_tokens: HashMap<&'static str, usize>,
poisoned_false: usize, }
poisoned_null: usize,
impl Scope {
/// Mark a globally defined token as poisoned.
fn poison(&mut self, name: &'static str, depth: usize) {
match self.poisoned_tokens.entry(name) {
hash_map::Entry::Occupied(_) => {
/* do nothing, as the token is already poisoned at a
* lower scope depth */
}
hash_map::Entry::Vacant(entry) => {
entry.insert(depth);
}
}
}
/// Check whether a given token is poisoned.
fn is_poisoned(&self, name: &str) -> bool {
self.poisoned_tokens.contains_key(name)
}
/// "Unpoison" tokens that were poisoned at a given depth. Used
/// when scopes are closed.
fn unpoison(&mut self, depth: usize) {
self.poisoned_tokens
.retain(|_, poisoned_at| *poisoned_at != depth);
}
} }
/// Represents the lambda currently being compiled. /// Represents the lambda currently being compiled.
@ -104,11 +131,21 @@ impl LambdaCtx {
} }
} }
type GlobalsMap = HashMap<&'static str, Rc<dyn Fn(&mut Compiler)>>;
struct Compiler { struct Compiler {
contexts: Vec<LambdaCtx>, contexts: Vec<LambdaCtx>,
warnings: Vec<EvalWarning>, warnings: Vec<EvalWarning>,
errors: Vec<Error>, errors: Vec<Error>,
root_dir: PathBuf, root_dir: PathBuf,
/// Carries all known global tokens; the full set of which is
/// created when the compiler is invoked.
///
/// Each global has an associated token, which when encountered as
/// an identifier is resolved against the scope poisoning logic,
/// and a function that should emit code for the token.
globals: GlobalsMap,
} }
// Helper functions for emitting code and metadata to the internal // Helper functions for emitting code and metadata to the internal
@ -124,7 +161,7 @@ impl Compiler {
} }
fn chunk(&mut self) -> &mut Chunk { fn chunk(&mut self) -> &mut Chunk {
std::rc::Rc::<Chunk>::get_mut(self.context_mut().lambda.chunk()) Rc::<Chunk>::get_mut(self.context_mut().lambda.chunk())
.expect("compiler flaw: long-lived chunk reference") .expect("compiler flaw: long-lived chunk reference")
} }
@ -677,41 +714,29 @@ impl Compiler {
} }
fn compile_ident(&mut self, node: ast::Ident) { fn compile_ident(&mut self, node: ast::Ident) {
match node.ident_token().unwrap().text() { let ident = node.ident_token().unwrap();
// TODO(tazjin): Nix technically allows code like
//
// let null = 1; in null
// => 1
//
// which we do *not* want to check at runtime. Once
// scoping is introduced, the compiler should carry some
// optimised information about any "weird" stuff that's
// happened to the scope (such as overrides of these
// literals, or builtins).
"true" if self.scope().poisoned_true == 0 => self.chunk().push_op(OpCode::OpTrue),
"false" if self.scope().poisoned_false == 0 => self.chunk().push_op(OpCode::OpFalse),
"null" if self.scope().poisoned_null == 0 => self.chunk().push_op(OpCode::OpNull),
name => { // If the identifier is a global, and it is not poisoned, emit
// Note: `with` and some other special scoping // the global directly.
// features are not yet implemented. if let Some(global) = self.globals.get(ident.text()) {
match self.resolve_local(name) { if !self.scope().is_poisoned(ident.text()) {
Some(idx) => self.chunk().push_op(OpCode::OpGetLocal(idx)), global.clone()(self);
None => { return;
if self.scope().with_stack.is_empty() { }
self.emit_error( }
node.syntax().clone(),
ErrorKind::UnknownStaticVariable,
);
return;
}
// Variable needs to be dynamically resolved match self.resolve_local(ident.text()) {
// at runtime. Some(idx) => self.chunk().push_op(OpCode::OpGetLocal(idx)),
self.emit_constant(Value::String(name.into())); None => {
self.chunk().push_op(OpCode::OpResolveWith) if self.scope().with_stack.is_empty() {
} self.emit_error(node.syntax().clone(), ErrorKind::UnknownStaticVariable);
return;
} }
// Variable needs to be dynamically resolved
// at runtime.
self.emit_constant(Value::String(ident.text().into()));
self.chunk().push_op(OpCode::OpResolveWith)
} }
}; };
} }
@ -815,15 +840,8 @@ impl Compiler {
// If this scope poisoned any builtins or special identifiers, // If this scope poisoned any builtins or special identifiers,
// they need to be reset. // they need to be reset.
if self.scope().poisoned_true == self.scope().scope_depth { let depth = self.scope().scope_depth;
self.scope_mut().poisoned_true = 0; self.scope_mut().unpoison(depth);
}
if self.scope().poisoned_false == self.scope().scope_depth {
self.scope_mut().poisoned_false = 0;
}
if self.scope().poisoned_null == self.scope().scope_depth {
self.scope_mut().poisoned_null = 0;
}
self.scope_mut().scope_depth -= 1; self.scope_mut().scope_depth -= 1;
@ -871,23 +889,24 @@ impl Compiler {
/// compiled by pushing it to the locals. This is used to /// compiled by pushing it to the locals. This is used to
/// determine the stack offset of variables. /// determine the stack offset of variables.
fn declare_local<S: Into<String>>(&mut self, node: rnix::SyntaxNode, name: S) { fn declare_local<S: Into<String>>(&mut self, node: rnix::SyntaxNode, name: S) {
// Set up scope poisoning if required.
let name = name.into(); let name = name.into();
let mut scope = self.scope_mut(); let depth = self.scope().scope_depth;
match name.as_str() {
"true" if scope.poisoned_true == 0 => scope.poisoned_true = scope.scope_depth,
"false" if scope.poisoned_false == 0 => scope.poisoned_false = scope.scope_depth, // Do this little dance to get ahold of the *static* key and
// use it for poisoning if required.
"null" if scope.poisoned_null == 0 => scope.poisoned_null = scope.scope_depth, let key: Option<&'static str> = match self.globals.get_key_value(name.as_str()) {
Some((key, _)) => Some(*key),
_ => {} None => None,
}; };
scope.locals.push(Local { if let Some(global_ident) = key {
self.scope_mut().poison(global_ident, depth);
}
self.scope_mut().locals.push(Local {
depth,
name: name.into(), name: name.into(),
node: Some(node), node: Some(node),
depth: scope.scope_depth,
phantom: false, phantom: false,
used: false, used: false,
}); });
@ -969,6 +988,47 @@ fn normalise_ident_path<I: Iterator<Item = ast::Attr>>(path: I) -> EvalResult<Ve
path.map(attr_to_string).collect() path.map(attr_to_string).collect()
} }
/// Prepare the full set of globals from additional globals supplied
/// by the caller of the compiler, as well as the built-in globals
/// that are always part of the language.
///
/// Note that all builtin functions are *not* considered part of the
/// language in this sense and MUST be supplied as additional global
/// values, including the `builtins` set itself.
fn prepare_globals(additional: HashMap<&'static str, Value>) -> GlobalsMap {
let mut globals: GlobalsMap = HashMap::new();
globals.insert(
"true",
Rc::new(|compiler| {
compiler.chunk().push_op(OpCode::OpTrue);
}),
);
globals.insert(
"false",
Rc::new(|compiler| {
compiler.chunk().push_op(OpCode::OpFalse);
}),
);
globals.insert(
"null",
Rc::new(|compiler| {
compiler.chunk().push_op(OpCode::OpNull);
}),
);
for (ident, value) in additional.into_iter() {
globals.insert(
ident,
Rc::new(move |compiler| compiler.emit_constant(value.clone())),
);
}
globals
}
pub fn compile(expr: ast::Expr, location: Option<PathBuf>) -> EvalResult<CompilationResult> { pub fn compile(expr: ast::Expr, location: Option<PathBuf>) -> EvalResult<CompilationResult> {
let mut root_dir = match location { let mut root_dir = match location {
Some(dir) => Ok(dir), Some(dir) => Ok(dir),
@ -984,8 +1044,12 @@ pub fn compile(expr: ast::Expr, location: Option<PathBuf>) -> EvalResult<Compila
root_dir.pop(); root_dir.pop();
} }
// TODO: accept globals as an external parameter
let globals = prepare_globals(HashMap::new());
let mut c = Compiler { let mut c = Compiler {
root_dir, root_dir,
globals,
contexts: vec![LambdaCtx::new()], contexts: vec![LambdaCtx::new()],
warnings: vec![], warnings: vec![],
errors: vec![], errors: vec![],