fix(tvix/eval): distinguish statically between StackIdx and LocalIdx

Previously the functions in the scope module returned usize values,
which - sometimes from the same function - were either indexes into
the runtime stack *or* indexes into the compiler's local stack.

This is extremely confusing because it requires the caller to be aware
of the difference, and it actually caused subtle bugs.

To avoid this, there is now a new LocalIdx wrapper type which is used
by the scope module to return indexes into the compiler's stack, as
well as helpers for accounting for the differences between these
indexes and the runtime indexes.

Change-Id: I58f0b50ad94b28a304e3372fd9731b6590b3fdb8
Reviewed-on: https://cl.tvl.fyi/c/depot/+/6340
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
This commit is contained in:
Vincent Ambo 2022-08-28 19:38:17 +03:00 committed by tazjin
parent 68aad6d4cf
commit 24811d7655
2 changed files with 115 additions and 94 deletions

View file

@ -25,11 +25,11 @@ use std::rc::Rc;
use crate::chunk::Chunk; use crate::chunk::Chunk;
use crate::errors::{Error, ErrorKind, EvalResult}; use crate::errors::{Error, ErrorKind, EvalResult};
use crate::opcode::{CodeIdx, Count, JumpOffset, OpCode, StackIdx, UpvalueIdx}; use crate::opcode::{CodeIdx, Count, JumpOffset, OpCode, UpvalueIdx};
use crate::value::{Closure, Lambda, Value}; use crate::value::{Closure, Lambda, Value};
use crate::warnings::{EvalWarning, WarningKind}; use crate::warnings::{EvalWarning, WarningKind};
use self::scope::{Local, LocalPosition, Scope, Upvalue}; use self::scope::{Local, LocalIdx, LocalPosition, Scope, Upvalue};
/// Represents the result of compiling a piece of Nix code. If /// Represents the result of compiling a piece of Nix code. If
/// compilation was successful, the resulting bytecode can be passed /// compilation was successful, the resulting bytecode can be passed
@ -106,7 +106,7 @@ impl Compiler {
// Actual code-emitting AST traversal methods. // Actual code-emitting AST traversal methods.
impl Compiler { impl Compiler {
fn compile(&mut self, slot: Option<usize>, expr: ast::Expr) { fn compile(&mut self, slot: Option<LocalIdx>, expr: ast::Expr) {
match expr { match expr {
ast::Expr::Literal(literal) => self.compile_literal(literal), ast::Expr::Literal(literal) => self.compile_literal(literal),
ast::Expr::Path(path) => self.compile_path(path), ast::Expr::Path(path) => self.compile_path(path),
@ -437,7 +437,8 @@ impl Compiler {
} }
LocalPosition::Known(idx) => { LocalPosition::Known(idx) => {
self.chunk().push_op(OpCode::OpGetLocal(idx)) let stack_idx = self.scope().stack_index(idx);
self.chunk().push_op(OpCode::OpGetLocal(stack_idx))
} }
LocalPosition::Recursive(_) => { LocalPosition::Recursive(_) => {
@ -624,7 +625,7 @@ impl Compiler {
ident.syntax().clone(), ident.syntax().clone(),
ident.ident_token().unwrap().text(), ident.ident_token().unwrap().text(),
); );
self.mark_initialised(idx); self.scope_mut().mark_initialised(idx);
} }
} }
@ -637,7 +638,7 @@ impl Compiler {
ident.syntax().clone(), ident.syntax().clone(),
ident.ident_token().unwrap().text(), ident.ident_token().unwrap().text(),
); );
self.mark_initialised(idx); self.scope_mut().mark_initialised(idx);
} }
} }
} }
@ -656,7 +657,7 @@ impl Compiler {
// First pass to ensure that all identifiers are known; // First pass to ensure that all identifiers are known;
// required for resolving recursion. // required for resolving recursion.
let mut entries: Vec<(usize, rnix::ast::Expr)> = vec![]; let mut entries: Vec<(LocalIdx, rnix::ast::Expr)> = vec![];
for entry in node.attrpath_values() { for entry in node.attrpath_values() {
let mut path = match normalise_ident_path(entry.attrpath().unwrap().attrs()) { let mut path = match normalise_ident_path(entry.attrpath().unwrap().attrs()) {
Ok(p) => p, Ok(p) => p,
@ -674,23 +675,25 @@ impl Compiler {
entry.attrpath().unwrap().syntax().clone(), entry.attrpath().unwrap().syntax().clone(),
path.pop().unwrap(), path.pop().unwrap(),
); );
entries.push((idx, entry.value().unwrap())); entries.push((idx, entry.value().unwrap()));
} }
// Second pass to place the values in the correct stack slots. // Second pass to place the values in the correct stack slots.
let indices: Vec<usize> = entries.iter().map(|(idx, _)| *idx).collect(); let indices: Vec<LocalIdx> = entries.iter().map(|(idx, _)| *idx).collect();
for (idx, value) in entries.into_iter() { for (idx, value) in entries.into_iter() {
self.compile(Some(idx), value); self.compile(Some(idx), value);
// Any code after this point will observe the value in the // Any code after this point will observe the value in the
// right stack slot, so mark it as initialised. // right stack slot, so mark it as initialised.
self.mark_initialised(idx); self.scope_mut().mark_initialised(idx);
} }
// Third pass to emit finaliser instructions if necessary. // Third pass to emit finaliser instructions if necessary.
for idx in indices { for idx in indices {
if self.scope().locals[idx].needs_finaliser { if self.scope()[idx].needs_finaliser {
self.chunk().push_op(OpCode::OpFinalise(StackIdx(idx))); let stack_idx = self.scope().stack_index(idx);
self.chunk().push_op(OpCode::OpFinalise(stack_idx));
} }
} }
@ -747,7 +750,10 @@ impl Compiler {
self.chunk().push_op(OpCode::OpResolveWith) self.chunk().push_op(OpCode::OpResolveWith)
} }
LocalPosition::Known(idx) => self.chunk().push_op(OpCode::OpGetLocal(idx)), LocalPosition::Known(idx) => {
let stack_idx = self.scope().stack_index(idx);
self.chunk().push_op(OpCode::OpGetLocal(stack_idx))
}
LocalPosition::Recursive(_) => panic!("TODO: unclear if this can happen"), LocalPosition::Recursive(_) => panic!("TODO: unclear if this can happen"),
}; };
@ -762,22 +768,12 @@ impl Compiler {
// resolve that directly (thus avoiding duplication on the // resolve that directly (thus avoiding duplication on the
// stack). // stack).
self.compile(None, node.namespace().unwrap()); self.compile(None, node.namespace().unwrap());
self.declare_phantom(); let local_idx = self.scope_mut().declare_phantom();
let with_idx = self.scope().stack_index(local_idx);
self.scope_mut().push_with(); self.scope_mut().push_with();
let with_idx = self self.chunk().push_op(OpCode::OpPushWith(with_idx));
.scope()
.locals
.iter()
// Calculate the with_idx without taking into account
// uninitialised variables that are not yet in their stack
// slots.
.filter(|l| l.initialised)
.count()
- 1;
self.chunk().push_op(OpCode::OpPushWith(StackIdx(with_idx)));
self.compile(None, node.body().unwrap()); self.compile(None, node.body().unwrap());
@ -786,7 +782,7 @@ impl Compiler {
self.end_scope(); self.end_scope();
} }
fn compile_lambda(&mut self, slot: Option<usize>, node: ast::Lambda) { fn compile_lambda(&mut self, slot: Option<LocalIdx>, node: ast::Lambda) {
// Open new lambda context in compiler, which has its own // Open new lambda context in compiler, which has its own
// scope etc. // scope etc.
self.contexts.push(LambdaCtx::new()); self.contexts.push(LambdaCtx::new());
@ -805,7 +801,7 @@ impl Compiler {
.to_string(); .to_string();
let idx = self.declare_local(param.syntax().clone(), &name); let idx = self.declare_local(param.syntax().clone(), &name);
self.mark_initialised(idx); self.scope_mut().mark_initialised(idx);
} }
} }
@ -841,20 +837,23 @@ impl Compiler {
self.chunk().push_op(OpCode::OpClosure(closure_idx)); self.chunk().push_op(OpCode::OpClosure(closure_idx));
for upvalue in compiled.scope.upvalues { for upvalue in compiled.scope.upvalues {
match upvalue { match upvalue {
Upvalue::Stack(idx) if slot.is_none() => { Upvalue::Local(idx) if slot.is_none() => {
self.chunk().push_op(OpCode::DataLocalIdx(idx)); let stack_idx = self.scope().stack_index(idx);
self.chunk().push_op(OpCode::DataLocalIdx(stack_idx));
} }
Upvalue::Stack(idx) => { Upvalue::Local(idx) => {
let stack_idx = self.scope().stack_index(idx);
// If the upvalue slot is located *after* the // If the upvalue slot is located *after* the
// closure, the upvalue resolution must be // closure, the upvalue resolution must be
// deferred until the scope is fully initialised // deferred until the scope is fully initialised
// and can be finalised. // and can be finalised.
if slot.unwrap() < idx.0 { if slot.unwrap() < idx {
self.chunk().push_op(OpCode::DataDeferredLocal(idx)); self.chunk().push_op(OpCode::DataDeferredLocal(stack_idx));
self.mark_needs_finaliser(slot.unwrap()); self.scope_mut().mark_needs_finaliser(slot.unwrap());
} else { } else {
self.chunk().push_op(OpCode::DataLocalIdx(idx)); self.chunk().push_op(OpCode::DataLocalIdx(stack_idx));
} }
} }
@ -960,7 +959,7 @@ impl Compiler {
/// Declare a local variable known in the scope that is being /// Declare a local variable known in the scope that is being
/// compiled by pushing it to the locals. This is used to /// compiled by pushing it to the locals. This is used to
/// determine the stack offset of variables. /// determine the stack offset of variables.
fn declare_local<S: Into<String>>(&mut self, node: rnix::SyntaxNode, name: S) -> usize { fn declare_local<S: Into<String>>(&mut self, node: rnix::SyntaxNode, name: S) -> LocalIdx {
let name = name.into(); let name = name.into();
let depth = self.scope().scope_depth; let depth = self.scope().scope_depth;
@ -991,43 +990,7 @@ impl Compiler {
); );
} }
let idx = self.scope().locals.len(); self.scope_mut().declare_local(name, node)
self.scope_mut().locals.push(Local {
name,
depth,
initialised: false,
needs_finaliser: false,
node: Some(node),
phantom: false,
used: false,
});
idx
}
/// Declare a local variable that occupies a stack slot and should
/// be accounted for, but is not directly accessible by users
/// (e.g. attribute sets used for `with`).
fn declare_phantom(&mut self) {
let depth = self.scope().scope_depth;
self.scope_mut().locals.push(Local {
depth,
initialised: true,
needs_finaliser: false,
name: "".into(),
node: None,
phantom: true,
used: true,
});
}
/// Mark local as initialised after compiling its expression.
fn mark_initialised(&mut self, local_idx: usize) {
self.scope_mut().locals[local_idx].initialised = true;
}
/// Mark local as needing a finaliser.
fn mark_needs_finaliser(&mut self, local_idx: usize) {
self.scope_mut().locals[local_idx].needs_finaliser = true;
} }
fn resolve_upvalue(&mut self, ctx_idx: usize, name: &str) -> Option<UpvalueIdx> { fn resolve_upvalue(&mut self, ctx_idx: usize, name: &str) -> Option<UpvalueIdx> {
@ -1043,7 +1006,7 @@ impl Compiler {
// guaranteed to be placed on the stack (i.e. in the right // guaranteed to be placed on the stack (i.e. in the right
// position) *during* their runtime construction // position) *during* their runtime construction
LocalPosition::Known(idx) | LocalPosition::Recursive(idx) => { LocalPosition::Known(idx) | LocalPosition::Recursive(idx) => {
return Some(self.add_upvalue(ctx_idx, Upvalue::Stack(idx))) return Some(self.add_upvalue(ctx_idx, Upvalue::Local(idx)))
} }
LocalPosition::Unknown => { /* continue below */ } LocalPosition::Unknown => { /* continue below */ }

View file

@ -10,7 +10,10 @@
//! stack indices. To do this, the compiler simulates where locals //! stack indices. To do this, the compiler simulates where locals
//! will be at runtime using the data structures implemented here. //! will be at runtime using the data structures implemented here.
use std::collections::{hash_map, HashMap}; use std::{
collections::{hash_map, HashMap},
ops::Index,
};
use smol_str::SmolStr; use smol_str::SmolStr;
@ -56,13 +59,13 @@ pub enum LocalPosition {
/// Local is not known in this scope. /// Local is not known in this scope.
Unknown, Unknown,
/// Local is known and defined at the given stack index. /// Local is known at the given local index.
Known(StackIdx), Known(LocalIdx),
/// Local is known, but is being accessed recursively within its /// Local is known, but is being accessed recursively within its
/// own initialisation. Depending on context, this is either an /// own initialisation. Depending on context, this is either an
/// error or forcing a closure/thunk. /// error or forcing a closure/thunk.
Recursive(StackIdx), Recursive(LocalIdx),
} }
/// Represents the different ways in which upvalues can be captured in /// Represents the different ways in which upvalues can be captured in
@ -70,7 +73,7 @@ pub enum LocalPosition {
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub enum Upvalue { pub enum Upvalue {
/// This upvalue captures a local from the stack. /// This upvalue captures a local from the stack.
Stack(StackIdx), Local(LocalIdx),
/// This upvalue captures an enclosing upvalue. /// This upvalue captures an enclosing upvalue.
Upvalue(UpvalueIdx), Upvalue(UpvalueIdx),
@ -87,6 +90,13 @@ pub enum Upvalue {
}, },
} }
/// Represents the index of a local in the scope's local array, which
/// is subtly different from its `StackIdx` (which excludes
/// uninitialised values in between).
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
pub struct LocalIdx(usize);
/// Represents a scope known during compilation, which can be resolved /// Represents a scope known during compilation, which can be resolved
/// directly to stack indices. /// directly to stack indices.
/// ///
@ -113,6 +123,14 @@ pub struct Scope {
poisoned_tokens: HashMap<&'static str, usize>, poisoned_tokens: HashMap<&'static str, usize>,
} }
impl Index<LocalIdx> for Scope {
type Output = Local;
fn index(&self, index: LocalIdx) -> &Self::Output {
&self.locals[index.0]
}
}
impl Scope { impl Scope {
/// Mark a globally defined token as poisoned. /// Mark a globally defined token as poisoned.
pub fn poison(&mut self, name: &'static str, depth: usize) { pub fn poison(&mut self, name: &'static str, depth: usize) {
@ -165,29 +183,69 @@ impl Scope {
// we know its final runtime stack position, but it is // we know its final runtime stack position, but it is
// not yet on the stack. // not yet on the stack.
if !local.initialised { if !local.initialised {
return LocalPosition::Recursive(StackIdx(idx)); return LocalPosition::Recursive(LocalIdx(idx));
} }
// This local is known, but we need to account for return LocalPosition::Known(LocalIdx(idx));
// uninitialised variables in this "initialiser
// stack".
return LocalPosition::Known(self.resolve_uninit(idx));
} }
} }
LocalPosition::Unknown LocalPosition::Unknown
} }
/// Return the "initialiser stack slot" of a value, that is the /// Declare a local variable that occupies a stack slot and should
/// stack slot of a value which might only exist during the /// be accounted for, but is not directly accessible by users
/// initialisation of another. This requires accounting for the /// (e.g. attribute sets used for `with`).
/// stack offsets of any unitialised variables. pub fn declare_phantom(&mut self) -> LocalIdx {
fn resolve_uninit(&mut self, locals_idx: usize) -> StackIdx { let idx = self.locals.len();
StackIdx( self.locals.push(Local {
self.locals[..locals_idx] depth: self.scope_depth,
.iter() initialised: true,
.filter(|local| local.initialised) needs_finaliser: false,
.count(), name: "".into(),
) node: None,
phantom: true,
used: true,
});
LocalIdx(idx)
}
/// Declare an uninitialised local variable.
pub fn declare_local(&mut self, name: String, node: rnix::SyntaxNode) -> LocalIdx {
let idx = self.locals.len();
self.locals.push(Local {
name,
depth: self.scope_depth,
initialised: false,
needs_finaliser: false,
node: Some(node),
phantom: false,
used: false,
});
LocalIdx(idx)
}
/// Mark local as initialised after compiling its expression.
pub fn mark_initialised(&mut self, idx: LocalIdx) {
self.locals[idx.0].initialised = true;
}
/// Mark local as needing a finaliser.
pub fn mark_needs_finaliser(&mut self, idx: LocalIdx) {
self.locals[idx.0].needs_finaliser = true;
}
/// Compute the runtime stack index for a given local by
/// accounting for uninitialised variables at scopes below this
/// one.
pub fn stack_index(&self, idx: LocalIdx) -> StackIdx {
let uninitialised_count = self.locals[..(idx.0)]
.iter()
.filter(|l| !l.initialised && self[idx].above(l.depth))
.count();
StackIdx(idx.0 - uninitialised_count)
} }
} }