refactor(tvix/eval): capture entire with_stack in upvalues

This completely rewrites the handling of "dynamic upvalues" to,
instead of resolving them at thunk/closure instantiation time (which
forces some values too early), capture the entire with stack of parent
contexts if it exists.

There are a couple of things in here that could be written more
efficiently, but I'm first working through this to get to a bug
related to with + recursion and the code complexity of some of the
optimisations is distracting.

Change-Id: Ia538e06c9146e3bf8decb9adf02dd726d2c651cf
Reviewed-on: https://cl.tvl.fyi/c/depot/+/6486
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
This commit is contained in:
Vincent Ambo 2022-09-06 23:13:48 +03:00 committed by tazjin
parent d75b207a63
commit 07ea30370e
5 changed files with 122 additions and 148 deletions

View file

@ -18,7 +18,6 @@ mod scope;
use path_clean::PathClean;
use rnix::ast::{self, AstToken, HasEntry};
use rowan::ast::{AstChildren, AstNode};
use smol_str::SmolStr;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::rc::Rc;
@ -45,6 +44,7 @@ pub struct CompilationOutput {
struct LambdaCtx {
lambda: Lambda,
scope: Scope,
captures_with_stack: bool,
}
impl LambdaCtx {
@ -52,6 +52,7 @@ impl LambdaCtx {
LambdaCtx {
lambda: Lambda::new_anonymous(),
scope: Default::default(),
captures_with_stack: false,
}
}
@ -59,6 +60,7 @@ impl LambdaCtx {
LambdaCtx {
lambda: Lambda::new_anonymous(),
scope: self.scope.inherit(),
captures_with_stack: false,
}
}
}
@ -869,32 +871,18 @@ impl Compiler<'_, '_> {
return;
}
// Even worse - are we dealing with a dynamic upvalue?
if let Some(idx) =
self.resolve_dynamic_upvalue(self.contexts.len() - 1, ident.text(), &node)
{
// Edge case: Current scope *also* has a non-empty
// `with`-stack. This means we need to resolve
// both in this scope, and in the upvalues.
if self.scope().has_with() {
self.emit_literal_ident(&node);
self.push_op(OpCode::OpResolveWithOrUpvalue(idx), &node);
return;
}
self.push_op(OpCode::OpGetUpvalue(idx), &node);
// If there is a non-empty `with`-stack (or a parent
// context with one), emit a runtime dynamic
// resolution instruction.
if self.has_dynamic_ancestor() {
self.emit_literal_ident(&node);
self.push_op(OpCode::OpResolveWith, &node);
return;
}
if !self.scope().has_with() {
self.emit_error(self.span_for(&node), ErrorKind::UnknownStaticVariable);
return;
}
// Variable needs to be dynamically resolved at
// runtime.
self.emit_literal_ident(&node);
self.push_op(OpCode::OpResolveWith, &node);
// Otherwise, this variable is missing.
self.emit_error(self.span_for(&node), ErrorKind::UnknownStaticVariable);
return;
}
LocalPosition::Known(idx) => {
@ -1078,6 +1066,12 @@ impl Compiler<'_, '_> {
// Check if tail-call optimisation is possible and perform it.
optimise_tail_call(&mut compiled.lambda.chunk);
// Capturing the with stack counts as an upvalue, as it is
// emitted as an upvalue data instruction.
if compiled.captures_with_stack {
compiled.lambda.upvalue_count += 1;
}
let lambda = Rc::new(compiled.lambda);
self.observer.observe_compiled_lambda(&lambda);
@ -1095,7 +1089,12 @@ impl Compiler<'_, '_> {
let blueprint_idx = self.chunk().push_constant(Value::Blueprint(lambda));
self.push_op(OpCode::OpClosure(blueprint_idx), &node);
self.emit_upvalue_data(outer_slot, compiled.scope.upvalues);
self.emit_upvalue_data(
outer_slot,
&node,
compiled.scope.upvalues,
compiled.captures_with_stack,
);
}
fn compile_apply(&mut self, slot: LocalIdx, node: ast::Apply) {
@ -1126,6 +1125,13 @@ impl Compiler<'_, '_> {
let mut thunk = self.contexts.pop().unwrap();
optimise_tail_call(&mut thunk.lambda.chunk);
// Capturing the with stack counts as an upvalue, as it is
// emitted as an upvalue data instruction.
if thunk.captures_with_stack {
thunk.lambda.upvalue_count += 1;
}
let lambda = Rc::new(thunk.lambda);
self.observer.observe_compiled_thunk(&lambda);
@ -1140,12 +1146,23 @@ impl Compiler<'_, '_> {
let blueprint_idx = self.chunk().push_constant(Value::Blueprint(lambda));
self.push_op(OpCode::OpThunk(blueprint_idx), node);
self.emit_upvalue_data(outer_slot, thunk.scope.upvalues);
self.emit_upvalue_data(
outer_slot,
node,
thunk.scope.upvalues,
thunk.captures_with_stack,
);
}
/// Emit the data instructions that the runtime needs to correctly
/// assemble the provided upvalues array.
fn emit_upvalue_data(&mut self, slot: LocalIdx, upvalues: Vec<Upvalue>) {
/// assemble the upvalues struct.
fn emit_upvalue_data<T: AstNode>(
&mut self,
slot: LocalIdx,
node: &T,
upvalues: Vec<Upvalue>,
capture_with: bool,
) {
let this_depth = self.scope()[slot].depth;
let this_stack_slot = self.scope().stack_index(slot);
@ -1170,16 +1187,13 @@ impl Compiler<'_, '_> {
UpvalueKind::Upvalue(idx) => {
self.push_op(OpCode::DataUpvalueIdx(idx), &upvalue.node);
}
UpvalueKind::Dynamic { name, up } => {
let idx = self.chunk().push_constant(Value::String(name.into()));
self.push_op(OpCode::DataDynamicIdx(idx), &upvalue.node);
if let Some(up) = up {
self.push_op(OpCode::DataDynamicAncestor(up), &upvalue.node);
}
}
};
}
if capture_with {
// TODO(tazjin): probably better to emit span for the ident that caused this
self.push_op(OpCode::DataCaptureWith, node);
}
}
/// Emit the literal string value of an identifier. Required for
@ -1322,58 +1336,25 @@ impl Compiler<'_, '_> {
None
}
/// If no static resolution for a potential upvalue was found,
/// finds the lowest lambda context that has a `with`-stack and
/// thread dynamic upvalues all the way through.
///
/// At runtime, as closures are being constructed they either
/// capture a dynamically available upvalue, take an upvalue from
/// their "ancestor" or leave a sentinel value on the stack.
///
/// As such an upvalue is actually accessed, an error is produced
/// when the sentinel is found. See the runtime's handling of
/// dynamic upvalues for details.
fn resolve_dynamic_upvalue(
&mut self,
at: usize,
name: &str,
node: &rnix::ast::Ident,
) -> Option<UpvalueIdx> {
if at == 0 {
// There can not be any upvalue at the outermost context.
return None;
}
/// Determine whether the current lambda context has any ancestors
/// that use dynamic scope resolution, and mark contexts as
/// needing to capture their enclosing `with`-stack in their
/// upvalues.
fn has_dynamic_ancestor(&mut self) -> bool {
let mut ancestor_has_with = false;
if let Some((lowest_idx, _)) = self
.contexts
.iter()
.enumerate()
.find(|(_, c)| c.scope.has_with())
{
// An enclosing lambda context has dynamic values. Each
// context in the chain from that point on now needs to
// capture dynamic upvalues because we can not statically
// know at which level the correct one is located.
let name = SmolStr::new(name);
let mut upvalue_idx = None;
for idx in lowest_idx..=at {
upvalue_idx = Some(self.add_upvalue(
idx,
node,
UpvalueKind::Dynamic {
name: name.clone(),
up: upvalue_idx,
},
));
for ctx in self.contexts.iter_mut() {
if ancestor_has_with {
// If the ancestor has an active with stack, mark this
// lambda context as needing to capture it.
ctx.captures_with_stack = true;
} else {
// otherwise, check this context and move on
ancestor_has_with = ctx.scope.has_with();
}
// Return the outermost upvalue index (i.e. the one of the
// current context).
return upvalue_idx;
}
None
ancestor_has_with
}
fn add_upvalue(

View file

@ -15,8 +15,6 @@ use std::{
ops::Index,
};
use smol_str::SmolStr;
use crate::opcode::{StackIdx, UpvalueIdx};
#[derive(Debug)]
@ -102,17 +100,6 @@ pub enum UpvalueKind {
/// This upvalue captures an enclosing upvalue.
Upvalue(UpvalueIdx),
/// This upvalue captures a dynamically resolved value (i.e.
/// `with`).
///
/// It stores the identifier with which to perform a dynamic
/// lookup, as well as the optional upvalue index in the enclosing
/// function (if any).
Dynamic {
name: SmolStr,
up: Option<UpvalueIdx>,
},
}
#[derive(Clone, Debug)]

View file

@ -125,6 +125,5 @@ pub enum OpCode {
DataLocalIdx(StackIdx),
DataDeferredLocal(StackIdx),
DataUpvalueIdx(UpvalueIdx),
DataDynamicIdx(ConstantIdx),
DataDynamicAncestor(UpvalueIdx),
DataCaptureWith,
}

View file

@ -16,12 +16,14 @@ use crate::{opcode::UpvalueIdx, Value};
#[derive(Clone, Debug)]
pub struct Upvalues {
upvalues: Vec<Value>,
with_stack: Option<Vec<Value>>,
}
impl Upvalues {
pub fn with_capacity(count: usize) -> Self {
Upvalues {
upvalues: Vec::with_capacity(count),
with_stack: None,
}
}
@ -29,6 +31,22 @@ impl Upvalues {
pub fn push(&mut self, value: Value) {
self.upvalues.push(value);
}
/// Set the captured with stack.
pub fn set_with_stack(&mut self, with_stack: Vec<Value>) {
self.with_stack = Some(with_stack);
}
pub fn with_stack(&self) -> Option<&Vec<Value>> {
self.with_stack.as_ref()
}
pub fn with_stack_len(&self) -> usize {
match &self.with_stack {
None => 0,
Some(stack) => stack.len(),
}
}
}
impl Index<UpvalueIdx> for Upvalues {

View file

@ -7,7 +7,7 @@ use crate::{
chunk::Chunk,
errors::{Error, ErrorKind, EvalResult},
observer::Observer,
opcode::{CodeIdx, ConstantIdx, Count, JumpOffset, OpCode, StackIdx, UpvalueIdx},
opcode::{CodeIdx, Count, JumpOffset, OpCode, StackIdx, UpvalueIdx},
upvalues::{UpvalueCarrier, Upvalues},
value::{Builtin, Closure, Lambda, NixAttrs, NixList, Thunk, Value},
};
@ -139,10 +139,6 @@ impl<'o> VM<'o> {
op
}
fn peek_op(&self) -> OpCode {
self.chunk().code[self.frame().ip]
}
fn pop(&mut self) -> Value {
self.stack.pop().expect("runtime stack empty")
}
@ -478,11 +474,6 @@ impl<'o> VM<'o> {
OpCode::OpGetUpvalue(upv_idx) => {
let value = self.frame().upvalue(upv_idx).clone();
if let Value::DynamicUpvalueMissing(name) = value {
return Err(self
.error(ErrorKind::UnknownDynamicVariable(name.as_str().to_string())));
}
self.push(value);
}
@ -552,8 +543,7 @@ impl<'o> VM<'o> {
OpCode::DataLocalIdx(_)
| OpCode::DataDeferredLocal(_)
| OpCode::DataUpvalueIdx(_)
| OpCode::DataDynamicIdx(_)
| OpCode::DataDynamicAncestor(_) => {
| OpCode::DataCaptureWith => {
panic!("VM bug: attempted to execute data-carrying operand")
}
}
@ -602,38 +592,6 @@ impl<'o> VM<'o> {
Ok(())
}
fn resolve_dynamic_upvalue(&mut self, ident_idx: ConstantIdx) -> EvalResult<Value> {
let chunk = self.chunk();
let ident = fallible!(self, chunk[ident_idx].to_str());
// Peek at the current instruction (note: IP has already
// advanced!) to see if it is actually data indicating a
// "fallback upvalue" in case the dynamic could not be
// resolved at this level.
let up = match self.peek_op() {
OpCode::DataDynamicAncestor(idx) => {
// advance ip past this data
self.inc_ip();
Some(idx)
}
_ => None,
};
match self.resolve_with(ident.as_str()) {
Ok(v) => Ok(v),
Err(Error {
kind: ErrorKind::UnknownDynamicVariable(_),
..
}) => match up {
Some(idx) => Ok(self.frame().upvalue(idx).clone()),
None => Ok(Value::DynamicUpvalueMissing(ident)),
},
Err(err) => Err(err),
}
}
/// Resolve a dynamic identifier through the with-stack at runtime.
fn resolve_with(&mut self, ident: &str) -> EvalResult<Value> {
// Iterate over the with_stack manually to avoid borrowing
@ -651,6 +609,24 @@ impl<'o> VM<'o> {
}
}
// Iterate over the captured with stack if one exists. This is
// extra tricky to do without a lot of cloning.
for idx in (0..self.frame().upvalues.with_stack_len()).rev() {
// This is safe because having an index here guarantees
// that the stack is present.
let with =
unsafe { self.frame().upvalues.with_stack().unwrap_unchecked()[idx].clone() };
if let Value::Thunk(thunk) = &with {
fallible!(self, thunk.force(self));
}
match fallible!(self, with.to_attrs()).select(ident) {
None => continue,
Some(val) => return Ok(val.clone()),
}
}
Err(self.error(ErrorKind::UnknownDynamicVariable(ident.to_string())))
}
@ -671,15 +647,28 @@ impl<'o> VM<'o> {
upvalues.push(self.frame().upvalue(upv_idx).clone());
}
OpCode::DataDynamicIdx(ident_idx) => {
let value = self.resolve_dynamic_upvalue(ident_idx)?;
upvalues.push(value);
}
OpCode::DataDeferredLocal(idx) => {
upvalues.push(Value::DeferredUpvalue(idx));
}
OpCode::DataCaptureWith => {
// Start the captured with_stack off of the
// current call frame's captured with_stack, ...
let mut captured_with_stack = self
.frame()
.upvalues
.with_stack()
.map(Clone::clone)
// ... or make an empty one if there isn't one already.
.unwrap_or_else(|| Vec::with_capacity(self.with_stack.len()));
for idx in &self.with_stack {
captured_with_stack.push(self.stack[*idx].clone());
}
upvalues.set_with_stack(captured_with_stack);
}
_ => panic!("compiler error: missing closure operand"),
}
}