tvl-depot/tvix/eval/src/opcode.rs

187 lines
5.8 KiB
Rust
Raw Normal View History

//! This module implements the instruction set running on the abstract
//! machine implemented by tvix.
use std::ops::{AddAssign, Sub};
/// Identifies a constant by its index in the current code chunk.
///
/// This is a `#[repr(transparent)]` newtype, so it has the same layout
/// as the `usize` it wraps.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct ConstantIdx(pub usize);
/// Index of an instruction in the current code chunk.
///
/// This is a `#[repr(transparent)]` newtype, so it has the same layout
/// as the `usize` it wraps. Like the other index newtypes in this
/// module it can be compared for equality.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct CodeIdx(pub usize);
impl AddAssign<usize> for CodeIdx {
fn add_assign(&mut self, rhs: usize) {
*self = CodeIdx(self.0 + rhs)
}
}
impl Sub<usize> for CodeIdx {
type Output = Self;
fn sub(self, rhs: usize) -> Self::Output {
CodeIdx(self.0 - rhs)
}
}
/// Identifies a value in the runtime stack.
///
/// The index is an offset *relative to* the VM value `stack_base` of
/// the `CallFrame` containing the opcode which carries this `StackIdx`;
/// it is not an absolute stack position.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd)]
pub struct StackIdx(pub usize);
/// Identifies an upvalue within a closure's bound-variable upvalue
/// list.
///
/// The index is absolute: it points into the `Upvalues` of the
/// `CallFrame` containing the opcode which carries this `UpvalueIdx`.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct UpvalueIdx(pub usize);
/// Amount by which a jump instruction should change the instruction
/// pointer.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct JumpOffset(pub usize);
/// A count supplied as the argument of an instruction, for example a
/// number of elements.
#[repr(transparent)]
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct Count(pub usize);
/// All variants of this enum carry a bounded amount of data to
/// ensure that no heap allocations are needed for an Opcode.
#[warn(variant_size_differences)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum OpCode {
/// Push a constant onto the stack.
OpConstant(ConstantIdx),
/// Discard a value from the stack.
OpPop,
// Unary operators
OpInvert,
OpNegate,
// Arithmetic binary operators
OpAdd,
OpSub,
OpMul,
OpDiv,
// Comparison operators
OpEqual,
OpLess,
OpLessOrEq,
OpMore,
OpMoreOrEq,
// Logical operators & generic jumps
OpJump(JumpOffset),
OpJumpIfTrue(JumpOffset),
OpJumpIfFalse(JumpOffset),
OpJumpIfNotFound(JumpOffset),
// Attribute sets
/// Construct an attribute set from the given number of key-value pairs on the top of the stack
///
/// Note that this takes the count of *pairs*, not the number of *stack values* - the actual
/// number of values popped off the stack will be twice the argument to this op
OpAttrs(Count),
OpAttrsUpdate,
OpAttrsSelect,
OpAttrsTrySelect,
OpHasAttr,
/// Throw an error if the attribute set at the top of the stack has any attributes
/// other than those listed in the formals of the current lambda
///
/// Panics if the current frame is not a lambda with formals
OpValidateClosedFormals,
// `with`-handling
OpPushWith(StackIdx),
OpPopWith,
OpResolveWith,
// Lists
OpList(Count),
OpConcat,
// Strings
OpInterpolate(Count),
/// Force the Value on the stack and coerce it to a string, always using
/// `CoercionKind::Weak`.
OpCoerceToString,
// Paths
/// Attempt to resolve the Value on the stack using the configured [`NixSearchPath`][]
///
/// [`NixSearchPath`]: crate::nix_search_path::NixSearchPath
OpFindFile,
/// Attempt to resolve a path literal relative to the home dir
OpResolveHomePath,
// Type assertion operators
OpAssertBool,
/// Access local identifiers with statically known positions.
OpGetLocal(StackIdx),
/// Close scopes while leaving their expression value around.
OpCloseScope(Count), // number of locals to pop
/// Return an error indicating that an `assert` failed
OpAssertFail,
// Lambdas & closures
OpCall,
OpGetUpvalue(UpvalueIdx),
/// A Closure which has upvalues but no self-references
OpClosure(ConstantIdx),
/// A Closure which has self-references (direct or via upvalues)
feat(tvix/eval): deduplicate overlap between Closure and Thunk This commit deduplicates the Thunk-like functionality from Closure and unifies it with Thunk. Specifically, we now have one and only one way of breaking reference cycles in the Value-graph: Thunk. No other variant contains a RefCell. This should make it easier to reason about the behavior of the VM. InnerClosure and UpvaluesCarrier are no longer necessary. This refactoring allowed an improvement in code generation: `Rc<RefCell<>>`s are now created only for closures which do not have self-references or deferred upvalues, instead of for all closures. OpClosure has been split into two separate opcodes: - OpClosure creates non-recursive closures with no deferred upvalues. The VM will not create an `Rc<RefCell<>>` when executing this instruction. - OpThunkClosure is used for closures with self-references or deferred upvalues. The VM will create a Thunk when executing this opcode, but the Thunk will start out already in the `ThunkRepr::Evaluated` state, rather than in the `ThunkRepr::Suspeneded` state. To avoid confusion, OpThunk has been renamed OpThunkSuspended. Thanks to @sterni for suggesting that all this could be done without adding an additional variant to ThunkRepr. This does however mean that there will be mutating accesses to `ThunkRepr::Evaluated`, which was not previously the case. The field `is_finalised:bool` has been added to `Closure` to ensure that these mutating accesses are performed only on finalised Closures. Both the check and the field are present only if `#[cfg(debug_assertions)]`. Change-Id: I04131501029772f30e28da8281d864427685097f Signed-off-by: Adam Joseph <adam@westernsemico.com> Reviewed-on: https://cl.tvl.fyi/c/depot/+/7019 Tested-by: BuildkiteCI Reviewed-by: tazjin <tazjin@tvl.su>
2022-10-16 01:10:10 +02:00
OpThunkClosure(ConstantIdx),
/// A suspended thunk, used to ensure laziness
feat(tvix/eval): deduplicate overlap between Closure and Thunk This commit deduplicates the Thunk-like functionality from Closure and unifies it with Thunk. Specifically, we now have one and only one way of breaking reference cycles in the Value-graph: Thunk. No other variant contains a RefCell. This should make it easier to reason about the behavior of the VM. InnerClosure and UpvaluesCarrier are no longer necessary. This refactoring allowed an improvement in code generation: `Rc<RefCell<>>`s are now created only for closures which do not have self-references or deferred upvalues, instead of for all closures. OpClosure has been split into two separate opcodes: - OpClosure creates non-recursive closures with no deferred upvalues. The VM will not create an `Rc<RefCell<>>` when executing this instruction. - OpThunkClosure is used for closures with self-references or deferred upvalues. The VM will create a Thunk when executing this opcode, but the Thunk will start out already in the `ThunkRepr::Evaluated` state, rather than in the `ThunkRepr::Suspeneded` state. To avoid confusion, OpThunk has been renamed OpThunkSuspended. Thanks to @sterni for suggesting that all this could be done without adding an additional variant to ThunkRepr. This does however mean that there will be mutating accesses to `ThunkRepr::Evaluated`, which was not previously the case. The field `is_finalised:bool` has been added to `Closure` to ensure that these mutating accesses are performed only on finalised Closures. Both the check and the field are present only if `#[cfg(debug_assertions)]`. Change-Id: I04131501029772f30e28da8281d864427685097f Signed-off-by: Adam Joseph <adam@westernsemico.com> Reviewed-on: https://cl.tvl.fyi/c/depot/+/7019 Tested-by: BuildkiteCI Reviewed-by: tazjin <tazjin@tvl.su>
2022-10-16 01:10:10 +02:00
OpThunkSuspended(ConstantIdx),
OpForce,
/// Finalise initialisation of the upvalues of the value in the given stack
/// index (which must be a Value::Thunk) after the scope is fully bound.
OpFinalise(StackIdx),
refactor(tvix/eval): flatten call stack of VM using generators Warning: This is probably the biggest refactor in tvix-eval history, so far. This replaces all instances of trampolines and recursion during evaluation of the VM loop with generators. A generator is an asynchronous function that can be suspended to yield a message (in our case, vm::generators::GeneratorRequest) and receive a response (vm::generators::GeneratorResponsee). The `genawaiter` crate provides an interpreter for generators that can drive their execution and lets us move control flow between the VM and suspended generators. To do this, massive changes have occured basically everywhere in the code. On a high-level: 1. The VM is now organised around a frame stack. A frame is either a call frame (execution of Tvix bytecode) or a generator frame (a running or suspended generator). The VM has an outer loop that pops a frame off the frame stack, and then enters an inner loop either driving the execution of the bytecode or the execution of a generator. Both types of frames have several branches that can result in the frame re-enqueuing itself, and enqueuing some other work (in the form of a different frame) on top of itself. The VM will eventually resume the frame when everything "above" it has been suspended. In this way, the VM's new frame stack takes over much of the work that was previously achieved by recursion. 2. All methods previously taking a VM have been refactored into async functions that instead emit/receive generator messages for communication with the VM. Notably, this includes *all* builtins. This has had some other effects: - Some test have been removed or commented out, either because they tested code that was mostly already dead (nix_eq) or because they now require generator scaffolding which we do not have in place for tests (yet). - Because generator functions are technically async (though no async IO is involved), we lose the ability to use much of the Rust standard library e.g. 
in builtins. This has led to many algorithms being unrolled into iterative versions instead of iterator combinations, and things like sorting had to be implemented from scratch. - Many call sites that previously saw a `Result<..., ErrorKind>` bubble up now only see the result value, as the error handling is encapsulated within the generator loop. This reduces number of places inside of builtin implementations where error context can be attached to calls that can fail. Currently what we gain in this tradeoff is significantly more detailed span information (which we still need to bubble up, this commit does not change the error display). We'll need to do some analysis later of how useful the errors turn out to be and potentially introduce some methods for attaching context to a generator frame again. This change is very difficult to do in stages, as it is very much an "all or nothing" change that affects huge parts of the codebase. I've tried to isolate changes that can be isolated into the parent CLs of this one, but this change is still quite difficult to wrap one's mind and I'm available to discuss it and explain things to any reviewer. Fixes: b/238, b/237, b/251 and potentially others. Change-Id: I39244163ff5bbecd169fe7b274df19262b515699 Reviewed-on: https://cl.tvl.fyi/c/depot/+/8104 Reviewed-by: raitobezarius <tvl@lahfa.xyz> Reviewed-by: Adam Joseph <adam@westernsemico.com> Tested-by: BuildkiteCI
2023-02-14 13:02:39 +01:00
/// Final instruction emitted in a chunk. Does not have an
/// inherent effect, but can simplify VM logic as a marker in some
/// cases.
///
/// Can be thought of as "returning" the value to the parent
/// frame, hence the name.
OpReturn,
feat(tvix/eval): deduplicate overlap between Closure and Thunk This commit deduplicates the Thunk-like functionality from Closure and unifies it with Thunk. Specifically, we now have one and only one way of breaking reference cycles in the Value-graph: Thunk. No other variant contains a RefCell. This should make it easier to reason about the behavior of the VM. InnerClosure and UpvaluesCarrier are no longer necessary. This refactoring allowed an improvement in code generation: `Rc<RefCell<>>`s are now created only for closures which do not have self-references or deferred upvalues, instead of for all closures. OpClosure has been split into two separate opcodes: - OpClosure creates non-recursive closures with no deferred upvalues. The VM will not create an `Rc<RefCell<>>` when executing this instruction. - OpThunkClosure is used for closures with self-references or deferred upvalues. The VM will create a Thunk when executing this opcode, but the Thunk will start out already in the `ThunkRepr::Evaluated` state, rather than in the `ThunkRepr::Suspeneded` state. To avoid confusion, OpThunk has been renamed OpThunkSuspended. Thanks to @sterni for suggesting that all this could be done without adding an additional variant to ThunkRepr. This does however mean that there will be mutating accesses to `ThunkRepr::Evaluated`, which was not previously the case. The field `is_finalised:bool` has been added to `Closure` to ensure that these mutating accesses are performed only on finalised Closures. Both the check and the field are present only if `#[cfg(debug_assertions)]`. Change-Id: I04131501029772f30e28da8281d864427685097f Signed-off-by: Adam Joseph <adam@westernsemico.com> Reviewed-on: https://cl.tvl.fyi/c/depot/+/7019 Tested-by: BuildkiteCI Reviewed-by: tazjin <tazjin@tvl.su>
2022-10-16 01:10:10 +02:00
// [`OpClosure`], [`OpThunkSuspended`], and [`OpThunkClosure`] have a
// variable number of arguments to the instruction, which is
// represented here by making their data part of the opcodes.
// Each of these two opcodes has a `ConstantIdx`, which must
// reference a `Value::Blueprint(Lambda)`. The `upvalue_count`
// field in that `Lambda` indicates the number of arguments it
// takes, and the opcode must be followed by exactly this number
// of `Data*` opcodes. The VM skips over these by advancing the
// instruction pointer.
//
// It is illegal for a `Data*` opcode to appear anywhere else.
/// Populate a static upvalue by copying from the stack immediately.
DataStackIdx(StackIdx),
/// Populate a static upvalue of a thunk by copying it the stack, but do
/// when the thunk is finalised (by OpFinalise) rather than immediately.
DataDeferredLocal(StackIdx),
/// Populate a static upvalue by copying it from the upvalues of an
/// enclosing scope.
DataUpvalueIdx(UpvalueIdx),
/// Populate dynamic upvalues by saving a copy of the with-stack.
DataCaptureWith,
}