tvl-depot/tvix/eval/src/opcode.rs

286 lines
9.4 KiB
Rust
Raw Normal View History

//! This module implements the instruction set running on the abstract
//! machine implemented by tvix.
use std::ops::{AddAssign, Sub};
/// Newtype around the position of a constant inside the current code
/// chunk's constant list.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct ConstantIdx(pub usize);
/// Newtype around the position of an instruction inside the current
/// code chunk.
#[derive(Clone, Copy, Debug)]
#[repr(transparent)]
pub struct CodeIdx(pub usize);
impl AddAssign<usize> for CodeIdx {
fn add_assign(&mut self, rhs: usize) {
*self = CodeIdx(self.0 + rhs)
}
}
impl Sub<usize> for CodeIdx {
type Output = Self;
fn sub(self, rhs: usize) -> Self::Output {
CodeIdx(self.0 - rhs)
}
}
/// Position of a value on the runtime stack.
///
/// The offset is *relative to* the VM value `stack_base` of the
/// CallFrame that contains the opcode carrying this StackIdx.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd)]
#[repr(transparent)]
pub struct StackIdx(pub usize);
/// Position of an upvalue in a closure's list of bound-variable
/// upvalues.
///
/// Unlike a stack index this is an absolute index: it points into the
/// Upvalues of the CallFrame that contains the opcode carrying this
/// UpvalueIdx.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct UpvalueIdx(pub usize);
/// Amount by which a jump instruction should change the instruction
/// pointer.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct JumpOffset(pub usize);
/// A count supplied as an instruction operand, for example a number
/// of elements.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct Count(pub usize);
/// All variants of this enum carry a bounded amount of data to
/// ensure that no heap allocations are needed for an Opcode.
///
/// In documentation comments, stack positions are referred to by
/// indices written in `{}` as such, where required:
///
/// ```notrust
/// --- top of the stack
/// /
/// v
/// [ ... | 3 | 2 | 1 | 0 ]
/// ^
/// /
/// 2 values deep ---
/// ```
///
/// Unless otherwise specified, operations leave their result at the
/// top of the stack.
#[warn(variant_size_differences)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum OpCode {
/// Push a constant onto the stack.
OpConstant(ConstantIdx),
// Unary operators
/// Discard a value from the stack.
OpPop,
/// Invert the boolean at the top of the stack.
OpInvert,
// Binary operators
/// Invert the sign of the number at the top of the stack.
OpNegate,
/// Sum up the two numbers at the top of the stack.
OpAdd,
/// Subtract the number at {1} from the number at {2}.
OpSub,
/// Multiply the two numbers at the top of the stack.
OpMul,
/// Divide the two numbers at the top of the stack.
OpDiv,
// Comparison operators
/// Check the two values at the top of the stack for Nix-equality.
OpEqual,
/// Check whether the value at {2} is less than {1}.
OpLess,
/// Check whether the value at {2} is less than or equal to {1}.
OpLessOrEq,
/// Check whether the value at {2} is greater than {1}.
OpMore,
/// Check whether the value at {2} is greater than or equal to {1}.
OpMoreOrEq,
// Logical operators & generic jumps
/// Jump forward in the bytecode specified by the number of
/// instructions in its usize operand.
OpJump(JumpOffset),
/// Jump forward in the bytecode specified by the number of
/// instructions in its usize operand, *if* the value at the top
/// of the stack is `true`.
OpJumpIfTrue(JumpOffset),
/// Jump forward in the bytecode specified by the number of
/// instructions in its usize operand, *if* the value at the top
/// of the stack is `false`.
OpJumpIfFalse(JumpOffset),
/// Pop one stack item and jump forward in the bytecode
/// specified by the number of instructions in its usize
/// operand, *if* the value at the top of the stack is a
/// Value::Catchable.
OpJumpIfCatchable(JumpOffset),
/// Jump forward in the bytecode specified by the number of
/// instructions in its usize operand, *if* the value at the top
/// of the stack is the internal value representing a missing
/// attribute set key.
OpJumpIfNotFound(JumpOffset),
/// Jump forward in the bytecode specified by the number of
/// instructions in its usize operand, *if* the value at the top
/// of the stack is *not* the internal value requesting a
/// stack value finalisation.
fix(tvix/eval): only finalise formal arguments if defaulting When dealing with a formal argument in a function argument pattern that has a default expression, there are two different things that can happen at runtime: Either we select its value from the passed attribute successfully or we need to use the default expression. Both of these may be thunks and both of these may need finalisers. However, in the former case this is taken care of elsewhere, the value will always be finalised already if necessary. In the latter case we may need to finalise the thunk resulting from the default expression. However, the thunk corresponding to the expression may never end up in the local's stack slot. Since finalisation goes by stack slot (and not constants), we need to prevent a case where we don't fall back to the default expression, but finalise anyways. Previously, we worked around this by making `OpFinalise` ignore non-thunks. Since finalisation of already evaluated thunks still crashed, the faulty compilation of function pattern arguments could still cause a crash. As a new approach, we reinstate the old behavior of `OpFinalise` to crash whenever encountering something that is either not a thunk or doesn't need finalisation. This can also help catching (similar) miscompilations in the future. To then prevent the crash, we need to track whether we have fallen back or not at runtime. This is done using an additional phantom on the stack that holds a new `FinaliseRequest` value. When it comes to finalisation we check this value and conditionally execute `OpFinalise` based on its value. Resolves b/261 and b/265 (partially). Change-Id: Ic04fb80ec671a2ba11fa645090769c335fb7f58b Reviewed-on: https://cl.tvl.fyi/c/depot/+/8705 Reviewed-by: tazjin <tazjin@tvl.su> Tested-by: BuildkiteCI Autosubmit: sterni <sternenseemann@systemli.org>
2023-06-03 02:10:31 +02:00
OpJumpIfNoFinaliseRequest(JumpOffset),
// Attribute sets
/// Construct an attribute set from the given number of key-value pairs on the top of the stack
///
/// Note that this takes the count of *pairs*, not the number of *stack values* - the actual
/// number of values popped off the stack will be twice the argument to this op
OpAttrs(Count),
/// Merge the attribute set at {2} into the attribute set at {1},
/// and leave the new set at the top of the stack.
OpAttrsUpdate,
/// Select the attribute with the name at {1} from the set at {2}.
OpAttrsSelect,
/// Select the attribute with the name at {1} from the set at {2}, but leave
/// a `Value::AttrNotFound` in the stack instead of failing if it is
/// missing.
OpAttrsTrySelect,
/// Check for the presence of the attribute with the name at {1} in the set
/// at {2}.
OpHasAttr,
/// Throw an error if the attribute set at the top of the stack has any attributes
/// other than those listed in the formals of the current lambda
///
/// Panics if the current frame is not a lambda with formals
OpValidateClosedFormals,
// `with`-handling
/// Push a value onto the runtime `with`-stack to enable dynamic identifier
/// resolution. The absolute stack index of the value is supplied as a usize
/// operand.
OpPushWith(StackIdx),
/// Pop the last runtime `with`-stack element.
OpPopWith,
/// Dynamically resolve an identifier with the name at {1} from the runtime
/// `with`-stack.
OpResolveWith,
// Lists
/// Construct a list from the given number of values at the top of the
/// stack.
OpList(Count),
/// Concatenate the lists at {2} and {1}.
OpConcat,
// Strings
/// Interpolate the given number of string fragments into a single string.
OpInterpolate(Count),
/// Force the Value on the stack and coerce it to a string, always using
/// `CoercionKind::Weak`.
OpCoerceToString,
// Paths
/// Attempt to resolve the Value on the stack using the configured [`NixSearchPath`][]
///
/// [`NixSearchPath`]: crate::nix_search_path::NixSearchPath
OpFindFile,
/// Attempt to resolve a path literal relative to the home dir
OpResolveHomePath,
// Type assertion operators
/// Assert that the value at {1} is a boolean, and fail with a runtime error
/// otherwise.
OpAssertBool,
OpAssertAttrs,
/// Access local identifiers with statically known positions.
OpGetLocal(StackIdx),
/// Close scopes while leaving their expression value around.
OpCloseScope(Count), // number of locals to pop
/// Return an error indicating that an `assert` failed
OpAssertFail,
// Lambdas & closures
/// Call the value at {1} in a new VM callframe
OpCall,
/// Retrieve the upvalue at the given index from the closure or thunk
/// currently under evaluation.
OpGetUpvalue(UpvalueIdx),
/// Construct a closure which has upvalues but no self-references
OpClosure(ConstantIdx),
/// Construct a closure which has self-references (direct or via upvalues)
feat(tvix/eval): deduplicate overlap between Closure and Thunk This commit deduplicates the Thunk-like functionality from Closure and unifies it with Thunk. Specifically, we now have one and only one way of breaking reference cycles in the Value-graph: Thunk. No other variant contains a RefCell. This should make it easier to reason about the behavior of the VM. InnerClosure and UpvaluesCarrier are no longer necessary. This refactoring allowed an improvement in code generation: `Rc<RefCell<>>`s are now created only for closures which do not have self-references or deferred upvalues, instead of for all closures. OpClosure has been split into two separate opcodes: - OpClosure creates non-recursive closures with no deferred upvalues. The VM will not create an `Rc<RefCell<>>` when executing this instruction. - OpThunkClosure is used for closures with self-references or deferred upvalues. The VM will create a Thunk when executing this opcode, but the Thunk will start out already in the `ThunkRepr::Evaluated` state, rather than in the `ThunkRepr::Suspeneded` state. To avoid confusion, OpThunk has been renamed OpThunkSuspended. Thanks to @sterni for suggesting that all this could be done without adding an additional variant to ThunkRepr. This does however mean that there will be mutating accesses to `ThunkRepr::Evaluated`, which was not previously the case. The field `is_finalised:bool` has been added to `Closure` to ensure that these mutating accesses are performed only on finalised Closures. Both the check and the field are present only if `#[cfg(debug_assertions)]`. Change-Id: I04131501029772f30e28da8281d864427685097f Signed-off-by: Adam Joseph <adam@westernsemico.com> Reviewed-on: https://cl.tvl.fyi/c/depot/+/7019 Tested-by: BuildkiteCI Reviewed-by: tazjin <tazjin@tvl.su>
2022-10-16 01:10:10 +02:00
OpThunkClosure(ConstantIdx),
/// Construct a suspended thunk, used to delay a computation for laziness.
feat(tvix/eval): deduplicate overlap between Closure and Thunk This commit deduplicates the Thunk-like functionality from Closure and unifies it with Thunk. Specifically, we now have one and only one way of breaking reference cycles in the Value-graph: Thunk. No other variant contains a RefCell. This should make it easier to reason about the behavior of the VM. InnerClosure and UpvaluesCarrier are no longer necessary. This refactoring allowed an improvement in code generation: `Rc<RefCell<>>`s are now created only for closures which do not have self-references or deferred upvalues, instead of for all closures. OpClosure has been split into two separate opcodes: - OpClosure creates non-recursive closures with no deferred upvalues. The VM will not create an `Rc<RefCell<>>` when executing this instruction. - OpThunkClosure is used for closures with self-references or deferred upvalues. The VM will create a Thunk when executing this opcode, but the Thunk will start out already in the `ThunkRepr::Evaluated` state, rather than in the `ThunkRepr::Suspeneded` state. To avoid confusion, OpThunk has been renamed OpThunkSuspended. Thanks to @sterni for suggesting that all this could be done without adding an additional variant to ThunkRepr. This does however mean that there will be mutating accesses to `ThunkRepr::Evaluated`, which was not previously the case. The field `is_finalised:bool` has been added to `Closure` to ensure that these mutating accesses are performed only on finalised Closures. Both the check and the field are present only if `#[cfg(debug_assertions)]`. Change-Id: I04131501029772f30e28da8281d864427685097f Signed-off-by: Adam Joseph <adam@westernsemico.com> Reviewed-on: https://cl.tvl.fyi/c/depot/+/7019 Tested-by: BuildkiteCI Reviewed-by: tazjin <tazjin@tvl.su>
2022-10-16 01:10:10 +02:00
OpThunkSuspended(ConstantIdx),
/// Force the value at {1} until it is a `Thunk::Evaluated`.
OpForce,
/// Finalise initialisation of the upvalues of the value in the given stack
/// index (which must be a Value::Thunk) after the scope is fully bound.
OpFinalise(StackIdx),
refactor(tvix/eval): flatten call stack of VM using generators Warning: This is probably the biggest refactor in tvix-eval history, so far. This replaces all instances of trampolines and recursion during evaluation of the VM loop with generators. A generator is an asynchronous function that can be suspended to yield a message (in our case, vm::generators::GeneratorRequest) and receive a response (vm::generators::GeneratorResponsee). The `genawaiter` crate provides an interpreter for generators that can drive their execution and lets us move control flow between the VM and suspended generators. To do this, massive changes have occured basically everywhere in the code. On a high-level: 1. The VM is now organised around a frame stack. A frame is either a call frame (execution of Tvix bytecode) or a generator frame (a running or suspended generator). The VM has an outer loop that pops a frame off the frame stack, and then enters an inner loop either driving the execution of the bytecode or the execution of a generator. Both types of frames have several branches that can result in the frame re-enqueuing itself, and enqueuing some other work (in the form of a different frame) on top of itself. The VM will eventually resume the frame when everything "above" it has been suspended. In this way, the VM's new frame stack takes over much of the work that was previously achieved by recursion. 2. All methods previously taking a VM have been refactored into async functions that instead emit/receive generator messages for communication with the VM. Notably, this includes *all* builtins. This has had some other effects: - Some test have been removed or commented out, either because they tested code that was mostly already dead (nix_eq) or because they now require generator scaffolding which we do not have in place for tests (yet). - Because generator functions are technically async (though no async IO is involved), we lose the ability to use much of the Rust standard library e.g. 
in builtins. This has led to many algorithms being unrolled into iterative versions instead of iterator combinations, and things like sorting had to be implemented from scratch. - Many call sites that previously saw a `Result<..., ErrorKind>` bubble up now only see the result value, as the error handling is encapsulated within the generator loop. This reduces number of places inside of builtin implementations where error context can be attached to calls that can fail. Currently what we gain in this tradeoff is significantly more detailed span information (which we still need to bubble up, this commit does not change the error display). We'll need to do some analysis later of how useful the errors turn out to be and potentially introduce some methods for attaching context to a generator frame again. This change is very difficult to do in stages, as it is very much an "all or nothing" change that affects huge parts of the codebase. I've tried to isolate changes that can be isolated into the parent CLs of this one, but this change is still quite difficult to wrap one's mind and I'm available to discuss it and explain things to any reviewer. Fixes: b/238, b/237, b/251 and potentially others. Change-Id: I39244163ff5bbecd169fe7b274df19262b515699 Reviewed-on: https://cl.tvl.fyi/c/depot/+/8104 Reviewed-by: raitobezarius <tvl@lahfa.xyz> Reviewed-by: Adam Joseph <adam@westernsemico.com> Tested-by: BuildkiteCI
2023-02-14 13:02:39 +01:00
/// Final instruction emitted in a chunk. Does not have an
/// inherent effect, but can simplify VM logic as a marker in some
/// cases.
///
/// Can be thought of as "returning" the value to the parent
/// frame, hence the name.
OpReturn,
feat(tvix/eval): deduplicate overlap between Closure and Thunk This commit deduplicates the Thunk-like functionality from Closure and unifies it with Thunk. Specifically, we now have one and only one way of breaking reference cycles in the Value-graph: Thunk. No other variant contains a RefCell. This should make it easier to reason about the behavior of the VM. InnerClosure and UpvaluesCarrier are no longer necessary. This refactoring allowed an improvement in code generation: `Rc<RefCell<>>`s are now created only for closures which do not have self-references or deferred upvalues, instead of for all closures. OpClosure has been split into two separate opcodes: - OpClosure creates non-recursive closures with no deferred upvalues. The VM will not create an `Rc<RefCell<>>` when executing this instruction. - OpThunkClosure is used for closures with self-references or deferred upvalues. The VM will create a Thunk when executing this opcode, but the Thunk will start out already in the `ThunkRepr::Evaluated` state, rather than in the `ThunkRepr::Suspeneded` state. To avoid confusion, OpThunk has been renamed OpThunkSuspended. Thanks to @sterni for suggesting that all this could be done without adding an additional variant to ThunkRepr. This does however mean that there will be mutating accesses to `ThunkRepr::Evaluated`, which was not previously the case. The field `is_finalised:bool` has been added to `Closure` to ensure that these mutating accesses are performed only on finalised Closures. Both the check and the field are present only if `#[cfg(debug_assertions)]`. Change-Id: I04131501029772f30e28da8281d864427685097f Signed-off-by: Adam Joseph <adam@westernsemico.com> Reviewed-on: https://cl.tvl.fyi/c/depot/+/7019 Tested-by: BuildkiteCI Reviewed-by: tazjin <tazjin@tvl.su>
2022-10-16 01:10:10 +02:00
// [`OpClosure`], [`OpThunkSuspended`], and [`OpThunkClosure`] have a
// variable number of arguments to the instruction, which is
// represented here by making their data part of the opcodes.
// Each of these two opcodes has a `ConstantIdx`, which must
// reference a `Value::Blueprint(Lambda)`. The `upvalue_count`
// field in that `Lambda` indicates the number of arguments it
// takes, and the opcode must be followed by exactly this number
// of `Data*` opcodes. The VM skips over these by advancing the
// instruction pointer.
//
// It is illegal for a `Data*` opcode to appear anywhere else.
/// Populate a static upvalue by copying from the stack immediately.
DataStackIdx(StackIdx),
/// Populate a static upvalue of a thunk by copying it the stack, but do
/// when the thunk is finalised (by OpFinalise) rather than immediately.
DataDeferredLocal(StackIdx),
/// Populate a static upvalue by copying it from the upvalues of an
/// enclosing scope.
DataUpvalueIdx(UpvalueIdx),
/// Populate dynamic upvalues by saving a copy of the with-stack.
DataCaptureWith,
}