tvl-depot/tvix/eval/src/chunk.rs

177 lines
5 KiB
Rust
Raw Normal View History

use std::io::Write;
feat(tvix/eval): deduplicate overlap between Closure and Thunk This commit deduplicates the Thunk-like functionality from Closure and unifies it with Thunk. Specifically, we now have one and only one way of breaking reference cycles in the Value-graph: Thunk. No other variant contains a RefCell. This should make it easier to reason about the behavior of the VM. InnerClosure and UpvaluesCarrier are no longer necessary. This refactoring allowed an improvement in code generation: `Rc<RefCell<>>`s are now created only for closures which do not have self-references or deferred upvalues, instead of for all closures. OpClosure has been split into two separate opcodes: - OpClosure creates non-recursive closures with no deferred upvalues. The VM will not create an `Rc<RefCell<>>` when executing this instruction. - OpThunkClosure is used for closures with self-references or deferred upvalues. The VM will create a Thunk when executing this opcode, but the Thunk will start out already in the `ThunkRepr::Evaluated` state, rather than in the `ThunkRepr::Suspeneded` state. To avoid confusion, OpThunk has been renamed OpThunkSuspended. Thanks to @sterni for suggesting that all this could be done without adding an additional variant to ThunkRepr. This does however mean that there will be mutating accesses to `ThunkRepr::Evaluated`, which was not previously the case. The field `is_finalised:bool` has been added to `Closure` to ensure that these mutating accesses are performed only on finalised Closures. Both the check and the field are present only if `#[cfg(debug_assertions)]`. Change-Id: I04131501029772f30e28da8281d864427685097f Signed-off-by: Adam Joseph <adam@westernsemico.com> Reviewed-on: https://cl.tvl.fyi/c/depot/+/7019 Tested-by: BuildkiteCI Reviewed-by: tazjin <tazjin@tvl.su>
2022-10-16 01:10:10 +02:00
use std::ops::{Index, IndexMut};
use crate::opcode::{CodeIdx, ConstantIdx, OpCode};
use crate::value::Value;
use crate::SourceCode;
/// Represents a source location from which one or more operations
/// were compiled.
///
/// The span itself is an index into a [codemap::Codemap], and the
/// structure tracks the number of operations that were yielded from
/// the same span.
///
/// At error reporting time, it becomes possible to either just fetch
/// the textual representation of that span from the codemap, or to
/// even re-parse the AST using rnix to create more semantically
/// interesting errors.
#[derive(Clone, Debug, PartialEq)]
struct SourceSpan {
/// Span into the [codemap::Codemap].
span: codemap::Span,
/// Index of the first operation covered by this span.
start: usize,
}
/// A chunk is a representation of a sequence of bytecode
/// instructions, associated constants and additional metadata as
/// emitted by the compiler.
#[derive(Debug, Default)]
pub struct Chunk {
pub code: Vec<OpCode>,
pub constants: Vec<Value>,
spans: Vec<SourceSpan>,
}
impl Index<ConstantIdx> for Chunk {
type Output = Value;
fn index(&self, index: ConstantIdx) -> &Self::Output {
&self.constants[index.0]
}
}
impl Index<CodeIdx> for Chunk {
type Output = OpCode;
fn index(&self, index: CodeIdx) -> &Self::Output {
&self.code[index.0]
}
}
feat(tvix/eval): deduplicate overlap between Closure and Thunk This commit deduplicates the Thunk-like functionality from Closure and unifies it with Thunk. Specifically, we now have one and only one way of breaking reference cycles in the Value-graph: Thunk. No other variant contains a RefCell. This should make it easier to reason about the behavior of the VM. InnerClosure and UpvaluesCarrier are no longer necessary. This refactoring allowed an improvement in code generation: `Rc<RefCell<>>`s are now created only for closures which do not have self-references or deferred upvalues, instead of for all closures. OpClosure has been split into two separate opcodes: - OpClosure creates non-recursive closures with no deferred upvalues. The VM will not create an `Rc<RefCell<>>` when executing this instruction. - OpThunkClosure is used for closures with self-references or deferred upvalues. The VM will create a Thunk when executing this opcode, but the Thunk will start out already in the `ThunkRepr::Evaluated` state, rather than in the `ThunkRepr::Suspeneded` state. To avoid confusion, OpThunk has been renamed OpThunkSuspended. Thanks to @sterni for suggesting that all this could be done without adding an additional variant to ThunkRepr. This does however mean that there will be mutating accesses to `ThunkRepr::Evaluated`, which was not previously the case. The field `is_finalised:bool` has been added to `Closure` to ensure that these mutating accesses are performed only on finalised Closures. Both the check and the field are present only if `#[cfg(debug_assertions)]`. Change-Id: I04131501029772f30e28da8281d864427685097f Signed-off-by: Adam Joseph <adam@westernsemico.com> Reviewed-on: https://cl.tvl.fyi/c/depot/+/7019 Tested-by: BuildkiteCI Reviewed-by: tazjin <tazjin@tvl.su>
2022-10-16 01:10:10 +02:00
impl IndexMut<CodeIdx> for Chunk {
fn index_mut(&mut self, index: CodeIdx) -> &mut Self::Output {
&mut self.code[index.0]
}
}
impl Chunk {
pub fn push_op(&mut self, data: OpCode, span: codemap::Span) -> CodeIdx {
let idx = self.code.len();
self.code.push(data);
self.push_span(span, idx);
CodeIdx(idx)
}
/// Get the first span of a chunk, no questions asked.
pub fn first_span(&self) -> codemap::Span {
self.spans[0].span
}
/// Pop the last operation from the chunk and clean up its tracked
/// span. Used when the compiler backtracks.
pub fn pop_op(&mut self) {
// Simply drop the last op.
self.code.pop();
if let Some(span) = self.spans.last() {
// If the last span started at this op, drop it.
if span.start == self.code.len() {
self.spans.pop();
}
}
}
pub fn push_constant(&mut self, data: Value) -> ConstantIdx {
let idx = self.constants.len();
self.constants.push(data);
ConstantIdx(idx)
}
// Span tracking implementation
fn push_span(&mut self, span: codemap::Span, start: usize) {
match self.spans.last_mut() {
// We do not need to insert the same span again, as this
// instruction was compiled from the same span as the last
// one.
Some(last) if last.span == span => {}
// In all other cases, this is a new source span.
_ => self.spans.push(SourceSpan { span, start }),
}
}
/// Retrieve the [codemap::Span] from which the instruction at
/// `offset` was compiled.
pub fn get_span(&self, offset: CodeIdx) -> codemap::Span {
let position = self
.spans
.binary_search_by(|span| span.start.cmp(&offset.0));
let span = match position {
Ok(index) => &self.spans[index],
Err(index) => {
if index == 0 {
&self.spans[0]
} else {
&self.spans[index - 1]
}
}
};
span.span
}
/// Write the disassembler representation of the operation at
/// `idx` to the specified writer.
pub fn disassemble_op<W: Write>(
&self,
writer: &mut W,
source: &SourceCode,
width: usize,
idx: CodeIdx,
) -> Result<(), std::io::Error> {
write!(writer, "{:#width$x}\t ", idx.0, width = width)?;
// Print continuation character if the previous operation was at
// the same line, otherwise print the line.
let line = source.get_line(self.get_span(idx));
if idx.0 > 0 && source.get_line(self.get_span(CodeIdx(idx.0 - 1))) == line {
write!(writer, " |\t")?;
} else {
write!(writer, "{:4}\t", line)?;
}
match self[idx] {
OpCode::OpConstant(idx) => {
let val_str = match &self[idx] {
Value::Thunk(t) => t.debug_repr(),
Value::Closure(c) => format!("closure({:p})", c.lambda),
val => format!("{}", val),
};
writeln!(writer, "OpConstant({}@{})", val_str, idx.0)
}
op => writeln!(writer, "{:?}", op),
}?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use crate::test_utils::dummy_span;
use super::*;
#[test]
fn push_op() {
let mut chunk = Chunk::default();
chunk.push_op(OpCode::OpAdd, dummy_span());
assert_eq!(chunk.code.last().unwrap(), &OpCode::OpAdd);
}
}