tvl-depot/tvix/eval/src/chunk.rs

use crate::opcode::{CodeIdx, ConstantIdx, Op, OpArg};
use crate::value::Value;
use crate::{CoercionKind, SourceCode};
use std::io::Write;

/// Maximum size of a u64 encoded in the vu128 varint encoding.
const U64_VARINT_SIZE: usize = 9;

/// Represents a source location from which one or more operations
/// were compiled.
///
/// The span itself is an index into a [codemap::CodeMap], and the
/// structure tracks the number of operations that were yielded from
/// the same span.
///
/// At error reporting time, it becomes possible to either just fetch
/// the textual representation of that span from the codemap, or to
/// even re-parse the AST using rnix to create more semantically
/// interesting errors.
#[derive(Clone, Debug, PartialEq)]
struct SourceSpan {
    /// Span into the [codemap::CodeMap].
    span: codemap::Span,

    /// Index of the first operation covered by this span.
    start: usize,
}

/// A chunk is a representation of a sequence of bytecode
/// instructions, associated constants and additional metadata as
/// emitted by the compiler.
#[derive(Debug, Default)]
pub struct Chunk {
    pub code: Vec<u8>,
    pub constants: Vec<Value>,
    spans: Vec<SourceSpan>,

    /// Index of the last operation (i.e. not data) written to the code vector.
    /// Some operations (e.g. jump patching) need to know this.
    last_op: usize,
}

impl Chunk {
    pub fn push_op(&mut self, op: Op, span: codemap::Span) -> usize {
        self.last_op = self.code.len();
        self.code.push(op as u8);
        self.push_span(span, self.last_op);
        self.last_op
    }

    pub fn push_uvarint(&mut self, data: u64) {
        let mut encoded = [0u8; U64_VARINT_SIZE];
        let bytes_written = vu128::encode_u64(&mut encoded, data);
        self.code.extend_from_slice(&encoded[..bytes_written]);
    }

    pub fn read_uvarint(&self, idx: usize) -> (u64, usize) {
        debug_assert!(
            idx < self.code.len(),
            "invalid bytecode (missing varint operand)",
        );

        if self.code.len() - idx >= U64_VARINT_SIZE {
            vu128::decode_u64(
                &self.code[idx..idx + U64_VARINT_SIZE]
                    .try_into()
                    .expect("size statically checked"),
            )
        } else {
            let mut tmp = [0u8; U64_VARINT_SIZE];
            tmp[..self.code.len() - idx].copy_from_slice(&self.code[idx..]);
            vu128::decode_u64(&tmp)
        }
    }

    pub fn push_u16(&mut self, data: u16) {
        self.code.extend_from_slice(&data.to_le_bytes())
    }

    /// Patches the argument to the jump operand of the jump at the given index
    /// to point to the *next* instruction that will be emitted.
    pub fn patch_jump(&mut self, idx: usize) {
        let offset = (self.code.len() - idx - /* arg idx = */ 1 - /* jump arg size = */ 2) as u16;
        self.code[idx + 1..idx + 3].copy_from_slice(&offset.to_le_bytes())
    }

    pub fn read_u16(&self, idx: usize) -> u16 {
        if idx + 2 > self.code.len() {
            panic!("Tvix bug: invalid bytecode (expected u16 operand not found)")
        }

        let byte_array: &[u8; 2] = &self.code[idx..idx + 2]
            .try_into()
            .expect("fixed-size slice can not fail to convert to array");

        u16::from_le_bytes(*byte_array)
    }

    /// Get the first span of a chunk, no questions asked.
    pub fn first_span(&self) -> codemap::Span {
        self.spans[0].span
    }

    /// Return the last op in the chunk together with its index, if any.
    pub fn last_op(&self) -> Option<(Op, usize)> {
        if self.code.is_empty() {
            return None;
        }

        Some((self.code[self.last_op].into(), self.last_op))
    }

    pub fn push_constant(&mut self, data: Value) -> ConstantIdx {
        let idx = self.constants.len();
        self.constants.push(data);
        ConstantIdx(idx)
    }

    /// Return a reference to the constant at the given [`ConstantIdx`]
    pub fn get_constant(&self, constant: ConstantIdx) -> Option<&Value> {
        self.constants.get(constant.0)
    }

    fn push_span(&mut self, span: codemap::Span, start: usize) {
        match self.spans.last_mut() {
            // We do not need to insert the same span again, as this
            // instruction was compiled from the same span as the last
            // one.
            Some(last) if last.span == span => {}

            // In all other cases, this is a new source span.
            _ => self.spans.push(SourceSpan { span, start }),
        }
    }

    /// Retrieve the [codemap::Span] from which the instruction at
    /// `offset` was compiled.
    pub fn get_span(&self, offset: CodeIdx) -> codemap::Span {
        let position = self
            .spans
            .binary_search_by(|span| span.start.cmp(&offset.0));

        let span = match position {
            Ok(index) => &self.spans[index],
            Err(index) => {
                if index == 0 {
                    &self.spans[0]
                } else {
                    &self.spans[index - 1]
                }
            }
        };

        span.span
    }

    /// Write the disassembler representation of the operation at
    /// `idx` to the specified writer, and return how many bytes in the code to
    /// skip for the next op.
    pub fn disassemble_op<W: Write>(
        &self,
        writer: &mut W,
        source: &SourceCode,
        width: usize,
        idx: CodeIdx,
    ) -> Result<usize, std::io::Error> {
        write!(writer, "{:#width$x}\t ", idx.0, width = width)?;

        // Print continuation character if the previous operation was at
        // the same line, otherwise print the line.
        let line = source.get_line(self.get_span(idx));
        if idx.0 > 0 && source.get_line(self.get_span(idx - 1)) == line {
            write!(writer, "   |\t")?;
        } else {
            write!(writer, "{:4}\t", line)?;
        }

        let _fmt_constant = |idx: ConstantIdx| match &self.constants[idx.0] {
            Value::Thunk(t) => t.debug_repr(),
            Value::Closure(c) => format!("closure({:p})", c.lambda),
            Value::Blueprint(b) => format!("blueprint({:p})", b),
            val => format!("{}", val),
        };

        let op: Op = self.code[idx.0].into();

        match op.arg_type() {
            OpArg::None => {
                writeln!(writer, "Op{:?}", op)?;
                Ok(1)
            }

            OpArg::Fixed => {
                let arg = self.read_u16(idx.0 + 1);
                writeln!(writer, "Op{:?}({})", op, arg)?;
                Ok(3)
            }

            OpArg::Uvarint => {
                let (arg, size) = self.read_uvarint(idx.0 + 1);
                writeln!(writer, "Op{:?}({})", op, arg)?;
                Ok(1 + size)
            }

            _ => match op {
                Op::CoerceToString => {
                    let kind: CoercionKind = self.code[idx.0 + 1].into();
                    writeln!(writer, "Op{:?}({:?})", op, kind)?;
                    Ok(2)
                }

                Op::Closure | Op::ThunkClosure | Op::ThunkSuspended => {
                    let mut cidx = idx.0 + 1;

                    let (bp_idx, size) = self.read_uvarint(cidx);
                    cidx += size;

                    let (packed_count, size) = self.read_uvarint(cidx);
                    cidx += size;

                    let captures_with = packed_count & 0b1 == 1;
                    let count = packed_count >> 1;

                    write!(writer, "Op{:?}(BP @ {}, ", op, bp_idx)?;
                    if captures_with {
                        write!(writer, "captures with, ")?;
                    }
                    writeln!(writer, "{} upvalues)", count)?;

                    for _ in 0..count {
                        let (_, size) = self.read_uvarint(cidx);
                        cidx += size;
                    }

                    Ok(cidx - idx.0)
                }
                _ => panic!("Tvix bug: don't know how to format argument for Op{:?}", op),
            },
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::dummy_span;

    // Note: These tests are about the functionality of the `Chunk` type, the
    // opcodes used below do *not* represent valid, executable Tvix code (and
    // don't need to).

    #[test]
    fn push_op() {
        let mut chunk = Chunk::default();
        let idx = chunk.push_op(Op::Add, dummy_span());
        assert_eq!(*chunk.code.last().unwrap(), Op::Add as u8);
        assert_eq!(chunk.code[idx], Op::Add as u8);
    }

    #[test]
    fn push_op_with_arg() {
        let mut chunk = Chunk::default();
        let mut idx = chunk.push_op(Op::Constant, dummy_span());
        chunk.push_uvarint(42);

        assert_eq!(chunk.code[idx], Op::Constant as u8);

        idx += 1;
        let (arg, size) = chunk.read_uvarint(idx);
        assert_eq!(idx + size, chunk.code.len());
        assert_eq!(arg, 42);
    }

    #[test]
    fn push_jump() {
        let mut chunk = Chunk::default();

        chunk.push_op(Op::Constant, dummy_span());
        chunk.push_uvarint(0);

        let idx = chunk.push_op(Op::Jump, dummy_span());
        chunk.push_u16(0);

        chunk.push_op(Op::Constant, dummy_span());
        chunk.push_uvarint(1);

        chunk.patch_jump(idx);
        chunk.push_op(Op::Return, dummy_span());

        #[rustfmt::skip]
        let expected: Vec<u8> = vec![
            Op::Constant as u8, 0,
            Op::Jump as u8, 2, 0,
            Op::Constant as u8, 1,
            Op::Return as u8,
        ];

        assert_eq!(chunk.code, expected);
    }
}