diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index 558f6bf96..d99936e80 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -4935,6 +4935,7 @@ dependencies = [ "test-strategy", "toml 0.6.0", "tvix-eval-builtin-macros", + "vu128", ] [[package]] @@ -5326,6 +5327,12 @@ dependencies = [ "quote", ] +[[package]] +name = "vu128" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c51a178c8f3f425d86542b14f3dce9e16e86bb86328e2293745e6744ebd62e11" + [[package]] name = "wait-timeout" version = "0.2.0" diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index 3fadfa05e..002e59c08 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -16327,6 +16327,10 @@ rec { packageId = "tvix-eval-builtin-macros"; rename = "builtin-macros"; } + { + name = "vu128"; + packageId = "vu128"; + } ]; devDependencies = [ { @@ -17586,6 +17590,17 @@ rec { } ]; + }; + "vu128" = rec { + crateName = "vu128"; + version = "1.0.0"; + edition = "2018"; + sha256 = "049fsvml8rsyfj9j53ijhsxqcvp1x7fg651baj35shiziy61f6n5"; + libPath = "vu128/vu128.rs"; + authors = [ + "John Millikin " + ]; + }; "wait-timeout" = rec { crateName = "wait-timeout"; diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml index 70a356376..f8ed80072 100644 --- a/tvix/eval/Cargo.toml +++ b/tvix/eval/Cargo.toml @@ -36,6 +36,7 @@ md-5 = "0.10.6" data-encoding = "2.6.0" rustc-hash = "2.0.0" nohash-hasher = "0.2.0" +vu128 = "1.0.0" [dev-dependencies] criterion = "0.5" diff --git a/tvix/eval/src/chunk.rs b/tvix/eval/src/chunk.rs index e9c65db72..2a5446a78 100644 --- a/tvix/eval/src/chunk.rs +++ b/tvix/eval/src/chunk.rs @@ -1,9 +1,10 @@ -use std::io::Write; -use std::ops::{Index, IndexMut}; - -use crate::opcode::{CodeIdx, ConstantIdx, OpCode}; +use crate::opcode::{CodeIdx, ConstantIdx, Op, OpArg}; use crate::value::Value; -use crate::SourceCode; +use crate::{CoercionKind, SourceCode}; +use std::io::Write; + +/// Maximum size of a u64 encoded in the vu128 varint encoding. +const U64_VARINT_SIZE: usize = 9; /// Represents a source location from which one or more operations /// were compiled. @@ -30,39 +31,69 @@ struct SourceSpan { /// emitted by the compiler. #[derive(Debug, Default)] pub struct Chunk { - pub code: Vec, + pub code: Vec, pub constants: Vec, spans: Vec, -} -impl Index for Chunk { - type Output = Value; - - fn index(&self, index: ConstantIdx) -> &Self::Output { - &self.constants[index.0] - } -} - -impl Index for Chunk { - type Output = OpCode; - - fn index(&self, index: CodeIdx) -> &Self::Output { - &self.code[index.0] - } -} - -impl IndexMut for Chunk { - fn index_mut(&mut self, index: CodeIdx) -> &mut Self::Output { - &mut self.code[index.0] - } + /// Index of the last operation (i.e. not data) written to the code vector. + /// Some operations (e.g. jump patching) need to know this. + last_op: usize, } impl Chunk { - pub fn push_op(&mut self, data: OpCode, span: codemap::Span) -> CodeIdx { - let idx = self.code.len(); - self.code.push(data); - self.push_span(span, idx); - CodeIdx(idx) + pub fn push_op(&mut self, op: Op, span: codemap::Span) -> usize { + self.last_op = self.code.len(); + self.code.push(op as u8); + self.push_span(span, self.last_op); + self.last_op + } + + pub fn push_uvarint(&mut self, data: u64) { + let mut encoded = [0u8; U64_VARINT_SIZE]; + let bytes_written = vu128::encode_u64(&mut encoded, data); + self.code.extend_from_slice(&encoded[..bytes_written]); + } + + pub fn read_uvarint(&self, idx: usize) -> (u64, usize) { + debug_assert!( + idx < self.code.len(), + "invalid bytecode (missing varint operand)", + ); + + if self.code.len() - idx >= U64_VARINT_SIZE { + vu128::decode_u64( + &self.code[idx..idx + U64_VARINT_SIZE] + .try_into() + .expect("size statically checked"), + ) + } else { + let mut tmp = [0u8; U64_VARINT_SIZE]; + tmp[..self.code.len() - idx].copy_from_slice(&self.code[idx..]); + vu128::decode_u64(&tmp) + } + } + + pub fn push_u16(&mut self, data: u16) { + self.code.extend_from_slice(&data.to_le_bytes()) + } + + /// Patches the argument to the jump operand of the jump at the given index + /// to point to the *next* instruction that will be emitted. + pub fn patch_jump(&mut self, idx: usize) { + let offset = (self.code.len() - idx - /* arg idx = */ 1 - /* jump arg size = */ 2) as u16; + self.code[idx + 1..idx + 3].copy_from_slice(&offset.to_le_bytes()) + } + + pub fn read_u16(&self, idx: usize) -> u16 { + if idx + 2 > self.code.len() { + panic!("Tvix bug: invalid bytecode (expected u16 operand not found)") + } + + let byte_array: &[u8; 2] = &self.code[idx..idx + 2] + .try_into() + .expect("fixed-size slice can not fail to convert to array"); + + u16::from_le_bytes(*byte_array) } /// Get the first span of a chunk, no questions asked. @@ -70,23 +101,13 @@ impl Chunk { self.spans[0].span } - /// Return a reference to the last op in the chunk, if any - pub fn last_op(&self) -> Option<&OpCode> { - self.code.last() - } - - /// Pop the last operation from the chunk and clean up its tracked - /// span. Used when the compiler backtracks. - pub fn pop_op(&mut self) { - // Simply drop the last op. - self.code.pop(); - - if let Some(span) = self.spans.last() { - // If the last span started at this op, drop it. - if span.start == self.code.len() { - self.spans.pop(); - } + /// Return the last op in the chunk together with its index, if any. + pub fn last_op(&self) -> Option<(Op, usize)> { + if self.code.is_empty() { + return None; } + + Some((self.code[self.last_op].into(), self.last_op)) } pub fn push_constant(&mut self, data: Value) -> ConstantIdx { @@ -100,8 +121,6 @@ impl Chunk { self.constants.get(constant.0) } - // Span tracking implementation - fn push_span(&mut self, span: codemap::Span, start: usize) { match self.spans.last_mut() { // We do not need to insert the same span again, as this @@ -136,76 +155,88 @@ impl Chunk { } /// Write the disassembler representation of the operation at - /// `idx` to the specified writer. + /// `idx` to the specified writer, and return how many bytes in the code to + /// skip for the next op. pub fn disassemble_op( &self, writer: &mut W, source: &SourceCode, width: usize, idx: CodeIdx, - ) -> Result<(), std::io::Error> { + ) -> Result { write!(writer, "{:#width$x}\t ", idx.0, width = width)?; // Print continuation character if the previous operation was at // the same line, otherwise print the line. let line = source.get_line(self.get_span(idx)); - if idx.0 > 0 && source.get_line(self.get_span(CodeIdx(idx.0 - 1))) == line { + if idx.0 > 0 && source.get_line(self.get_span(idx - 1)) == line { write!(writer, " |\t")?; } else { write!(writer, "{:4}\t", line)?; } - let a = |idx| match &self[idx] { + let _fmt_constant = |idx: ConstantIdx| match &self.constants[idx.0] { Value::Thunk(t) => t.debug_repr(), Value::Closure(c) => format!("closure({:p})", c.lambda), Value::Blueprint(b) => format!("blueprint({:p})", b), val => format!("{}", val), }; - match self[idx] { - OpCode::OpConstant(idx) => { - writeln!(writer, "OpConstant({}@{})", a(idx), idx.0) - } - OpCode::OpClosure(idx) => { - writeln!(writer, "OpClosure({}@{})", a(idx), idx.0) - } - OpCode::OpThunkClosure(idx) => { - writeln!(writer, "OpThunkClosure({}@{})", a(idx), idx.0) - } - OpCode::OpThunkSuspended(idx) => { - writeln!(writer, "OpThunkSuspended({}@{})", a(idx), idx.0) - } - op => writeln!(writer, "{:?}", op), - }?; + let op: Op = self.code[idx.0].into(); - Ok(()) - } + match op.arg_type() { + OpArg::None => { + writeln!(writer, "Op{:?}", op)?; + Ok(1) + } - /// Extend this chunk with the content of another, moving out of the other - /// in the process. - /// - /// This is used by the compiler when it detects that it unnecessarily - /// thunked a nested expression. - pub fn extend(&mut self, other: Self) { - // Some operations need to be modified in certain ways before being - // valid as part of the new chunk. - let const_count = self.constants.len(); - for (idx, op) in other.code.iter().enumerate() { - let span = other.get_span(CodeIdx(idx)); - match op { - // As the constants shift, the index needs to be moved relatively. - OpCode::OpConstant(ConstantIdx(idx)) => { - self.push_op(OpCode::OpConstant(ConstantIdx(idx + const_count)), span) + OpArg::Fixed => { + let arg = self.read_u16(idx.0 + 1); + writeln!(writer, "Op{:?}({})", op, arg)?; + Ok(3) + } + + OpArg::Uvarint => { + let (arg, size) = self.read_uvarint(idx.0 + 1); + writeln!(writer, "Op{:?}({})", op, arg)?; + Ok(1 + size) + } + + _ => match op { + Op::CoerceToString => { + let kind: CoercionKind = self.code[idx.0 + 1].into(); + writeln!(writer, "Op{:?}({:?})", op, kind)?; + Ok(2) } - // Other operations either operate on relative offsets, or no - // offsets, and are safe to keep as-is. - _ => self.push_op(*op, span), - }; - } + Op::Closure | Op::ThunkClosure | Op::ThunkSuspended => { + let mut cidx = idx.0 + 1; - self.constants.extend(other.constants); - self.spans.extend(other.spans); + let (bp_idx, size) = self.read_uvarint(cidx); + cidx += size; + + let (packed_count, size) = self.read_uvarint(cidx); + cidx += size; + + let captures_with = packed_count & 0b1 == 1; + let count = packed_count >> 1; + + write!(writer, "Op{:?}(BP @ {}, ", op, bp_idx)?; + if captures_with { + write!(writer, "captures with, ")?; + } + writeln!(writer, "{} upvalues)", count)?; + + for _ in 0..count { + let (_, size) = self.read_uvarint(cidx); + cidx += size; + } + + Ok(cidx - idx.0) + } + _ => panic!("Tvix bug: don't know how to format argument for Op{:?}", op), + }, + } } } @@ -221,79 +252,49 @@ mod tests { #[test] fn push_op() { let mut chunk = Chunk::default(); - chunk.push_op(OpCode::OpAdd, dummy_span()); - assert_eq!(chunk.code.last().unwrap(), &OpCode::OpAdd); + let idx = chunk.push_op(Op::Add, dummy_span()); + assert_eq!(*chunk.code.last().unwrap(), Op::Add as u8); + assert_eq!(chunk.code[idx], Op::Add as u8); } #[test] - fn extend_empty() { + fn push_op_with_arg() { let mut chunk = Chunk::default(); - chunk.push_op(OpCode::OpAdd, dummy_span()); + let mut idx = chunk.push_op(Op::Constant, dummy_span()); + chunk.push_uvarint(42); - let other = Chunk::default(); - chunk.extend(other); + assert_eq!(chunk.code[idx], Op::Constant as u8); - assert_eq!( - chunk.code, - vec![OpCode::OpAdd], - "code should not have changed" - ); + idx += 1; + let (arg, size) = chunk.read_uvarint(idx); + assert_eq!(idx + size, chunk.code.len()); + assert_eq!(arg, 42); } #[test] - fn extend_simple() { - let span = dummy_span(); + fn push_jump() { let mut chunk = Chunk::default(); - chunk.push_op(OpCode::OpAdd, span); - let mut other = Chunk::default(); - other.push_op(OpCode::OpSub, span); - other.push_op(OpCode::OpMul, span); + chunk.push_op(Op::Constant, dummy_span()); + chunk.push_uvarint(0); - let expected_code = vec![OpCode::OpAdd, OpCode::OpSub, OpCode::OpMul]; + let idx = chunk.push_op(Op::Jump, dummy_span()); + chunk.push_u16(0); - chunk.extend(other); + chunk.push_op(Op::Constant, dummy_span()); + chunk.push_uvarint(1); - assert_eq!(chunk.code, expected_code, "code should have been extended"); - } + chunk.patch_jump(idx); + chunk.push_op(Op::Return, dummy_span()); - #[test] - fn extend_with_constant() { - let span = dummy_span(); - let mut chunk = Chunk::default(); - chunk.push_op(OpCode::OpAdd, span); - let cidx = chunk.push_constant(Value::Integer(0)); - assert_eq!( - cidx.0, 0, - "first constant in main chunk should have index 0" - ); - chunk.push_op(OpCode::OpConstant(cidx), span); - - let mut other = Chunk::default(); - other.push_op(OpCode::OpSub, span); - let other_cidx = other.push_constant(Value::Integer(1)); - assert_eq!( - other_cidx.0, 0, - "first constant in other chunk should have index 0" - ); - other.push_op(OpCode::OpConstant(other_cidx), span); - - chunk.extend(other); - - let expected_code = vec![ - OpCode::OpAdd, - OpCode::OpConstant(ConstantIdx(0)), - OpCode::OpSub, - OpCode::OpConstant(ConstantIdx(1)), // <- note: this was rewritten + #[rustfmt::skip] + let expected: Vec = vec![ + Op::Constant as u8, 0, + Op::Jump as u8, 2, 0, + Op::Constant as u8, 1, + Op::Return as u8, ]; - assert_eq!( - chunk.code, expected_code, - "code should have been extended and rewritten" - ); - - assert_eq!(chunk.constants.len(), 2); - assert!(matches!(chunk.constants[0], Value::Integer(0))); - assert!(matches!(chunk.constants[1], Value::Integer(1))); + assert_eq!(chunk.code, expected); } } diff --git a/tvix/eval/src/compiler/bindings.rs b/tvix/eval/src/compiler/bindings.rs index f5c6376eb..6a3ae4859 100644 --- a/tvix/eval/src/compiler/bindings.rs +++ b/tvix/eval/src/compiler/bindings.rs @@ -605,7 +605,7 @@ impl Compiler<'_, '_> { c.emit_force(&namespace); c.emit_constant(name.as_str().into(), &span); - c.push_op(OpCode::OpAttrsSelect, &span); + c.push_op(Op::AttrsSelect, &span); }) } @@ -632,7 +632,8 @@ impl Compiler<'_, '_> { if self.scope()[idx].needs_finaliser { let stack_idx = self.scope().stack_index(idx); let span = self.scope()[idx].span; - self.push_op(OpCode::OpFinalise(stack_idx), &OrEntireFile(span)); + self.push_op(Op::Finalise, &OrEntireFile(span)); + self.push_uvarint(stack_idx.0 as u64) } } } @@ -667,11 +668,8 @@ impl Compiler<'_, '_> { self.bind_values(bindings); if kind.is_attrs() { - self.push_op(OpCode::OpAttrs(Count(count)), node); - } - - if count == 0 { - self.unthunk(); + self.push_op(Op::Attrs, node); + self.push_uvarint(count as u64); } } @@ -697,7 +695,7 @@ impl Compiler<'_, '_> { self.scope_mut().end_scope(); self.emit_constant("body".into(), node); - self.push_op(OpCode::OpAttrsSelect, node); + self.push_op(Op::AttrsSelect, node); } /// Is the given identifier defined *by the user* in any current scope? @@ -718,8 +716,9 @@ impl Compiler<'_, '_> { match self.scope_mut().resolve_local(ident) { LocalPosition::Unknown => { // Are we possibly dealing with an upvalue? - if let Some(idx) = self.resolve_upvalue(self.contexts.len() - 1, ident, node) { - self.push_op(OpCode::OpGetUpvalue(idx), node); + if let Some(idx) = self.resolve_upvalue(self.contexts.len() - 1, ident) { + self.push_op(Op::GetUpvalue, node); + self.push_uvarint(idx.0 as u64); return; } @@ -742,7 +741,7 @@ impl Compiler<'_, '_> { self.thunk(slot, node, |c, _| { c.context_mut().captures_with_stack = true; c.emit_constant(ident.into(), node); - c.push_op(OpCode::OpResolveWith, node); + c.push_op(Op::ResolveWith, node); }); return; } @@ -753,18 +752,17 @@ impl Compiler<'_, '_> { LocalPosition::Known(idx) => { let stack_idx = self.scope().stack_index(idx); - self.push_op(OpCode::OpGetLocal(stack_idx), node); + self.push_op(Op::GetLocal, node); + self.push_uvarint(stack_idx.0 as u64); } // This identifier is referring to a value from the same scope which // is not yet defined. This identifier access must be thunked. LocalPosition::Recursive(idx) => self.thunk(slot, node, move |compiler, _| { - let upvalue_idx = compiler.add_upvalue( - compiler.contexts.len() - 1, - node, - UpvalueKind::Local(idx), - ); - compiler.push_op(OpCode::OpGetUpvalue(upvalue_idx), node); + let upvalue_idx = + compiler.add_upvalue(compiler.contexts.len() - 1, UpvalueKind::Local(idx)); + compiler.push_op(Op::GetUpvalue, node); + compiler.push_uvarint(upvalue_idx.0 as u64); }), }; } @@ -777,12 +775,7 @@ impl Compiler<'_, '_> { /// Private compiler helpers related to bindings. impl Compiler<'_, '_> { - fn resolve_upvalue( - &mut self, - ctx_idx: usize, - name: &str, - node: &N, - ) -> Option { + fn resolve_upvalue(&mut self, ctx_idx: usize, name: &str) -> Option { if ctx_idx == 0 { // There can not be any upvalue at the outermost context. return None; @@ -795,7 +788,7 @@ impl Compiler<'_, '_> { // stack (i.e. in the right position) *during* their runtime // construction LocalPosition::Known(idx) | LocalPosition::Recursive(idx) => { - return Some(self.add_upvalue(ctx_idx, node, UpvalueKind::Local(idx))) + return Some(self.add_upvalue(ctx_idx, UpvalueKind::Local(idx))) } LocalPosition::Unknown => { /* continue below */ } @@ -803,19 +796,14 @@ impl Compiler<'_, '_> { // If the upvalue comes from even further up, we need to recurse to make // sure that the upvalues are created at each level. - if let Some(idx) = self.resolve_upvalue(ctx_idx - 1, name, node) { - return Some(self.add_upvalue(ctx_idx, node, UpvalueKind::Upvalue(idx))); + if let Some(idx) = self.resolve_upvalue(ctx_idx - 1, name) { + return Some(self.add_upvalue(ctx_idx, UpvalueKind::Upvalue(idx))); } None } - fn add_upvalue( - &mut self, - ctx_idx: usize, - node: &N, - kind: UpvalueKind, - ) -> UpvalueIdx { + fn add_upvalue(&mut self, ctx_idx: usize, kind: UpvalueKind) -> UpvalueIdx { // If there is already an upvalue closing over the specified index, // retrieve that instead. for (idx, existing) in self.contexts[ctx_idx].scope.upvalues.iter().enumerate() { @@ -824,11 +812,7 @@ impl Compiler<'_, '_> { } } - let span = self.span_for(node); - self.contexts[ctx_idx] - .scope - .upvalues - .push(Upvalue { kind, span }); + self.contexts[ctx_idx].scope.upvalues.push(Upvalue { kind }); let idx = UpvalueIdx(self.contexts[ctx_idx].lambda.upvalue_count); self.contexts[ctx_idx].lambda.upvalue_count += 1; diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs index 4878db1f1..33b70b87c 100644 --- a/tvix/eval/src/compiler/mod.rs +++ b/tvix/eval/src/compiler/mod.rs @@ -29,7 +29,7 @@ use std::rc::{Rc, Weak}; use crate::chunk::Chunk; use crate::errors::{CatchableErrorKind, Error, ErrorKind, EvalResult}; use crate::observer::CompilerObserver; -use crate::opcode::{CodeIdx, ConstantIdx, Count, JumpOffset, OpCode, UpvalueIdx}; +use crate::opcode::{CodeIdx, Op, Position, UpvalueIdx}; use crate::spans::ToSpan; use crate::value::{Closure, Formals, Lambda, NixAttrs, Thunk, Value}; use crate::warnings::{EvalWarning, WarningKind}; @@ -52,7 +52,6 @@ struct LambdaCtx { lambda: Lambda, scope: Scope, captures_with_stack: bool, - unthunk: bool, } impl LambdaCtx { @@ -61,7 +60,6 @@ impl LambdaCtx { lambda: Lambda::default(), scope: Default::default(), captures_with_stack: false, - unthunk: false, } } @@ -70,7 +68,6 @@ impl LambdaCtx { lambda: Lambda::default(), scope: self.scope.inherit(), captures_with_stack: false, - unthunk: false, } } } @@ -270,13 +267,37 @@ impl Compiler<'_, '_> { /// Push a single instruction to the current bytecode chunk and /// track the source span from which it was compiled. - fn push_op(&mut self, data: OpCode, node: &T) -> CodeIdx { + fn push_op(&mut self, data: Op, node: &T) -> CodeIdx { if self.dead_scope > 0 { return CodeIdx(0); } let span = self.span_for(node); - self.chunk().push_op(data, span) + CodeIdx(self.chunk().push_op(data, span)) + } + + fn push_u8(&mut self, data: u8) { + if self.dead_scope > 0 { + return; + } + + self.chunk().code.push(data); + } + + fn push_uvarint(&mut self, data: u64) { + if self.dead_scope > 0 { + return; + } + + self.chunk().push_uvarint(data); + } + + fn push_u16(&mut self, data: u16) { + if self.dead_scope > 0 { + return; + } + + self.chunk().push_u16(data); } /// Emit a single constant to the current bytecode chunk and track @@ -287,7 +308,8 @@ impl Compiler<'_, '_> { } let idx = self.chunk().push_constant(value); - self.push_op(OpCode::OpConstant(idx), node); + self.push_op(Op::Constant, node); + self.push_uvarint(idx.0 as u64); } } @@ -400,7 +422,7 @@ impl Compiler<'_, '_> { Value::UnresolvedPath(Box::new(home_relative_path.into())), node, ); - self.push_op(OpCode::OpResolveHomePath, node); + self.push_op(Op::ResolveHomePath, node); return; } else if raw_path.starts_with('<') { // TODO: decide what to do with findFile @@ -416,7 +438,7 @@ impl Compiler<'_, '_> { // Make a thunk to resolve the path (without using `findFile`, at least for now?) return self.thunk(slot, node, move |c, _| { c.emit_constant(Value::UnresolvedPath(Box::new(path.into())), node); - c.push_op(OpCode::OpFindFile, node); + c.push_op(Op::FindFile, node); }); } else { let mut buf = self.root_dir.clone(); @@ -452,13 +474,15 @@ impl Compiler<'_, '_> { ast::InterpolPart::Interpolation(ipol) => { self.compile(slot, ipol.expr().unwrap()); // implicitly forces as well - self.push_op( - OpCode::OpCoerceToString(CoercionKind { - strong: false, - import_paths: true, - }), - ipol, - ); + self.push_op(Op::CoerceToString, ipol); + + let encoded: u8 = CoercionKind { + strong: false, + import_paths: true, + } + .into(); + + self.push_u8(encoded); } ast::InterpolPart::Literal(lit) => { @@ -468,7 +492,8 @@ impl Compiler<'_, '_> { } if parts.len() != 1 { - self.push_op(OpCode::OpInterpolate(Count(parts.len())), parent_node); + self.push_op(Op::Interpolate, parent_node); + self.push_uvarint(parts.len() as u64); } } @@ -494,8 +519,8 @@ impl Compiler<'_, '_> { self.emit_force(op); let opcode = match op.operator().unwrap() { - ast::UnaryOpKind::Invert => OpCode::OpInvert, - ast::UnaryOpKind::Negate => OpCode::OpNegate, + ast::UnaryOpKind::Invert => Op::Invert, + ast::UnaryOpKind::Negate => Op::Negate, }; self.push_op(opcode, op); @@ -526,21 +551,21 @@ impl Compiler<'_, '_> { self.emit_force(&op.rhs().unwrap()); match op.operator().unwrap() { - BinOpKind::Add => self.push_op(OpCode::OpAdd, op), - BinOpKind::Sub => self.push_op(OpCode::OpSub, op), - BinOpKind::Mul => self.push_op(OpCode::OpMul, op), - BinOpKind::Div => self.push_op(OpCode::OpDiv, op), - BinOpKind::Update => self.push_op(OpCode::OpAttrsUpdate, op), - BinOpKind::Equal => self.push_op(OpCode::OpEqual, op), - BinOpKind::Less => self.push_op(OpCode::OpLess, op), - BinOpKind::LessOrEq => self.push_op(OpCode::OpLessOrEq, op), - BinOpKind::More => self.push_op(OpCode::OpMore, op), - BinOpKind::MoreOrEq => self.push_op(OpCode::OpMoreOrEq, op), - BinOpKind::Concat => self.push_op(OpCode::OpConcat, op), + BinOpKind::Add => self.push_op(Op::Add, op), + BinOpKind::Sub => self.push_op(Op::Sub, op), + BinOpKind::Mul => self.push_op(Op::Mul, op), + BinOpKind::Div => self.push_op(Op::Div, op), + BinOpKind::Update => self.push_op(Op::AttrsUpdate, op), + BinOpKind::Equal => self.push_op(Op::Equal, op), + BinOpKind::Less => self.push_op(Op::Less, op), + BinOpKind::LessOrEq => self.push_op(Op::LessOrEq, op), + BinOpKind::More => self.push_op(Op::More, op), + BinOpKind::MoreOrEq => self.push_op(Op::MoreOrEq, op), + BinOpKind::Concat => self.push_op(Op::Concat, op), BinOpKind::NotEqual => { - self.push_op(OpCode::OpEqual, op); - self.push_op(OpCode::OpInvert, op) + self.push_op(Op::Equal, op); + self.push_op(Op::Invert, op) } // Handled by separate branch above. @@ -561,20 +586,22 @@ impl Compiler<'_, '_> { self.compile(slot, node.lhs().unwrap()); self.emit_force(&node.lhs().unwrap()); - let throw_idx = self.push_op(OpCode::OpJumpIfCatchable(JumpOffset(0)), node); + let throw_idx = self.push_op(Op::JumpIfCatchable, node); + self.push_u16(0); // If this value is false, jump over the right-hand side - the // whole expression is false. - let end_idx = self.push_op(OpCode::OpJumpIfFalse(JumpOffset(0)), node); + let end_idx = self.push_op(Op::JumpIfFalse, node); + self.push_u16(0); // Otherwise, remove the previous value and leave the // right-hand side on the stack. Its result is now the value // of the whole expression. - self.push_op(OpCode::OpPop, node); + self.push_op(Op::Pop, node); self.compile(slot, node.rhs().unwrap()); self.emit_force(&node.rhs().unwrap()); self.patch_jump(end_idx); - self.push_op(OpCode::OpAssertBool, node); + self.push_op(Op::AssertBool, node); self.patch_jump(throw_idx); } @@ -589,16 +616,18 @@ impl Compiler<'_, '_> { self.compile(slot, node.lhs().unwrap()); self.emit_force(&node.lhs().unwrap()); - let throw_idx = self.push_op(OpCode::OpJumpIfCatchable(JumpOffset(0)), node); + let throw_idx = self.push_op(Op::JumpIfCatchable, node); + self.push_u16(0); // Opposite of above: If this value is **true**, we can // short-circuit the right-hand side. - let end_idx = self.push_op(OpCode::OpJumpIfTrue(JumpOffset(0)), node); - self.push_op(OpCode::OpPop, node); + let end_idx = self.push_op(Op::JumpIfTrue, node); + self.push_u16(0); + self.push_op(Op::Pop, node); self.compile(slot, node.rhs().unwrap()); self.emit_force(&node.rhs().unwrap()); self.patch_jump(end_idx); - self.push_op(OpCode::OpAssertBool, node); + self.push_op(Op::AssertBool, node); self.patch_jump(throw_idx); } @@ -612,17 +641,20 @@ impl Compiler<'_, '_> { // Leave left-hand side value on the stack and invert it. self.compile(slot, node.lhs().unwrap()); self.emit_force(&node.lhs().unwrap()); - let throw_idx = self.push_op(OpCode::OpJumpIfCatchable(JumpOffset(0)), node); - self.push_op(OpCode::OpInvert, node); + let throw_idx = self.push_op(Op::JumpIfCatchable, node); + self.push_u16(0); + self.push_op(Op::Invert, node); // Exactly as `||` (because `a -> b` = `!a || b`). - let end_idx = self.push_op(OpCode::OpJumpIfTrue(JumpOffset(0)), node); - self.push_op(OpCode::OpPop, node); + let end_idx = self.push_op(Op::JumpIfTrue, node); + self.push_u16(0); + + self.push_op(Op::Pop, node); self.compile(slot, node.rhs().unwrap()); self.emit_force(&node.rhs().unwrap()); self.patch_jump(end_idx); - self.push_op(OpCode::OpAssertBool, node); + self.push_op(Op::AssertBool, node); self.patch_jump(throw_idx); } @@ -657,11 +689,8 @@ impl Compiler<'_, '_> { self.scope_mut().mark_initialised(item_slot); } - if count == 0 { - self.unthunk(); - } - - self.push_op(OpCode::OpList(Count(count)), node); + self.push_op(Op::List, node); + self.push_uvarint(count as u64); self.scope_mut().end_scope(); } @@ -690,7 +719,7 @@ impl Compiler<'_, '_> { // next nested element, for all fragments except the last one. for (count, fragment) in node.attrpath().unwrap().attrs().enumerate() { if count > 0 { - self.push_op(OpCode::OpAttrsTrySelect, &fragment); + self.push_op(Op::AttrsTrySelect, &fragment); self.emit_force(&fragment); } @@ -699,7 +728,7 @@ impl Compiler<'_, '_> { // After the last fragment, emit the actual instruction that // leaves a boolean on the stack. - self.push_op(OpCode::OpHasAttr, node); + self.push_op(Op::HasAttr, node); } /// When compiling select or select_or expressions, an optimisation is @@ -723,8 +752,9 @@ impl Compiler<'_, '_> { // set that is lacking a key, because that thunk is never // evaluated). If anything is missing, just move on. We may // want to emit warnings here in the future. - if let Some(OpCode::OpConstant(ConstantIdx(idx))) = self.chunk().code.last().cloned() { - let constant = &mut self.chunk().constants[idx]; + if let Some((Op::Constant, op_idx)) = self.chunk().last_op() { + let (idx, _) = self.chunk().read_uvarint(op_idx + 1); + let constant = &mut self.chunk().constants[idx as usize]; if let Value::Attrs(attrs) = constant { let mut path_iter = path.attrs(); @@ -736,10 +766,6 @@ impl Compiler<'_, '_> { if let Some(ident) = expr_static_attr_str(&attr) { if let Some(selected_value) = attrs.select(ident.as_bytes()) { *constant = selected_value.clone(); - - // If this worked, we can unthunk the current thunk. - self.unthunk(); - return true; } } @@ -773,7 +799,7 @@ impl Compiler<'_, '_> { self.emit_force(&set); self.compile_attr(slot, &fragment); - self.push_op(OpCode::OpAttrsSelect, &fragment); + self.push_op(Op::AttrsSelect, &fragment); } } @@ -823,11 +849,13 @@ impl Compiler<'_, '_> { for fragment in path.attrs() { self.emit_force(&fragment); self.compile_attr(slot, &fragment.clone()); - self.push_op(OpCode::OpAttrsTrySelect, &fragment); - jumps.push(self.push_op(OpCode::OpJumpIfNotFound(JumpOffset(0)), &fragment)); + self.push_op(Op::AttrsTrySelect, &fragment); + jumps.push(self.push_op(Op::JumpIfNotFound, &fragment)); + self.push_u16(0); } - let final_jump = self.push_op(OpCode::OpJump(JumpOffset(0)), &path); + let final_jump = self.push_op(Op::Jump, &path); + self.push_u16(0); for jump in jumps { self.patch_jump(jump); @@ -855,17 +883,22 @@ impl Compiler<'_, '_> { // Compile the assertion condition to leave its value on the stack. self.compile(slot, node.condition().unwrap()); self.emit_force(&node.condition().unwrap()); - let throw_idx = self.push_op(OpCode::OpJumpIfCatchable(JumpOffset(0)), node); - let then_idx = self.push_op(OpCode::OpJumpIfFalse(JumpOffset(0)), node); - self.push_op(OpCode::OpPop, node); + let throw_idx = self.push_op(Op::JumpIfCatchable, node); + self.push_u16(0); + + let then_idx = self.push_op(Op::JumpIfFalse, node); + self.push_u16(0); + + self.push_op(Op::Pop, node); self.compile(slot, node.body().unwrap()); - let else_idx = self.push_op(OpCode::OpJump(JumpOffset(0)), node); + let else_idx = self.push_op(Op::Jump, node); + self.push_u16(0); self.patch_jump(then_idx); - self.push_op(OpCode::OpPop, node); - self.push_op(OpCode::OpAssertFail, &node.condition().unwrap()); + self.push_op(Op::Pop, node); + self.push_op(Op::AssertFail, &node.condition().unwrap()); self.patch_jump(else_idx); self.patch_jump(throw_idx); @@ -887,22 +920,20 @@ impl Compiler<'_, '_> { self.compile(slot, node.condition().unwrap()); self.emit_force(&node.condition().unwrap()); - let throw_idx = self.push_op( - OpCode::OpJumpIfCatchable(JumpOffset(0)), - &node.condition().unwrap(), - ); - let then_idx = self.push_op( - OpCode::OpJumpIfFalse(JumpOffset(0)), - &node.condition().unwrap(), - ); + let throw_idx = self.push_op(Op::JumpIfCatchable, &node.condition().unwrap()); + self.push_u16(0); - self.push_op(OpCode::OpPop, node); // discard condition value + let then_idx = self.push_op(Op::JumpIfFalse, &node.condition().unwrap()); + self.push_u16(0); + + self.push_op(Op::Pop, node); // discard condition value self.compile(slot, node.body().unwrap()); - let else_idx = self.push_op(OpCode::OpJump(JumpOffset(0)), node); + let else_idx = self.push_op(Op::Jump, node); + self.push_u16(0); self.patch_jump(then_idx); // patch jump *to* else_body - self.push_op(OpCode::OpPop, node); // discard condition value + self.push_op(Op::Pop, node); // discard condition value self.compile(slot, node.else_body().unwrap()); self.patch_jump(else_idx); // patch jump *over* else body @@ -931,11 +962,12 @@ impl Compiler<'_, '_> { self.scope_mut().push_with(); - self.push_op(OpCode::OpPushWith(with_idx), &node.namespace().unwrap()); + self.push_op(Op::PushWith, &node.namespace().unwrap()); + self.push_uvarint(with_idx.0 as u64); self.compile(slot, node.body().unwrap()); - self.push_op(OpCode::OpPopWith, node); + self.push_op(Op::PopWith, node); self.scope_mut().pop_with(); self.cleanup_scope(node); } @@ -995,13 +1027,15 @@ impl Compiler<'_, '_> { // At call time, the attribute set is already at the top of the stack. self.scope_mut().mark_initialised(set_idx); self.emit_force(pattern); - let throw_idx = self.push_op(OpCode::OpJumpIfCatchable(JumpOffset(0)), pattern); + let throw_idx = self.push_op(Op::JumpIfCatchable, pattern); + self.push_u16(0); + // Evaluation fails on a type error, even if the argument(s) are unused. - self.push_op(OpCode::OpAssertAttrs, pattern); + self.push_op(Op::AssertAttrs, pattern); let ellipsis = pattern.ellipsis_token().is_some(); if !ellipsis { - self.push_op(OpCode::OpValidateClosedFormals, pattern); + self.push_op(Op::ValidateClosedFormals, pattern); } // Similar to `let ... in ...`, we now do multiple passes over @@ -1041,7 +1075,8 @@ impl Compiler<'_, '_> { // attempt to select from it. let stack_idx = self.scope().stack_index(set_idx); for tracked_formal in entries.iter() { - self.push_op(OpCode::OpGetLocal(stack_idx), pattern); + self.push_op(Op::GetLocal, pattern); + self.push_uvarint(stack_idx.0 as u64); self.emit_literal_ident(&tracked_formal.pattern_entry().ident().unwrap()); let idx = tracked_formal.local_idx(); @@ -1070,14 +1105,14 @@ impl Compiler<'_, '_> { // we only know better after compiling the default expression, so // avoiding unnecessary locals would mean we'd need to modify the chunk // after the fact. - self.push_op(OpCode::OpAttrsTrySelect, &pattern_entry.ident().unwrap()); - let jump_to_default = - self.push_op(OpCode::OpJumpIfNotFound(JumpOffset(0)), default_expr); + self.push_op(Op::AttrsTrySelect, &pattern_entry.ident().unwrap()); + let jump_to_default = self.push_op(Op::JumpIfNotFound, default_expr); + self.push_u16(0); self.emit_constant(Value::FinaliseRequest(false), default_expr); - let jump_over_default = - self.push_op(OpCode::OpJump(JumpOffset(0)), default_expr); + let jump_over_default = self.push_op(Op::Jump, default_expr); + self.push_u16(0); self.patch_jump(jump_to_default); @@ -1089,7 +1124,7 @@ impl Compiler<'_, '_> { self.patch_jump(jump_over_default); } TrackedFormal::NoDefault { pattern_entry, .. } => { - self.push_op(OpCode::OpAttrsSelect, &pattern_entry.ident().unwrap()); + self.push_op(Op::AttrsSelect, &pattern_entry.ident().unwrap()); } } @@ -1113,23 +1148,16 @@ impl Compiler<'_, '_> { let finalise_request_stack_idx = self.scope().stack_index(*finalise_request_idx); // TODO(sterni): better spans - self.push_op( - OpCode::OpGetLocal(finalise_request_stack_idx), - pattern - ); + self.push_op(Op::GetLocal, pattern); + self.push_uvarint(finalise_request_stack_idx.0 as u64); let jump_over_finalise = - self.push_op( - OpCode::OpJumpIfNoFinaliseRequest( - JumpOffset(0)), - pattern - ); - self.push_op( - OpCode::OpFinalise(stack_idx), - pattern, - ); + self.push_op(Op::JumpIfNoFinaliseRequest, pattern); + self.push_u16(0); + self.push_op(Op::Finalise, pattern); + self.push_uvarint(stack_idx.0 as u64); self.patch_jump(jump_over_finalise); // Get rid of finaliser request value on the stack - self.push_op(OpCode::OpPop, pattern); + self.push_op(Op::Pop, pattern); } } } @@ -1188,12 +1216,6 @@ impl Compiler<'_, '_> { }) } - /// Mark the current thunk as redundant, i.e. possible to merge directly - /// into its parent lambda context without affecting runtime behaviour. - fn unthunk(&mut self) { - self.context_mut().unthunk = true; - } - /// Compile an expression into a runtime closure or thunk fn compile_lambda_or_thunk( &mut self, @@ -1222,31 +1244,15 @@ impl Compiler<'_, '_> { self.patch_jump(throw_idx); } - // TODO: determine and insert enclosing name, if available. - // Pop the lambda context back off, and emit the finished // lambda as a constant. let mut compiled = self.contexts.pop().unwrap(); - // The compiler might have decided to unthunk, i.e. raise the compiled - // code to the parent context. In that case we do so and return right - // away. - if compiled.unthunk && is_suspended_thunk { - self.chunk().extend(compiled.lambda.chunk); - return; - } - // Emit an instruction to inform the VM that the chunk has ended. compiled .lambda .chunk - .push_op(OpCode::OpReturn, self.span_for(node)); - - // Capturing the with stack counts as an upvalue, as it is - // emitted as an upvalue data instruction. - if compiled.captures_with_stack { - compiled.lambda.upvalue_count += 1; - } + .push_op(Op::Return, self.span_for(node)); let lambda = Rc::new(compiled.lambda); if is_suspended_thunk { @@ -1256,7 +1262,7 @@ impl Compiler<'_, '_> { } // If no upvalues are captured, emit directly and move on. - if lambda.upvalue_count == 0 { + if lambda.upvalue_count == 0 && !compiled.captures_with_stack { self.emit_constant( if is_suspended_thunk { Value::Thunk(Thunk::new_suspended(lambda, span)) @@ -1276,12 +1282,13 @@ impl Compiler<'_, '_> { let code_idx = self.push_op( if is_suspended_thunk { - OpCode::OpThunkSuspended(blueprint_idx) + Op::ThunkSuspended } else { - OpCode::OpThunkClosure(blueprint_idx) + Op::ThunkClosure }, node, ); + self.push_uvarint(blueprint_idx.0 as u64); self.emit_upvalue_data( outer_slot, @@ -1292,18 +1299,21 @@ impl Compiler<'_, '_> { if !is_suspended_thunk && !self.scope()[outer_slot].needs_finaliser { if !self.scope()[outer_slot].must_thunk { - // The closure has upvalues, but is not recursive. Therefore no thunk is required, - // which saves us the overhead of Rc> - self.chunk()[code_idx] = OpCode::OpClosure(blueprint_idx); + // The closure has upvalues, but is not recursive. Therefore no + // thunk is required, which saves us the overhead of + // Rc> + self.chunk().code[code_idx.0] = Op::Closure as u8; } else { - // This case occurs when a closure has upvalue-references to itself but does not need a - // finaliser. Since no OpFinalise will be emitted later on we synthesize one here. - // It is needed here only to set [`Closure::is_finalised`] which is used for sanity checks. + // This case occurs when a closure has upvalue-references to + // itself but does not need a finaliser. Since no OpFinalise + // will be emitted later on we synthesize one here. It is needed + // here only to set [`Closure::is_finalised`] which is used for + // sanity checks. #[cfg(debug_assertions)] - self.push_op( - OpCode::OpFinalise(self.scope().stack_index(outer_slot)), - &self.span_for(node), - ); + { + self.push_op(Op::Finalise, &self.span_for(node)); + self.push_uvarint(self.scope().stack_index(outer_slot).0 as u64); + } } } } @@ -1316,7 +1326,7 @@ impl Compiler<'_, '_> { self.compile(slot, node.argument().unwrap()); self.compile(slot, node.lambda().unwrap()); self.emit_force(&node.lambda().unwrap()); - self.push_op(OpCode::OpCall, node); + self.push_op(Op::Call, node); } /// Emit the data instructions that the runtime needs to correctly @@ -1324,10 +1334,18 @@ impl Compiler<'_, '_> { fn emit_upvalue_data( &mut self, slot: LocalIdx, - node: &T, + _: &T, // TODO upvalues: Vec, capture_with: bool, ) { + // Push the count of arguments to be expected, with one bit set to + // indicate whether the with stack needs to be captured. + let mut count = (upvalues.len() as u64) << 1; + if capture_with { + count |= 1; + } + self.push_uvarint(count); + for upvalue in upvalues { match upvalue.kind { UpvalueKind::Local(idx) => { @@ -1337,27 +1355,22 @@ impl Compiler<'_, '_> { // If the target is not yet initialised, we need to defer // the local access if !target.initialised { - self.push_op(OpCode::DataDeferredLocal(stack_idx), &upvalue.span); + self.push_uvarint(Position::deferred_local(stack_idx).0); self.scope_mut().mark_needs_finaliser(slot); } else { // a self-reference if slot == idx { self.scope_mut().mark_must_thunk(slot); } - self.push_op(OpCode::DataStackIdx(stack_idx), &upvalue.span); + self.push_uvarint(Position::stack_index(stack_idx).0); } } UpvalueKind::Upvalue(idx) => { - self.push_op(OpCode::DataUpvalueIdx(idx), &upvalue.span); + self.push_uvarint(Position::upvalue_index(idx).0); } }; } - - if capture_with { - // TODO(tazjin): probably better to emit span for the ident that caused this - self.push_op(OpCode::DataCaptureWith, node); - } } /// Emit the literal string value of an identifier. Required for @@ -1374,20 +1387,7 @@ impl Compiler<'_, '_> { /// not known at the time when the jump operation itself is /// emitted. fn patch_jump(&mut self, idx: CodeIdx) { - let offset = JumpOffset(self.chunk().code.len() - 1 - idx.0); - - match &mut self.chunk().code[idx.0] { - OpCode::OpJump(n) - | OpCode::OpJumpIfFalse(n) - | OpCode::OpJumpIfTrue(n) - | OpCode::OpJumpIfCatchable(n) - | OpCode::OpJumpIfNotFound(n) - | OpCode::OpJumpIfNoFinaliseRequest(n) => { - *n = offset; - } - - op => panic!("attempted to patch unsupported op: {:?}", op), - } + self.chunk().patch_jump(idx.0); } /// Decrease scope depth of the current function and emit @@ -1403,7 +1403,8 @@ impl Compiler<'_, '_> { } if popcount > 0 { - self.push_op(OpCode::OpCloseScope(Count(popcount)), node); + self.push_op(Op::CloseScope, node); + self.push_uvarint(popcount as u64); } } @@ -1461,16 +1462,7 @@ impl Compiler<'_, '_> { } fn emit_force(&mut self, node: &N) { - if let Some(&OpCode::OpConstant(c)) = self.chunk().last_op() { - if !self.chunk().get_constant(c).unwrap().is_thunk() { - // Optimization: Don't emit a force op for non-thunk constants, since they don't - // need one! - // TODO: this is probably doable for more ops (?) - return; - } - } - - self.push_op(OpCode::OpForce, node); + self.push_op(Op::Force, node); } fn emit_warning(&mut self, node: &N, kind: WarningKind) { @@ -1673,10 +1665,11 @@ pub fn compile( c.emit_force(expr); if let Some(env) = env { if !env.is_empty() { - c.push_op(OpCode::OpCloseScope(Count(env.len())), &root_span); + c.push_op(Op::CloseScope, &root_span); + c.push_uvarint(env.len() as u64); } } - c.push_op(OpCode::OpReturn, &root_span); + c.push_op(Op::Return, &root_span); let lambda = Rc::new(c.contexts.pop().unwrap().lambda); c.observer.observe_compiled_toplevel(&lambda); diff --git a/tvix/eval/src/compiler/scope.rs b/tvix/eval/src/compiler/scope.rs index 1087e0153..bb1784e67 100644 --- a/tvix/eval/src/compiler/scope.rs +++ b/tvix/eval/src/compiler/scope.rs @@ -105,7 +105,6 @@ pub enum UpvalueKind { #[derive(Clone, Debug)] pub struct Upvalue { pub kind: UpvalueKind, - pub span: codemap::Span, } /// The index of a local in the scope's local array at compile time. diff --git a/tvix/eval/src/observer.rs b/tvix/eval/src/observer.rs index f5de39931..5e6526418 100644 --- a/tvix/eval/src/observer.rs +++ b/tvix/eval/src/observer.rs @@ -13,7 +13,7 @@ use tabwriter::TabWriter; use crate::chunk::Chunk; use crate::generators::VMRequest; -use crate::opcode::{CodeIdx, OpCode}; +use crate::opcode::{CodeIdx, Op}; use crate::value::Lambda; use crate::SourceCode; use crate::Value; @@ -73,7 +73,7 @@ pub trait RuntimeObserver { /// Called when the runtime *begins* executing an instruction. The /// provided stack is the state at the beginning of the operation. - fn observe_execute_op(&mut self, _ip: CodeIdx, _: &OpCode, _: &[Value]) {} + fn observe_execute_op(&mut self, _ip: CodeIdx, _: &Op, _: &[Value]) {} } #[derive(Default)] @@ -112,8 +112,12 @@ impl DisassemblingObserver { // calculate width of the widest address in the chunk let width = format!("{:#x}", chunk.code.len() - 1).len(); - for (idx, _) in chunk.code.iter().enumerate() { - let _ = chunk.disassemble_op(&mut self.writer, &self.source, width, CodeIdx(idx)); + let mut idx = 0; + while idx < chunk.code.len() { + let size = chunk + .disassemble_op(&mut self.writer, &self.source, width, CodeIdx(idx)) + .expect("writing debug output should work"); + idx += size; } } } @@ -304,7 +308,7 @@ impl RuntimeObserver for TracingObserver { ); } - fn observe_execute_op(&mut self, ip: CodeIdx, op: &OpCode, stack: &[Value]) { + fn observe_execute_op(&mut self, ip: CodeIdx, op: &Op, stack: &[Value]) { self.maybe_write_time(); let _ = write!(&mut self.writer, "{:04} {:?}\t", ip.0, op); self.write_stack(stack); diff --git a/tvix/eval/src/opcode.rs b/tvix/eval/src/opcode.rs index f89c1c12e..ddf1304b3 100644 --- a/tvix/eval/src/opcode.rs +++ b/tvix/eval/src/opcode.rs @@ -52,8 +52,7 @@ pub struct JumpOffset(pub usize); #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct Count(pub usize); -/// All variants of this enum carry a bounded amount of data to -/// ensure that no heap allocations are needed for an Opcode. +/// Op represents all instructions in the Tvix abstract machine. /// /// In documentation comments, stack positions are referred to by /// indices written in `{}` as such, where required: @@ -70,187 +69,182 @@ pub struct Count(pub usize); /// /// Unless otherwise specified, operations leave their result at the /// top of the stack. -#[warn(variant_size_differences)] -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum OpCode { +#[repr(u8)] +#[derive(Debug, PartialEq, Eq)] +pub enum Op { /// Push a constant onto the stack. - OpConstant(ConstantIdx), + Constant, - // Unary operators - /// Discard a value from the stack. - OpPop, + /// Discard the value on top of the stack. + Pop, /// Invert the boolean at the top of the stack. - OpInvert, + Invert, - // Binary operators /// Invert the sign of the number at the top of the stack. - OpNegate, + Negate, /// Sum up the two numbers at the top of the stack. - OpAdd, + Add, /// Subtract the number at {1} from the number at {2}. - OpSub, + Sub, /// Multiply the two numbers at the top of the stack. - OpMul, + Mul, /// Divide the two numbers at the top of the stack. - OpDiv, + Div, - // Comparison operators /// Check the two values at the top of the stack for Nix-equality. - OpEqual, + Equal, /// Check whether the value at {2} is less than {1}. - OpLess, + Less, /// Check whether the value at {2} is less than or equal to {1}. - OpLessOrEq, + LessOrEq, /// Check whether the value at {2} is greater than {1}. - OpMore, + More, /// Check whether the value at {2} is greater than or equal to {1}. - OpMoreOrEq, + MoreOrEq, - // Logical operators & generic jumps /// Jump forward in the bytecode specified by the number of /// instructions in its usize operand. - OpJump(JumpOffset), + Jump, /// Jump forward in the bytecode specified by the number of /// instructions in its usize operand, *if* the value at the top /// of the stack is `true`. - OpJumpIfTrue(JumpOffset), + JumpIfTrue, /// Jump forward in the bytecode specified by the number of /// instructions in its usize operand, *if* the value at the top /// of the stack is `false`. - OpJumpIfFalse(JumpOffset), + JumpIfFalse, /// Pop one stack item and jump forward in the bytecode /// specified by the number of instructions in its usize /// operand, *if* the value at the top of the stack is a /// Value::Catchable. - OpJumpIfCatchable(JumpOffset), + JumpIfCatchable, /// Jump forward in the bytecode specified by the number of /// instructions in its usize operand, *if* the value at the top /// of the stack is the internal value representing a missing /// attribute set key. - OpJumpIfNotFound(JumpOffset), + JumpIfNotFound, /// Jump forward in the bytecode specified by the number of /// instructions in its usize operand, *if* the value at the top /// of the stack is *not* the internal value requesting a /// stack value finalisation. - OpJumpIfNoFinaliseRequest(JumpOffset), + JumpIfNoFinaliseRequest, + + /// Construct an attribute set from the given number of key-value pairs on + /// the top of the stack. The operand gives the count of *pairs*, not the + /// number of *stack values* - the actual number of values popped off the + /// stack will be twice the argument to this op. + Attrs, - // Attribute sets - /// Construct an attribute set from the given number of key-value pairs on the top of the stack - /// - /// Note that this takes the count of *pairs*, not the number of *stack values* - the actual - /// number of values popped off the stack will be twice the argument to this op - OpAttrs(Count), /// Merge the attribute set at {2} into the attribute set at {1}, /// and leave the new set at the top of the stack. - OpAttrsUpdate, + AttrsUpdate, /// Select the attribute with the name at {1} from the set at {2}. - OpAttrsSelect, + AttrsSelect, /// Select the attribute with the name at {1} from the set at {2}, but leave /// a `Value::AttrNotFound` in the stack instead of failing if it is /// missing. - OpAttrsTrySelect, + AttrsTrySelect, /// Check for the presence of the attribute with the name at {1} in the set /// at {2}. - OpHasAttr, + HasAttr, /// Throw an error if the attribute set at the top of the stack has any attributes /// other than those listed in the formals of the current lambda /// /// Panics if the current frame is not a lambda with formals - OpValidateClosedFormals, + ValidateClosedFormals, - // `with`-handling /// Push a value onto the runtime `with`-stack to enable dynamic identifier /// resolution. The absolute stack index of the value is supplied as a usize /// operand. - OpPushWith(StackIdx), + PushWith, /// Pop the last runtime `with`-stack element. - OpPopWith, + PopWith, /// Dynamically resolve an identifier with the name at {1} from the runtime /// `with`-stack. - OpResolveWith, + ResolveWith, // Lists /// Construct a list from the given number of values at the top of the /// stack. - OpList(Count), + List, /// Concatenate the lists at {2} and {1}. - OpConcat, + Concat, // Strings /// Interpolate the given number of string fragments into a single string. - OpInterpolate(Count), + Interpolate, /// Force the Value on the stack and coerce it to a string - OpCoerceToString(crate::CoercionKind), + CoerceToString, // Paths /// Attempt to resolve the Value on the stack using the configured [`NixSearchPath`][] /// /// [`NixSearchPath`]: crate::nix_search_path::NixSearchPath - OpFindFile, + FindFile, /// Attempt to resolve a path literal relative to the home dir - OpResolveHomePath, + ResolveHomePath, // Type assertion operators /// Assert that the value at {1} is a boolean, and fail with a runtime error /// otherwise. - OpAssertBool, - OpAssertAttrs, + AssertBool, + AssertAttrs, /// Access local identifiers with statically known positions. - OpGetLocal(StackIdx), + GetLocal, /// Close scopes while leaving their expression value around. - OpCloseScope(Count), // number of locals to pop + CloseScope, /// Return an error indicating that an `assert` failed - OpAssertFail, + AssertFail, // Lambdas & closures /// Call the value at {1} in a new VM callframe - OpCall, + Call, /// Retrieve the upvalue at the given index from the closure or thunk /// currently under evaluation. - OpGetUpvalue(UpvalueIdx), + GetUpvalue, /// Construct a closure which has upvalues but no self-references - OpClosure(ConstantIdx), + Closure, /// Construct a closure which has self-references (direct or via upvalues) - OpThunkClosure(ConstantIdx), + ThunkClosure, /// Construct a suspended thunk, used to delay a computation for laziness. - OpThunkSuspended(ConstantIdx), + ThunkSuspended, /// Force the value at {1} until it is a `Thunk::Evaluated`. - OpForce, + Force, /// Finalise initialisation of the upvalues of the value in the given stack /// index (which must be a Value::Thunk) after the scope is fully bound. - OpFinalise(StackIdx), + Finalise, /// Final instruction emitted in a chunk. Does not have an /// inherent effect, but can simplify VM logic as a marker in some @@ -258,27 +252,140 @@ pub enum OpCode { /// /// Can be thought of as "returning" the value to the parent /// frame, hence the name. - OpReturn, + Return, - // [`OpClosure`], [`OpThunkSuspended`], and [`OpThunkClosure`] have a - // variable number of arguments to the instruction, which is - // represented here by making their data part of the opcodes. - // Each of these two opcodes has a `ConstantIdx`, which must - // reference a `Value::Blueprint(Lambda)`. The `upvalue_count` - // field in that `Lambda` indicates the number of arguments it - // takes, and the opcode must be followed by exactly this number - // of `Data*` opcodes. The VM skips over these by advancing the - // instruction pointer. - // - // It is illegal for a `Data*` opcode to appear anywhere else. - /// Populate a static upvalue by copying from the stack immediately. - DataStackIdx(StackIdx), - /// Populate a static upvalue of a thunk by copying it the stack, but do - /// when the thunk is finalised (by OpFinalise) rather than immediately. - DataDeferredLocal(StackIdx), - /// Populate a static upvalue by copying it from the upvalues of an - /// enclosing scope. - DataUpvalueIdx(UpvalueIdx), - /// Populate dynamic upvalues by saving a copy of the with-stack. - DataCaptureWith, + /// Sentinel value to signal invalid bytecode. This MUST always be the last + /// value in the enum. Do not move it! + Invalid, +} + +const _ASSERT_SMALL_OP: () = assert!(std::mem::size_of::() == 1); + +impl From for Op { + fn from(num: u8) -> Self { + if num >= Self::Invalid as u8 { + return Self::Invalid; + } + + // SAFETY: As long as `Invalid` remains the last variant of the enum, + // and as long as variant values are not specified manually, this + // conversion is safe. + unsafe { std::mem::transmute(num) } + } +} + +pub enum OpArg { + None, + Uvarint, + Fixed, + Custom, +} + +impl Op { + pub fn arg_type(&self) -> OpArg { + match self { + Op::Constant + | Op::Attrs + | Op::PushWith + | Op::List + | Op::Interpolate + | Op::GetLocal + | Op::CloseScope + | Op::GetUpvalue + | Op::Finalise => OpArg::Uvarint, + + Op::Jump + | Op::JumpIfTrue + | Op::JumpIfFalse + | Op::JumpIfCatchable + | Op::JumpIfNotFound + | Op::JumpIfNoFinaliseRequest => OpArg::Fixed, + + Op::CoerceToString | Op::Closure | Op::ThunkClosure | Op::ThunkSuspended => { + OpArg::Custom + } + _ => OpArg::None, + } + } +} + +/// Position is used to represent where to capture an upvalue from. +#[derive(Clone, Copy)] +pub struct Position(pub u64); + +impl Position { + pub fn stack_index(idx: StackIdx) -> Self { + Position((idx.0 as u64) << 2) + } + + pub fn deferred_local(idx: StackIdx) -> Self { + Position(((idx.0 as u64) << 2) | 1) + } + + pub fn upvalue_index(idx: UpvalueIdx) -> Self { + Position(((idx.0 as u64) << 2) | 2) + } + + pub fn runtime_stack_index(&self) -> Option { + if (self.0 & 0b11) == 0 { + return Some(StackIdx((self.0 >> 2) as usize)); + } + + None + } + + pub fn runtime_deferred_local(&self) -> Option { + if (self.0 & 0b11) == 1 { + return Some(StackIdx((self.0 >> 2) as usize)); + } + + None + } + + pub fn runtime_upvalue_index(&self) -> Option { + if (self.0 & 0b11) == 2 { + return Some(UpvalueIdx((self.0 >> 2) as usize)); + } + + None + } +} + +#[cfg(test)] +mod position_tests { + use super::Position; // he-he + use super::{StackIdx, UpvalueIdx}; + + #[test] + fn test_stack_index_position() { + let idx = StackIdx(42); + let pos = Position::stack_index(idx); + let result = pos.runtime_stack_index(); + + assert_eq!(result, Some(idx)); + assert_eq!(pos.runtime_deferred_local(), None); + assert_eq!(pos.runtime_upvalue_index(), None); + } + + #[test] + fn test_deferred_local_position() { + let idx = StackIdx(42); + let pos = Position::deferred_local(idx); + let result = pos.runtime_deferred_local(); + + assert_eq!(result, Some(idx)); + assert_eq!(pos.runtime_stack_index(), None); + assert_eq!(pos.runtime_upvalue_index(), None); + } + + #[test] + fn test_upvalue_index_position() { + let idx = UpvalueIdx(42); + let pos = Position::upvalue_index(idx); + let result = pos.runtime_upvalue_index(); + + assert_eq!(result, Some(idx)); + assert_eq!(pos.runtime_stack_index(), None); + assert_eq!(pos.runtime_deferred_local(), None); + } } diff --git a/tvix/eval/src/value/mod.rs b/tvix/eval/src/value/mod.rs index 27c198ca9..6109a0da2 100644 --- a/tvix/eval/src/value/mod.rs +++ b/tvix/eval/src/value/mod.rs @@ -186,6 +186,21 @@ pub struct CoercionKind { pub import_paths: bool, } +impl From for u8 { + fn from(k: CoercionKind) -> u8 { + k.strong as u8 | (k.import_paths as u8) << 1 + } +} + +impl From for CoercionKind { + fn from(byte: u8) -> Self { + CoercionKind { + strong: byte & 0x01 != 0, + import_paths: byte & 0x02 != 0, + } + } +} + impl From for Value where T: Into, diff --git a/tvix/eval/src/value/thunk.rs b/tvix/eval/src/value/thunk.rs index 2019e8ab3..4b915019d 100644 --- a/tvix/eval/src/value/thunk.rs +++ b/tvix/eval/src/value/thunk.rs @@ -27,7 +27,7 @@ use std::{ use crate::{ errors::ErrorKind, - opcode::OpCode, + opcode::Op, upvalues::Upvalues, value::Closure, vm::generators::{self, GenCo}, @@ -170,13 +170,15 @@ impl Thunk { // This is basically a recreation of compile_apply(): // We need to push the argument onto the stack and then the function. // The function (not the argument) needs to be forced before calling. - lambda.chunk.push_op(OpCode::OpConstant(arg_idx), span); - lambda.chunk().push_op(OpCode::OpConstant(f_idx), span); - lambda.chunk.push_op(OpCode::OpForce, span); - lambda.chunk.push_op(OpCode::OpCall, span); + lambda.chunk.push_op(Op::Constant, span); + lambda.chunk.push_uvarint(arg_idx.0 as u64); + lambda.chunk.push_op(Op::Constant, span); + lambda.chunk.push_uvarint(f_idx.0 as u64); + lambda.chunk.push_op(Op::Force, span); + lambda.chunk.push_op(Op::Call, span); // Inform the VM that the chunk has ended - lambda.chunk.push_op(OpCode::OpReturn, span); + lambda.chunk.push_op(Op::Return, span); Thunk(Rc::new(RefCell::new(ThunkRepr::Suspended { upvalues: Rc::new(Upvalues::with_capacity(0)), diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs index a6d0941e8..7ac6d493f 100644 --- a/tvix/eval/src/vm/mod.rs +++ b/tvix/eval/src/vm/mod.rs @@ -28,7 +28,7 @@ use crate::{ lifted_pop, nix_search_path::NixSearchPath, observer::RuntimeObserver, - opcode::{CodeIdx, Count, JumpOffset, OpCode, StackIdx, UpvalueIdx}, + opcode::{CodeIdx, Op, Position, UpvalueIdx}, upvalues::Upvalues, value::{ Builtin, BuiltinResult, Closure, CoercionKind, Lambda, NixAttrs, NixContext, NixList, @@ -146,10 +146,32 @@ impl CallFrame { /// Increment this frame's instruction pointer and return the operation that /// the pointer moved past. - fn inc_ip(&mut self) -> OpCode { - let op = self.chunk()[self.ip]; + fn inc_ip(&mut self) -> Op { + debug_assert!( + self.ip.0 < self.chunk().code.len(), + "out of bounds code at IP {} in {:p}", + self.ip.0, + self.lambda + ); + + let op = self.chunk().code[self.ip.0]; self.ip += 1; - op + op.into() + } + + /// Read a varint-encoded operand and return it. The frame pointer is + /// incremented internally. + fn read_uvarint(&mut self) -> u64 { + let (arg, size) = self.chunk().read_uvarint(self.ip.0); + self.ip += size; + arg + } + + /// Read a fixed-size u16 and increment the frame pointer. + fn read_u16(&mut self) -> u16 { + let arg = self.chunk().read_u16(self.ip.0); + self.ip += 2; + arg } /// Construct an error result from the given ErrorKind and the source span @@ -443,17 +465,25 @@ where self.observer.observe_execute_op(frame.ip, &op, &self.stack); match op { - OpCode::OpThunkSuspended(idx) | OpCode::OpThunkClosure(idx) => { - let blueprint = match &frame.chunk()[idx] { + Op::ThunkSuspended | Op::ThunkClosure => { + let idx = frame.read_uvarint() as usize; + + let blueprint = match &frame.chunk().constants[idx] { Value::Blueprint(lambda) => lambda.clone(), _ => panic!("compiler bug: non-blueprint in blueprint slot"), }; - let upvalue_count = blueprint.upvalue_count; - let thunk = if matches!(op, OpCode::OpThunkClosure(_)) { + let upvalue_count = frame.read_uvarint(); + + debug_assert!( + (upvalue_count >> 1) == blueprint.upvalue_count as u64, + "TODO: new upvalue count not correct", + ); + + let thunk = if op == Op::ThunkClosure { debug_assert!( - upvalue_count > 0, - "OpThunkClosure should not be called for plain lambdas" + (((upvalue_count >> 1) > 0) || (upvalue_count & 0b1 == 1)), + "OpThunkClosure should not be called for plain lambdas", ); Thunk::new_closure(blueprint) } else { @@ -470,7 +500,7 @@ where self.populate_upvalues(&mut frame, upvalue_count, upvalues)?; } - OpCode::OpForce => { + Op::Force => { if let Some(Value::Thunk(_)) = self.stack.last() { let thunk = match self.stack_pop() { Value::Thunk(t) => t, @@ -488,25 +518,35 @@ where } } - OpCode::OpGetUpvalue(upv_idx) => { - let value = frame.upvalue(upv_idx).clone(); + Op::GetUpvalue => { + let idx = UpvalueIdx(frame.read_uvarint() as usize); + let value = frame.upvalue(idx).clone(); self.stack.push(value); } // Discard the current frame. - OpCode::OpReturn => { + Op::Return => { // TODO(amjoseph): I think this should assert `==` rather // than `<=` but it fails with the stricter condition. debug_assert!(self.stack.len() - 1 <= frame.stack_offset); return Ok(true); } - OpCode::OpConstant(idx) => { - let c = frame.chunk()[idx].clone(); + Op::Constant => { + let idx = frame.read_uvarint() as usize; + + debug_assert!( + idx < frame.chunk().constants.len(), + "out of bounds constant at IP {} in {:p}", + frame.ip.0, + frame.lambda + ); + + let c = frame.chunk().constants[idx].clone(); self.stack.push(c); } - OpCode::OpCall => { + Op::Call => { let callable = self.stack_pop(); self.call_value(frame.current_span(), Some((span, frame)), callable)?; @@ -516,7 +556,8 @@ where // Remove the given number of elements from the stack, // but retain the top value. - OpCode::OpCloseScope(Count(count)) => { + Op::CloseScope => { + let count = frame.read_uvarint() as usize; // Immediately move the top value into the right // position. let target_idx = self.stack.len() - 1 - count; @@ -528,15 +569,22 @@ where } } - OpCode::OpClosure(idx) => { - let blueprint = match &frame.chunk()[idx] { + Op::Closure => { + let idx = frame.read_uvarint() as usize; + let blueprint = match &frame.chunk().constants[idx] { Value::Blueprint(lambda) => lambda.clone(), _ => panic!("compiler bug: non-blueprint in blueprint slot"), }; - let upvalue_count = blueprint.upvalue_count; + let upvalue_count = frame.read_uvarint(); + debug_assert!( - upvalue_count > 0, + (upvalue_count >> 1) == blueprint.upvalue_count as u64, + "TODO: new upvalue count not correct in closure", + ); + + debug_assert!( + ((upvalue_count >> 1) > 0 || (upvalue_count & 0b1 == 1)), "OpClosure should not be called for plain lambdas" ); @@ -549,7 +597,7 @@ where )))); } - OpCode::OpAttrsSelect => lifted_pop! { + Op::AttrsSelect => lifted_pop! { self(key, attrs) => { let key = key.to_str().with_span(&frame, self)?; let attrs = attrs.to_attrs().with_span(&frame, self)?; @@ -569,21 +617,24 @@ where } }, - OpCode::OpJumpIfFalse(JumpOffset(offset)) => { + Op::JumpIfFalse => { + let offset = frame.read_u16() as usize; debug_assert!(offset != 0); if !self.stack_peek(0).as_bool().with_span(&frame, self)? { frame.ip += offset; } } - OpCode::OpJumpIfCatchable(JumpOffset(offset)) => { + Op::JumpIfCatchable => { + let offset = frame.read_u16() as usize; debug_assert!(offset != 0); if self.stack_peek(0).is_catchable() { frame.ip += offset; } } - OpCode::OpJumpIfNoFinaliseRequest(JumpOffset(offset)) => { + Op::JumpIfNoFinaliseRequest => { + let offset = frame.read_u16() as usize; debug_assert!(offset != 0); match self.stack_peek(0) { Value::FinaliseRequest(finalise) => { @@ -595,11 +646,11 @@ where } } - OpCode::OpPop => { + Op::Pop => { self.stack.pop(); } - OpCode::OpAttrsTrySelect => { + Op::AttrsTrySelect => { let key = self.stack_pop().to_str().with_span(&frame, self)?; let value = match self.stack_pop() { Value::Attrs(attrs) => match attrs.select(&key) { @@ -613,12 +664,14 @@ where self.stack.push(value); } - OpCode::OpGetLocal(StackIdx(local_idx)) => { + Op::GetLocal => { + let local_idx = frame.read_uvarint() as usize; let idx = frame.stack_offset + local_idx; self.stack.push(self.stack[idx].clone()); } - OpCode::OpJumpIfNotFound(JumpOffset(offset)) => { + Op::JumpIfNotFound => { + let offset = frame.read_u16() as usize; debug_assert!(offset != 0); if matches!(self.stack_peek(0), Value::AttrNotFound) { self.stack_pop(); @@ -626,12 +679,13 @@ where } } - OpCode::OpJump(JumpOffset(offset)) => { + Op::Jump => { + let offset = frame.read_u16() as usize; debug_assert!(offset != 0); frame.ip += offset; } - OpCode::OpEqual => lifted_pop! { + Op::Equal => lifted_pop! { self(b, a) => { let gen_span = frame.current_span(); self.push_call_frame(span, frame); @@ -646,7 +700,7 @@ where // top is not of the expected type. This is necessary // to implement some specific behaviours of Nix // exactly. - OpCode::OpAssertBool => { + Op::AssertBool => { let val = self.stack_peek(0); // TODO(edef): propagate this into is_bool, since bottom values *are* values of any type if !val.is_catchable() && !val.is_bool() { @@ -660,7 +714,7 @@ where } } - OpCode::OpAssertAttrs => { + Op::AssertAttrs => { let val = self.stack_peek(0); // TODO(edef): propagate this into is_attrs, since bottom values *are* values of any type if !val.is_catchable() && !val.is_attrs() { @@ -674,9 +728,9 @@ where } } - OpCode::OpAttrs(Count(count)) => self.run_attrset(&frame, count)?, + Op::Attrs => self.run_attrset(frame.read_uvarint() as usize, &frame)?, - OpCode::OpAttrsUpdate => lifted_pop! { + Op::AttrsUpdate => lifted_pop! { self(rhs, lhs) => { let rhs = rhs.to_attrs().with_span(&frame, self)?; let lhs = lhs.to_attrs().with_span(&frame, self)?; @@ -684,28 +738,30 @@ where } }, - OpCode::OpInvert => lifted_pop! { + Op::Invert => lifted_pop! { self(v) => { let v = v.as_bool().with_span(&frame, self)?; self.stack.push(Value::Bool(!v)); } }, - OpCode::OpList(Count(count)) => { + Op::List => { + let count = frame.read_uvarint() as usize; let list = NixList::construct(count, self.stack.split_off(self.stack.len() - count)); self.stack.push(Value::List(list)); } - OpCode::OpJumpIfTrue(JumpOffset(offset)) => { + Op::JumpIfTrue => { + let offset = frame.read_u16() as usize; debug_assert!(offset != 0); if self.stack_peek(0).as_bool().with_span(&frame, self)? { frame.ip += offset; } } - OpCode::OpHasAttr => lifted_pop! { + Op::HasAttr => lifted_pop! { self(key, attrs) => { let key = key.to_str().with_span(&frame, self)?; let result = match attrs { @@ -720,7 +776,7 @@ where } }, - OpCode::OpConcat => lifted_pop! { + Op::Concat => lifted_pop! { self(rhs, lhs) => { let rhs = rhs.to_list().with_span(&frame, self)?.into_inner(); let lhs = lhs.to_list().with_span(&frame, self)?.into_inner(); @@ -728,7 +784,7 @@ where } }, - OpCode::OpResolveWith => { + Op::ResolveWith => { let ident = self.stack_pop().to_str().with_span(&frame, self)?; // Re-enqueue this frame. @@ -755,13 +811,19 @@ where return Ok(false); } - OpCode::OpFinalise(StackIdx(idx)) => match &self.stack[frame.stack_offset + idx] { - Value::Closure(_) => panic!("attempted to finalise a closure"), - Value::Thunk(thunk) => thunk.finalise(&self.stack[frame.stack_offset..]), - _ => panic!("attempted to finalise a non-thunk"), - }, + Op::Finalise => { + let idx = frame.read_uvarint() as usize; + match &self.stack[frame.stack_offset + idx] { + Value::Closure(_) => panic!("attempted to finalise a closure"), + Value::Thunk(thunk) => thunk.finalise(&self.stack[frame.stack_offset..]), + _ => panic!("attempted to finalise a non-thunk"), + } + } + + Op::CoerceToString => { + let kind: CoercionKind = frame.chunk().code[frame.ip.0].into(); + frame.ip.0 += 1; - OpCode::OpCoerceToString(kind) => { let value = self.stack_pop(); let gen_span = frame.current_span(); self.push_call_frame(span, frame); @@ -773,9 +835,9 @@ where return Ok(false); } - OpCode::OpInterpolate(Count(count)) => self.run_interpolate(&frame, count)?, + Op::Interpolate => self.run_interpolate(frame.read_uvarint(), &frame)?, - OpCode::OpValidateClosedFormals => { + Op::ValidateClosedFormals => { let formals = frame.lambda.formals.as_ref().expect( "OpValidateClosedFormals called within the frame of a lambda without formals", ); @@ -799,7 +861,7 @@ where } } - OpCode::OpAdd => lifted_pop! { + Op::Add => lifted_pop! { self(b, a) => { let gen_span = frame.current_span(); self.push_call_frame(span, frame); @@ -812,21 +874,21 @@ where } }, - OpCode::OpSub => lifted_pop! { + Op::Sub => lifted_pop! { self(b, a) => { let result = arithmetic_op!(&a, &b, -).with_span(&frame, self)?; self.stack.push(result); } }, - OpCode::OpMul => lifted_pop! { + Op::Mul => lifted_pop! { self(b, a) => { let result = arithmetic_op!(&a, &b, *).with_span(&frame, self)?; self.stack.push(result); } }, - OpCode::OpDiv => lifted_pop! { + Op::Div => lifted_pop! { self(b, a) => { match b { Value::Integer(0) => return frame.error(self, ErrorKind::DivisionByZero), @@ -841,7 +903,7 @@ where } }, - OpCode::OpNegate => match self.stack_pop() { + Op::Negate => match self.stack_pop() { Value::Integer(i) => self.stack.push(Value::Integer(-i)), Value::Float(f) => self.stack.push(Value::Float(-f)), Value::Catchable(cex) => self.stack.push(Value::Catchable(cex)), @@ -856,12 +918,12 @@ where } }, - OpCode::OpLess => cmp_op!(self, frame, span, <), - OpCode::OpLessOrEq => cmp_op!(self, frame, span, <=), - OpCode::OpMore => cmp_op!(self, frame, span, >), - OpCode::OpMoreOrEq => cmp_op!(self, frame, span, >=), + Op::Less => cmp_op!(self, frame, span, <), + Op::LessOrEq => cmp_op!(self, frame, span, <=), + Op::More => cmp_op!(self, frame, span, >), + Op::MoreOrEq => cmp_op!(self, frame, span, >=), - OpCode::OpFindFile => match self.stack_pop() { + Op::FindFile => match self.stack_pop() { Value::UnresolvedPath(path) => { let resolved = self .nix_search_path @@ -873,7 +935,7 @@ where _ => panic!("tvix compiler bug: OpFindFile called on non-UnresolvedPath"), }, - OpCode::OpResolveHomePath => match self.stack_pop() { + Op::ResolveHomePath => match self.stack_pop() { Value::UnresolvedPath(path) => { match dirs::home_dir() { None => { @@ -896,24 +958,23 @@ where } }, - OpCode::OpPushWith(StackIdx(idx)) => self.with_stack.push(frame.stack_offset + idx), + Op::PushWith => self + .with_stack + .push(frame.stack_offset + frame.read_uvarint() as usize), - OpCode::OpPopWith => { + Op::PopWith => { self.with_stack.pop(); } - OpCode::OpAssertFail => { + Op::AssertFail => { self.stack .push(Value::from(CatchableErrorKind::AssertionFailed)); } - // Data-carrying operands should never be executed, - // that is a critical error in the VM/compiler. - OpCode::DataStackIdx(_) - | OpCode::DataDeferredLocal(_) - | OpCode::DataUpvalueIdx(_) - | OpCode::DataCaptureWith => { - panic!("Tvix bug: attempted to execute data-carrying operand") + // Encountering an invalid opcode is a critical error in the + // VM/compiler. + Op::Invalid => { + panic!("Tvix bug: attempted to execute invalid opcode") } } } @@ -933,7 +994,7 @@ where &self.stack[self.stack.len() - 1 - offset] } - fn run_attrset(&mut self, frame: &CallFrame, count: usize) -> EvalResult<()> { + fn run_attrset(&mut self, count: usize, frame: &CallFrame) -> EvalResult<()> { let attrs = NixAttrs::construct(count, self.stack.split_off(self.stack.len() - count * 2)) .with_span(frame, self)? .map(Value::attrs) @@ -971,7 +1032,7 @@ where /// Interpolate string fragments by popping the specified number of /// fragments of the stack, evaluating them to strings, and pushing /// the concatenated result string back on the stack. - fn run_interpolate(&mut self, frame: &CallFrame, count: usize) -> EvalResult<()> { + fn run_interpolate(&mut self, count: u64, frame: &CallFrame) -> EvalResult<()> { let mut out = BString::default(); // Interpolation propagates the context and union them. let mut context: NixContext = NixContext::new(); @@ -1090,64 +1151,74 @@ where } /// Populate the upvalue fields of a thunk or closure under construction. + /// + /// See the closely tied function `emit_upvalue_data` in the compiler + /// implementation for details on the argument processing. fn populate_upvalues( &mut self, frame: &mut CallFrame, - count: usize, + count: u64, mut upvalues: impl DerefMut, ) -> EvalResult<()> { - for _ in 0..count { - match frame.inc_ip() { - OpCode::DataStackIdx(StackIdx(stack_idx)) => { - let idx = frame.stack_offset + stack_idx; + // Determine whether to capture the with stack, and then shift the + // actual count of upvalues back. + let capture_with = count & 0b1 == 1; + let count = count >> 1; + if capture_with { + // Start the captured with_stack off of the + // current call frame's captured with_stack, ... + let mut captured_with_stack = frame + .upvalues + .with_stack() + .cloned() + // ... or make an empty one if there isn't one already. + .unwrap_or_else(|| Vec::with_capacity(self.with_stack.len())); - let val = match self.stack.get(idx) { - Some(val) => val.clone(), - None => { - return frame.error( - self, - ErrorKind::TvixBug { - msg: "upvalue to be captured was missing on stack", - metadata: Some(Rc::new(json!({ - "ip": format!("{:#x}", frame.ip.0 - 1), - "stack_idx(relative)": stack_idx, - "stack_idx(absolute)": idx, - }))), - }, - ); - } - }; - - upvalues.deref_mut().push(val); - } - - OpCode::DataUpvalueIdx(upv_idx) => { - upvalues.deref_mut().push(frame.upvalue(upv_idx).clone()); - } - - OpCode::DataDeferredLocal(idx) => { - upvalues.deref_mut().push(Value::DeferredUpvalue(idx)); - } - - OpCode::DataCaptureWith => { - // Start the captured with_stack off of the - // current call frame's captured with_stack, ... - let mut captured_with_stack = frame - .upvalues - .with_stack() - .cloned() - // ... or make an empty one if there isn't one already. - .unwrap_or_else(|| Vec::with_capacity(self.with_stack.len())); - - for idx in &self.with_stack { - captured_with_stack.push(self.stack[*idx].clone()); - } - - upvalues.deref_mut().set_with_stack(captured_with_stack); - } - - _ => panic!("compiler error: missing closure operand"), + for idx in &self.with_stack { + captured_with_stack.push(self.stack[*idx].clone()); } + + upvalues.deref_mut().set_with_stack(captured_with_stack); + } + + for _ in 0..count { + let pos = Position(frame.read_uvarint()); + + if let Some(stack_idx) = pos.runtime_stack_index() { + let idx = frame.stack_offset + stack_idx.0; + + let val = match self.stack.get(idx) { + Some(val) => val.clone(), + None => { + return frame.error( + self, + ErrorKind::TvixBug { + msg: "upvalue to be captured was missing on stack", + metadata: Some(Rc::new(json!({ + "ip": format!("{:#x}", frame.ip.0 - 1), + "stack_idx(relative)": stack_idx.0, + "stack_idx(absolute)": idx, + }))), + }, + ); + } + }; + + upvalues.deref_mut().push(val); + continue; + } + + if let Some(idx) = pos.runtime_deferred_local() { + upvalues.deref_mut().push(Value::DeferredUpvalue(idx)); + continue; + } + + if let Some(idx) = pos.runtime_upvalue_index() { + upvalues.deref_mut().push(frame.upvalue(idx).clone()); + continue; + } + + panic!("Tvix bug: invalid capture position emitted") } Ok(()) diff --git a/web/tvixbolt/Cargo.lock b/web/tvixbolt/Cargo.lock index c12647b97..995ea3e54 100644 --- a/web/tvixbolt/Cargo.lock +++ b/web/tvixbolt/Cargo.lock @@ -1583,6 +1583,7 @@ dependencies = [ "tabwriter", "toml", "tvix-eval-builtin-macros", + "vu128", ] [[package]] @@ -1636,6 +1637,12 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vu128" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c51a178c8f3f425d86542b14f3dce9e16e86bb86328e2293745e6744ebd62e11" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/web/tvixbolt/Cargo.nix b/web/tvixbolt/Cargo.nix index 68c34901a..5a79c9759 100644 --- a/web/tvixbolt/Cargo.nix +++ b/web/tvixbolt/Cargo.nix @@ -4711,6 +4711,10 @@ rec { packageId = "tvix-eval-builtin-macros"; rename = "builtin-macros"; } + { + name = "vu128"; + packageId = "vu128"; + } ]; devDependencies = [ { @@ -4851,6 +4855,17 @@ rec { "Sergio Benitez " ]; + }; + "vu128" = rec { + crateName = "vu128"; + version = "1.0.0"; + edition = "2018"; + sha256 = "049fsvml8rsyfj9j53ijhsxqcvp1x7fg651baj35shiziy61f6n5"; + libPath = "vu128/vu128.rs"; + authors = [ + "John Millikin " + ]; + }; "wasi" = rec { crateName = "wasi"; @@ -5973,52 +5988,41 @@ rec { testPostRun ]); in - pkgs.runCommand "run-tests-${testCrate.name}" - { - inherit testCrateFlags; - buildInputs = testInputs; - } '' - set -e + pkgs.stdenvNoCC.mkDerivation { + name = "run-tests-${testCrate.name}"; - export RUST_BACKTRACE=1 + inherit (crate) src; - # recreate a file hierarchy as when running tests with cargo + inherit testCrateFlags; - # the source for test data - # It's necessary to locate the source in $NIX_BUILD_TOP/source/ - # instead of $NIX_BUILD_TOP/ - # because we compiled those test binaries in the former and not the latter. - # So all paths will expect source tree to be there and not in the build top directly. - # For example: $NIX_BUILD_TOP := /build in general, if you ask yourself. - # NOTE: There could be edge cases if `crate.sourceRoot` does exist but - # it's very hard to reason about them. - # Open a bug if you run into this! - mkdir -p source/ - cd source/ + buildInputs = testInputs; - ${pkgs.buildPackages.xorg.lndir}/bin/lndir ${crate.src} + buildPhase = '' + set -e + export RUST_BACKTRACE=1 - # build outputs - testRoot=target/debug - mkdir -p $testRoot + # build outputs + testRoot=target/debug + mkdir -p $testRoot - # executables of the crate - # we copy to prevent std::env::current_exe() to resolve to a store location - for i in ${crate}/bin/*; do - cp "$i" "$testRoot" - done - chmod +w -R . + # executables of the crate + # we copy to prevent std::env::current_exe() to resolve to a store location + for i in ${crate}/bin/*; do + cp "$i" "$testRoot" + done + chmod +w -R . - # test harness executables are suffixed with a hash, like cargo does - # this allows to prevent name collision with the main - # executables of the crate - hash=$(basename $out) - for file in ${drv}/tests/*; do - f=$testRoot/$(basename $file)-$hash - cp $file $f - ${testCommand} - done - ''; + # test harness executables are suffixed with a hash, like cargo does + # this allows to prevent name collision with the main + # executables of the crate + hash=$(basename $out) + for file in ${drv}/tests/*; do + f=$testRoot/$(basename $file)-$hash + cp $file $f + ${testCommand} + done + ''; + }; in pkgs.runCommand "${crate.name}-linked" {