//! This module implements the instruction set running on the abstract
//! machine implemented by tvix.

use std::ops::{AddAssign, Sub};

/// Index of a constant in the current code chunk.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ConstantIdx(pub usize);

/// Index of an instruction in the current code chunk.
#[repr(transparent)]
#[derive(Clone, Copy, Debug)]
pub struct CodeIdx(pub usize);

impl AddAssign<usize> for CodeIdx {
    fn add_assign(&mut self, rhs: usize) {
        *self = CodeIdx(self.0 + rhs)
    }
}

impl Sub<usize> for CodeIdx {
    type Output = Self;

    fn sub(self, rhs: usize) -> Self::Output {
        CodeIdx(self.0 - rhs)
    }
}
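
// An illustrative sketch (not part of the original module) of how the impls
// above can be used: an instruction pointer advances with `+=` via
// `AddAssign`, and backwards jump targets are computed with `-` via `Sub`.
// The scenario in the comments is hypothetical.
#[cfg(test)]
mod code_idx_example {
    use super::CodeIdx;

    #[test]
    fn instruction_pointer_arithmetic() {
        let mut ip = CodeIdx(10);
        ip += 2; // e.g. step over an instruction and its operand
        assert_eq!(ip.0, 12);

        let target = ip - 5; // e.g. the target of a backwards jump
        assert_eq!(target.0, 7);
    }
}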

/// Index of a value in the runtime stack. This is an offset
/// *relative to* the VM value stack_base of the CallFrame
/// containing the opcode which contains this StackIdx.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd)]
pub struct StackIdx(pub usize);
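
// An illustrative sketch of the addressing scheme described above: the
// absolute stack slot of a value is the containing CallFrame's stack_base
// plus the StackIdx. The `stack_base` below is a stand-in local variable,
// not an actual CallFrame field access.
#[cfg(test)]
mod stack_idx_example {
    use super::StackIdx;

    #[test]
    fn frame_relative_addressing() {
        let stack_base = 5; // hypothetical base of the current CallFrame
        let local = StackIdx(2);

        // The VM would resolve this StackIdx to absolute stack slot 7.
        assert_eq!(stack_base + local.0, 7);
    }
}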

/// Index of an upvalue within a closure's bound-variable upvalue
/// list. This is an absolute index into the Upvalues of the
/// CallFrame containing the opcode which contains this UpvalueIdx.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct UpvalueIdx(pub usize);

/// Offset by which an instruction pointer should change in a jump.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct JumpOffset(pub usize);

/// Provided count for an instruction (could represent e.g. a number
/// of elements).
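///
/// For example, the `Attrs` instruction below takes such a count as its
/// operand: the number of key-value pairs to collect into an attribute set.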
#[repr(transparent)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Count(pub usize);
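
// An illustrative check of the core design constraint of this module: `Op`
// is a fieldless `#[repr(u8)]` enum, so every operation fits in exactly one
// byte of a chunk's bytecode stream.
#[cfg(test)]
mod op_size_example {
    use super::Op;

    #[test]
    fn ops_fit_in_a_single_byte() {
        assert_eq!(std::mem::size_of::<Op>(), 1);
    }
}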

/// Op represents all instructions in the Tvix abstract machine.
///
/// In documentation comments, stack positions are referred to by
/// indices written in `{}` as such, where required:
///
/// ```notrust
///                             --- top of the stack
///                            /
///                           v
///       [ ... | 3 | 2 | 1 | 0 ]
///                   ^
///                  /
/// 2 values deep ---
/// ```
///
/// Unless otherwise specified, operations leave their result at the
/// top of the stack.
#[repr(u8)]
#[derive(Debug, PartialEq, Eq)]
pub enum Op {
    /// Push a constant onto the stack.
    Constant,

    /// Discard the value on top of the stack.
    Pop,

    /// Invert the boolean at the top of the stack.
    Invert,

    /// Invert the sign of the number at the top of the stack.
    Negate,

    /// Sum up the two numbers at the top of the stack.
    Add,

    /// Subtract the number at {1} from the number at {2}.
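    ///
    /// For example, with `3` at {1} and `5` at {2}, `Sub` leaves `2` at
    /// the top of the stack.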
    Sub,

    /// Multiply the two numbers at the top of the stack.
    Mul,

    /// Divide the two numbers at the top of the stack.
    Div,

    /// Check the two values at the top of the stack for Nix-equality.
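    ///
    /// Note that Nix equality is structural: for example, `[ 1 2 ] == [ 1 2 ]`
    /// evaluates to `true` even though the two lists are distinct values.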
    Equal,

    /// Check whether the value at {2} is less than {1}.
    Less,

    /// Check whether the value at {2} is less than or equal to {1}.
    LessOrEq,

    /// Check whether the value at {2} is greater than {1}.
    More,

    /// Check whether the value at {2} is greater than or equal to {1}.
    MoreOrEq,

    /// Jump forward in the bytecode by the number of instructions
    /// specified in its usize operand.
    Jump,

    /// Jump forward in the bytecode by the number of instructions
    /// specified in its usize operand, *if* the value at the top
    /// of the stack is `true`.
    JumpIfTrue,

    /// Jump forward in the bytecode by the number of instructions
    /// specified in its usize operand, *if* the value at the top
    /// of the stack is `false`.
    JumpIfFalse,

    /// Pop one stack item and jump forward in the bytecode by the
    /// number of instructions specified in its usize operand, *if*
    /// the value at the top of the stack is a `Value::Catchable`.
    JumpIfCatchable,

    /// Jump forward in the bytecode by the number of instructions
    /// specified in its usize operand, *if* the value at the top
    /// of the stack is the internal value representing a missing
    /// attribute set key.
    JumpIfNotFound,

    /// Jump forward in the bytecode by the number of instructions
    /// specified in its usize operand, *if* the value at the top
    /// of the stack is *not* the internal value requesting a
    /// stack value finalisation.
    JumpIfNoFinaliseRequest,

/// Construct an attribute set from the given number of key-value pairs on
/// the top of the stack. The operand gives the count of *pairs*, not the
/// number of *stack values* - the actual number of values popped off the
/// stack will be twice the argument to this op.
Attrs,
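
// Sketch of the assumed stack discipline: for `{ a = 1; b = 2; }` the
// keys and values are pushed pairwise, so `Attrs` with an operand of 2
// pops four stack values:
//   [ .. "a" 1 "b" 2 ]  --Attrs(2)-->  [ .. { a = 1; b = 2; } ]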

/// Merge the attribute set at {2} into the attribute set at {1},
/// and leave the new set at the top of the stack.
AttrsUpdate,
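
// Sketch: this matches the semantics of Nix's `//` operator, where the
// right-hand set at {1} wins on key collisions, e.g.
// `{ a = 1; } // { a = 2; }` evaluates to `{ a = 2; }`.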

/// Select the attribute with the name at {1} from the set at {2}.
AttrsSelect,

/// Select the attribute with the name at {1} from the set at {2}, but leave
/// a `Value::AttrNotFound` on the stack instead of failing if it is
/// missing.
AttrsTrySelect,

/// Check for the presence of the attribute with the name at {1} in the set
/// at {2}.
HasAttr,
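
// Sketch: for `{ a = 1; } ? a`, the set and the name "a" are on the
// stack and `HasAttr` replaces both with the boolean `true`.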

/// Throw an error if the attribute set at the top of the stack has any
/// attributes other than those listed in the formals of the current
/// lambda.
///
/// Panics if the current frame is not a lambda with formals.
ValidateClosedFormals,
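
// Sketch: for a closed formals pattern like `{ a, b }: a + b`, a call
// with `{ a = 1; b = 2; c = 3; }` must fail with an unexpected-argument
// error for `c`; an ellipsis (`{ a, b, ... }:`) disables the check.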

/// Push a value onto the runtime `with`-stack to enable dynamic identifier
/// resolution. The absolute stack index of the value is supplied as a usize
/// operand.
PushWith,

/// Pop the last runtime `with`-stack element.
PopWith,

/// Dynamically resolve an identifier with the name at {1} from the runtime
/// `with`-stack.
ResolveWith,
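
// Sketch of how these three ops plausibly cooperate for
// `with pkgs; hello`, where `hello` is not statically resolvable:
//   PushWith <stack idx of pkgs>   # bring pkgs into dynamic scope
//   Constant "hello"
//   ResolveWith                    # look "hello" up in the with-stack
//   PopWith                        # leave the dynamic scope again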

// Lists

/// Construct a list from the given number of values at the top of the
/// stack.
List,

/// Concatenate the lists at {2} and {1}.
Concat,
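
// Sketch: `[ 1 2 ] ++ [ 3 ]` builds both operands via `List` (with
// operands 2 and 1), then `Concat` pops them and pushes `[ 1 2 3 ]`.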

// Strings

/// Interpolate the given number of string fragments into a single string.
Interpolate,
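
// Sketch: `"a${x}c"` pushes three fragments ("a", the coerced value of
// `x`, "c") and `Interpolate` with an operand of 3 joins them.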

/// Force the Value on the stack and coerce it to a string.
CoerceToString,

// Paths

/// Attempt to resolve the Value on the stack using the configured
/// [`NixSearchPath`][].
///
/// [`NixSearchPath`]: crate::nix_search_path::NixSearchPath
FindFile,
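
// Sketch: a search-path literal like `<nixpkgs>` leaves the fragment
// "nixpkgs" on the stack, and `FindFile` resolves it against entries of
// the configured search path (e.g. `nixpkgs=/path/to/nixpkgs` from
// NIX_PATH).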

/// Attempt to resolve a path literal relative to the home dir.
ResolveHomePath,
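
// Sketch: the path literal `~/config.nix` resolves to
// `$HOME/config.nix` for the user running the evaluation.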

// Type assertion operators

/// Assert that the value at {1} is a boolean, and fail with a runtime error
/// otherwise.
AssertBool,

/// Assert that the value at {1} is an attribute set, and fail with a
/// runtime error otherwise.
AssertAttrs,

/// Access local identifiers with statically known positions.
GetLocal,

/// Close scopes while leaving their expression value around.
CloseScope,

/// Return an error indicating that an `assert` failed.
AssertFail,
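
// Sketch: `assert cond; body` plausibly evaluates `cond`, jumps over
// this op when it is true, and otherwise hits `AssertFail`, which
// surfaces the assertion failure as an evaluation error.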

// Lambdas & closures

/// Call the value at {1} in a new VM callframe.
Call,
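
// Sketch: Nix functions are unary, so a curried call like `f 1 2`
// plausibly emits one `Call` per application: `f 1` produces an
// intermediate function value which is then called with `2`.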

/// Retrieve the upvalue at the given index from the closure or thunk
/// currently under evaluation.
GetUpvalue,

/// Construct a closure which has upvalues but no self-references.
Closure,
    /// Construct a closure which has self-references (direct or via
    /// upvalues).
    ThunkClosure,

    /// Construct a suspended thunk, used to delay a computation for laziness.
    ThunkSuspended,

    /// Force the value at the top of the stack until it is a
    /// `Thunk::Evaluated`.
    Force,

    /// Finalise initialisation of the upvalues of the value in the given
    /// stack index (which must be a Value::Thunk) after the scope is fully
    /// bound.
    Finalise,

    /// Final instruction emitted in a chunk. Does not have an inherent
    /// effect, but can simplify VM logic as a marker in some cases.
    ///
    /// Can be thought of as "returning" the value to the parent frame,
    /// hence the name.
    Return,

    /// Sentinel value to signal invalid bytecode. This MUST always be the
    /// last value in the enum. Do not move it!
    Invalid,
}

// Assert at compile time that `Op` fits into a single byte, which the
// `From<u8>` conversion below relies on.
const _ASSERT_SMALL_OP: () = assert!(std::mem::size_of::<Op>() == 1);

impl From<u8> for Op {
    fn from(num: u8) -> Self {
        if num >= Self::Invalid as u8 {
            return Self::Invalid;
        }

        // SAFETY: As long as `Invalid` remains the last variant of the enum,
        // and as long as variant values are not specified manually, this
        // conversion is safe.
        unsafe { std::mem::transmute(num) }
    }
}

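// The sentinel together with the size assertion above makes opcode
// decoding a plain cast: every byte below `Invalid` is a valid
// discriminant, and anything out of range collapses to `Invalid`. A
// minimal sketch of the round-trip, using only variants shown in this
// module; `matches!` is used so that no extra derives on `Op` are
// assumed.
#[cfg(test)]
mod op_conversion_tests {
    use super::Op;

    #[test]
    fn test_op_u8_round_trip() {
        // A byte holding a valid discriminant converts back to its variant.
        let byte = Op::Force as u8;
        assert!(matches!(Op::from(byte), Op::Force));

        // An out-of-range byte collapses to the sentinel rather than
        // producing an invalid enum value.
        assert!(matches!(Op::from(u8::MAX), Op::Invalid));
    }
}
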
/// Describes what kind of argument, if any, follows an [`Op`] in the
/// bytecode stream.
pub enum OpArg {
    /// The operation has no argument.
    None,

    /// The operation has a single argument encoded as a uvarint.
    Uvarint,

    /// The operation has a single argument of known, fixed size (e.g. a
    /// jump offset).
    Fixed,

    /// The operation has a custom argument encoding which the consumer
    /// must know how to decode.
    Custom,
}

impl Op {
    /// Returns the kind of argument, if any, that this operation expects
    /// to follow it in the bytecode.
    pub fn arg_type(&self) -> OpArg {
        match self {
            Op::Constant
            | Op::Attrs
            | Op::PushWith
            | Op::List
            | Op::Interpolate
            | Op::GetLocal
            | Op::CloseScope
            | Op::GetUpvalue
            | Op::Finalise => OpArg::Uvarint,

            Op::Jump
            | Op::JumpIfTrue
            | Op::JumpIfFalse
            | Op::JumpIfCatchable
            | Op::JumpIfNotFound
            | Op::JumpIfNoFinaliseRequest => OpArg::Fixed,

            Op::CoerceToString | Op::Closure | Op::ThunkClosure | Op::ThunkSuspended => {
                OpArg::Custom
            }

            _ => OpArg::None,
        }
    }
}

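// For `OpArg::Uvarint`, the argument following the opcode is a
// variable-length integer. The helper below is a hypothetical
// LEB128-style decoder, written only to illustrate the shape of such an
// encoding; the actual chunk reader is not part of this module. It
// returns the decoded value and the number of bytes consumed, so small
// arguments (below 128) cost a single byte.
#[cfg(test)]
fn read_uvarint_example(code: &[u8]) -> (u64, usize) {
    let mut result: u64 = 0;

    // At most 10 bytes are needed to encode a u64 in 7-bit groups.
    for (i, byte) in code.iter().enumerate().take(10) {
        // The low 7 bits carry payload; the high bit marks continuation.
        result |= u64::from(byte & 0x7f) << (7 * i);
        if byte & 0x80 == 0 {
            return (result, i + 1);
        }
    }

    (result, code.len().min(10))
}

#[cfg(test)]
mod uvarint_example_tests {
    use super::read_uvarint_example;

    #[test]
    fn test_uvarint_example() {
        // 0b1010_1100 0b0000_0010 encodes 300 in two bytes:
        // payload 44 from the first byte, plus 2 << 7 from the second.
        assert_eq!(read_uvarint_example(&[0xac, 0x02]), (300, 2));
    }
}
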
/// Position is used to represent where to capture an upvalue from.
///
/// The position is packed into a single `u64`: the two least significant
/// bits tag the kind of position (`0` = stack index, `1` = deferred
/// local, `2` = upvalue index), and the remaining bits hold the index
/// itself.
#[derive(Clone, Copy)]
pub struct Position(pub u64);

impl Position {
    pub fn stack_index(idx: StackIdx) -> Self {
        Position((idx.0 as u64) << 2)
    }

    pub fn deferred_local(idx: StackIdx) -> Self {
        Position(((idx.0 as u64) << 2) | 1)
    }

    pub fn upvalue_index(idx: UpvalueIdx) -> Self {
        Position(((idx.0 as u64) << 2) | 2)
    }

    pub fn runtime_stack_index(&self) -> Option<StackIdx> {
        if (self.0 & 0b11) == 0 {
            return Some(StackIdx((self.0 >> 2) as usize));
        }

        None
    }

    pub fn runtime_deferred_local(&self) -> Option<StackIdx> {
        if (self.0 & 0b11) == 1 {
            return Some(StackIdx((self.0 >> 2) as usize));
        }

        None
    }

    pub fn runtime_upvalue_index(&self) -> Option<UpvalueIdx> {
        if (self.0 & 0b11) == 2 {
            return Some(UpvalueIdx((self.0 >> 2) as usize));
        }

        None
    }
}

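// A worked example of the tag layout, assuming nothing beyond the
// constructors above: the index occupies the high bits, while the low
// two bits carry the tag. Unlike the round-trip tests below, this
// checks the raw bit pattern itself.
#[cfg(test)]
mod position_layout_example {
    use super::{Position, StackIdx};

    #[test]
    fn test_deferred_local_layout() {
        // StackIdx(5) shifted left by two, with tag `1` ("deferred
        // local") in the low bits: 0b101_01 == 21.
        let pos = Position::deferred_local(StackIdx(5));
        assert_eq!(pos.0, 0b101_01);
        assert_eq!(pos.runtime_deferred_local(), Some(StackIdx(5)));
    }
}
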
#[cfg(test)]
mod position_tests {
    use super::Position; // he-he
    use super::{StackIdx, UpvalueIdx};

    #[test]
    fn test_stack_index_position() {
        let idx = StackIdx(42);
        let pos = Position::stack_index(idx);
        let result = pos.runtime_stack_index();

        assert_eq!(result, Some(idx));
        assert_eq!(pos.runtime_deferred_local(), None);
        assert_eq!(pos.runtime_upvalue_index(), None);
    }

    #[test]
    fn test_deferred_local_position() {
        let idx = StackIdx(42);
        let pos = Position::deferred_local(idx);
        let result = pos.runtime_deferred_local();

        assert_eq!(result, Some(idx));
        assert_eq!(pos.runtime_stack_index(), None);
        assert_eq!(pos.runtime_upvalue_index(), None);
    }

    #[test]
    fn test_upvalue_index_position() {
        let idx = UpvalueIdx(42);
        let pos = Position::upvalue_index(idx);
        let result = pos.runtime_upvalue_index();

        assert_eq!(result, Some(idx));
        assert_eq!(pos.runtime_stack_index(), None);
        assert_eq!(pos.runtime_deferred_local(), None);
    }
}