refactor(tvix/eval): improve representation of chunk/span mapping

This switches out the previous compressed representation (count of
instructions per span) with a representation where the chunk's span
list stores the index of the first operation that belongs to a span,
and finds the right span by using a binary search when looking them
up.

This improves the lookup complexity from O(n) to O(log n).

This improvement was suggested and (mostly) implemented by GPT-4. I
only fixed up some names and updated the logic for deleting
spans (which it only did not do because I didn't tell it about that).

The code was verified by producing a complex error before/after the
change and ensuring that all spans in the error match exactly.

Co-Authored-By: GPT-4
Change-Id: Ibfa12cc6973af1c9b0ae55bb464d1975209771f5
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8385
Reviewed-by: ezemtsov <eugene.zemtsov@gmail.com>
Tested-by: BuildkiteCI
Autosubmit: tazjin <tazjin@tvl.su>
This commit is contained in:
Vincent Ambo 2023-03-31 01:19:01 +03:00 committed by tazjin
parent 5bf9134324
commit ce502bdc89

View file

@ -21,8 +21,8 @@ struct SourceSpan {
/// Span into the [codemap::Codemap].
span: codemap::Span,
/// Number of instructions derived from this span.
count: usize,
/// Index of the first operation covered by this span.
start: usize,
}
/// A chunk is a representation of a sequence of bytecode
@ -61,7 +61,7 @@ impl Chunk {
pub fn push_op(&mut self, data: OpCode, span: codemap::Span) -> CodeIdx {
let idx = self.code.len();
self.code.push(data);
self.push_span(span);
self.push_span(span, idx);
CodeIdx(idx)
}
@ -76,19 +76,11 @@ impl Chunk {
// Simply drop the last op.
self.code.pop();
// If the last span only had this op, drop it, otherwise
// decrease its operation counter.
match self.spans.last_mut() {
// If the last span had more than one op, decrease the
// counter.
Some(span) if span.count > 1 => span.count -= 1,
// Otherwise, drop it.
Some(_) => {
if let Some(span) = self.spans.last() {
// If the last span started at this op, drop it.
if span.start == self.code.len() {
self.spans.pop();
}
None => unreachable!(),
}
}
@ -100,31 +92,37 @@ impl Chunk {
// Span tracking implementation
fn push_span(&mut self, span: codemap::Span) {
fn push_span(&mut self, span: codemap::Span, start: usize) {
match self.spans.last_mut() {
// We do not need to insert the same span again, as this
// instruction was compiled from the same span as the last
// one.
Some(last) if last.span == span => last.count += 1,
Some(last) if last.span == span => {}
// In all other cases, this is a new source span.
_ => self.spans.push(SourceSpan { span, count: 1 }),
_ => self.spans.push(SourceSpan { span, start }),
}
}
/// Retrieve the [codemap::Span] from which the instruction at
/// `offset` was compiled.
pub fn get_span(&self, offset: CodeIdx) -> codemap::Span {
let mut pos = 0;
let position = self
.spans
.binary_search_by(|span| span.start.cmp(&offset.0));
for span in &self.spans {
pos += span.count;
if pos > offset.0 {
return span.span;
let span = match position {
Ok(index) => &self.spans[index],
Err(index) => {
if index == 0 {
&self.spans[0]
} else {
&self.spans[index - 1]
}
}
};
panic!("compiler error: chunk missing span for offset {}", offset.0);
span.span
}
/// Write the disassembler representation of the operation at