feat(tazjin/rlox): Intern all string constants

This is again a step closer to the book, but there are some notable
differences:

* Only constants encountered by the compiler are interned; all other
  string operations (well, concatenation) happen with heap objects.

* OpReturn will always ensure that a returned string value is newly
  heap-allocated and does not reference the interner.

Change-Id: If4f04309446e01b8ff2db51094e9710d465dbc50
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2582
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
This commit is contained in:
Vincent Ambo 2021-03-02 13:11:21 +02:00 committed by tazjin
parent bcea8e0d16
commit 432e7a7ddd
4 changed files with 65 additions and 19 deletions

View file

@ -1,5 +1,6 @@
use super::chunk::Chunk; use super::chunk::Chunk;
use super::errors::{Error, ErrorKind, LoxResult}; use super::errors::{Error, ErrorKind, LoxResult};
use super::interner::Interner;
use super::opcode::OpCode; use super::opcode::OpCode;
use super::value::Value; use super::value::Value;
use crate::scanner::{self, Token, TokenKind}; use crate::scanner::{self, Token, TokenKind};
@ -12,8 +13,8 @@ struct Compiler<T: Iterator<Item = Token>> {
chunk: Chunk, chunk: Chunk,
panic: bool, panic: bool,
errors: Vec<Error>, errors: Vec<Error>,
strings: Interner,
// TODO(tazjin): Restructure so that these don't need to be Option?
current: Option<Token>, current: Option<Token>,
previous: Option<Token>, previous: Option<Token>,
} }
@ -146,7 +147,7 @@ fn rule_for<T: Iterator<Item = Token>>(token: &TokenKind) -> ParseRule<T> {
TokenKind::String(_) => { TokenKind::String(_) => {
ParseRule::new(Some(Compiler::string), None, Precedence::None) ParseRule::new(Some(Compiler::string), None, Precedence::None)
}, }
_ => ParseRule::new(None, None, Precedence::None), _ => ParseRule::new(None, None, Precedence::None),
} }
@ -260,13 +261,13 @@ impl<T: Iterator<Item = Token>> Compiler<T> {
} }
fn string(&mut self) -> LoxResult<()> { fn string(&mut self) -> LoxResult<()> {
match &self.previous().kind { let val = match &self.previous().kind {
TokenKind::String(s) => { TokenKind::String(s) => s.clone(),
let s = s.clone();
self.emit_constant(Value::String(s));
}
_ => unreachable!("only called for strings"), _ => unreachable!("only called for strings"),
} };
let id = self.strings.intern(val);
self.emit_constant(Value::String(id.into()));
Ok(()) Ok(())
} }
@ -353,7 +354,7 @@ impl<T: Iterator<Item = Token>> Compiler<T> {
} }
} }
pub fn compile(code: &str) -> Result<Chunk, Vec<Error>> { pub fn compile(code: &str) -> Result<(Interner, Chunk), Vec<Error>> {
let chars = code.chars().collect::<Vec<char>>(); let chars = code.chars().collect::<Vec<char>>();
let tokens = scanner::scan(&chars).map_err(|errors| { let tokens = scanner::scan(&chars).map_err(|errors| {
errors.into_iter().map(Into::into).collect::<Vec<Error>>() errors.into_iter().map(Into::into).collect::<Vec<Error>>()
@ -364,6 +365,7 @@ pub fn compile(code: &str) -> Result<Chunk, Vec<Error>> {
chunk: Default::default(), chunk: Default::default(),
panic: false, panic: false,
errors: vec![], errors: vec![],
strings: Interner::with_capacity(1024),
current: None, current: None,
previous: None, previous: None,
}; };
@ -371,7 +373,7 @@ pub fn compile(code: &str) -> Result<Chunk, Vec<Error>> {
compiler.compile()?; compiler.compile()?;
if compiler.errors.is_empty() { if compiler.errors.is_empty() {
Ok(compiler.chunk) Ok((compiler.strings, compiler.chunk))
} else { } else {
Err(compiler.errors) Err(compiler.errors)
} }

View file

@ -27,7 +27,7 @@ impl crate::Lox for Interpreter {
&mut self, &mut self,
code: String, code: String,
) -> Result<Self::Value, Vec<Self::Error>> { ) -> Result<Self::Value, Vec<Self::Error>> {
let chunk = compiler::compile(&code)?; let (strings, chunk) = compiler::compile(&code)?;
vm::interpret(chunk).map_err(|e| vec![e]) vm::interpret(strings, chunk).map_err(|e| vec![e])
} }
} }

View file

@ -1,9 +1,29 @@
use super::interner::InternedStr;
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub enum Value { pub enum Value {
Nil, Nil,
Bool(bool), Bool(bool),
Number(f64), Number(f64),
String(String), String(LoxString),
}
/// String payload carried by `Value::String`.
///
/// A string is either owned directly or referred to through an
/// `InternedStr` handle.
#[derive(Clone, Debug, PartialEq)]
pub enum LoxString {
/// Owned string data.
Heap(String),
/// Handle to an interned string; it is turned back into text by
/// calling `lookup` on an `Interner`.
Interned(InternedStr),
}
// Lets an owned `String` be converted with `.into()`; the result is
// always the `LoxString::Heap` variant.
impl From<String> for LoxString {
fn from(s: String) -> Self {
LoxString::Heap(s)
}
}
impl From<InternedStr> for LoxString {
fn from(s: InternedStr) -> Self {
LoxString::Interned(s)
}
} }
impl Value { impl Value {

View file

@ -1,7 +1,8 @@
use super::chunk; use super::chunk;
use super::errors::*; use super::errors::*;
use super::interner::Interner;
use super::opcode::OpCode; use super::opcode::OpCode;
use super::value::Value; use super::value::{LoxString, Value};
pub struct VM { pub struct VM {
chunk: chunk::Chunk, chunk: chunk::Chunk,
@ -11,6 +12,7 @@ pub struct VM {
ip: usize, ip: usize,
stack: Vec<Value>, stack: Vec<Value>,
strings: Interner,
} }
impl VM { impl VM {
@ -69,7 +71,10 @@ impl VM {
self.ip += 1; self.ip += 1;
match op { match op {
OpCode::OpReturn => return Ok(self.pop()), OpCode::OpReturn => {
let val = self.pop();
return Ok(self.return_value(val));
}
OpCode::OpConstant(idx) => { OpCode::OpConstant(idx) => {
let c = self.chunk.constant(*idx).clone(); let c = self.chunk.constant(*idx).clone();
@ -114,9 +119,9 @@ impl VM {
match (a, b) { match (a, b) {
(Value::String(s_a), Value::String(s_b)) => { (Value::String(s_a), Value::String(s_b)) => {
let mut new_s = s_a.clone(); let mut new_s = self.resolve_str(&s_a).to_string();
new_s.push_str(&s_b); new_s.push_str(self.resolve_str(&s_b));
self.push(Value::String(new_s)); self.push(Value::String(new_s.into()));
} }
(Value::Number(n_a), Value::Number(n_b)) => (Value::Number(n_a), Value::Number(n_b)) =>
@ -136,11 +141,30 @@ impl VM {
println!("=> {:?}", self.stack); println!("=> {:?}", self.stack);
} }
} }
// For some types of values (e.g. interned strings), returns
// should no longer include any references into the interpreter.
//
// An interned string is resolved to its text via `resolve_str`,
// copied into a new `String`, and converted back into a
// `LoxString` with `.into()`. Every other value, including a
// string that is already `LoxString::Heap`, is returned unchanged.
fn return_value(&self, val: Value) -> Value {
match val {
Value::String(string @ LoxString::Interned(_)) => {
Value::String(self.resolve_str(&string).to_string().into())
}
_ => val,
}
}
// Returns the text of a Lox string as a `&str`.
//
// A `Heap` string is borrowed directly from `string`; an `Interned`
// one is looked up in `self.strings`. The single lifetime `'a` ties
// the returned slice to both `self` and `string`, since either one
// may be the owner of the data.
fn resolve_str<'a>(&'a self, string: &'a LoxString) -> &'a str {
match string {
LoxString::Heap(s) => s.as_str(),
LoxString::Interned(id) => self.strings.lookup(*id),
}
}
} }
pub fn interpret(chunk: chunk::Chunk) -> LoxResult<Value> { pub fn interpret(strings: Interner, chunk: chunk::Chunk) -> LoxResult<Value> {
let mut vm = VM { let mut vm = VM {
chunk, chunk,
strings,
ip: 0, ip: 0,
stack: vec![], stack: vec![],
}; };