feat(tazjin/rlox): Intern all string constants
This is again a step closer to the book, but there are some notable differences:

* Only constants encountered by the compiler are interned; all other string operations (well, concatenation) happen with heap objects.
* OpReturn will always ensure that a returned string value is newly heap-allocated and does not reference the interner.

Change-Id: If4f04309446e01b8ff2db51094e9710d465dbc50
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2582
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
This commit is contained in:
parent
bcea8e0d16
commit
432e7a7ddd
4 changed files with 65 additions and 19 deletions
|
@ -1,5 +1,6 @@
|
||||||
use super::chunk::Chunk;
|
use super::chunk::Chunk;
|
||||||
use super::errors::{Error, ErrorKind, LoxResult};
|
use super::errors::{Error, ErrorKind, LoxResult};
|
||||||
|
use super::interner::Interner;
|
||||||
use super::opcode::OpCode;
|
use super::opcode::OpCode;
|
||||||
use super::value::Value;
|
use super::value::Value;
|
||||||
use crate::scanner::{self, Token, TokenKind};
|
use crate::scanner::{self, Token, TokenKind};
|
||||||
|
@ -12,8 +13,8 @@ struct Compiler<T: Iterator<Item = Token>> {
|
||||||
chunk: Chunk,
|
chunk: Chunk,
|
||||||
panic: bool,
|
panic: bool,
|
||||||
errors: Vec<Error>,
|
errors: Vec<Error>,
|
||||||
|
strings: Interner,
|
||||||
|
|
||||||
// TODO(tazjin): Restructure so that these don't need to be Option?
|
|
||||||
current: Option<Token>,
|
current: Option<Token>,
|
||||||
previous: Option<Token>,
|
previous: Option<Token>,
|
||||||
}
|
}
|
||||||
|
@ -146,7 +147,7 @@ fn rule_for<T: Iterator<Item = Token>>(token: &TokenKind) -> ParseRule<T> {
|
||||||
|
|
||||||
TokenKind::String(_) => {
|
TokenKind::String(_) => {
|
||||||
ParseRule::new(Some(Compiler::string), None, Precedence::None)
|
ParseRule::new(Some(Compiler::string), None, Precedence::None)
|
||||||
},
|
}
|
||||||
|
|
||||||
_ => ParseRule::new(None, None, Precedence::None),
|
_ => ParseRule::new(None, None, Precedence::None),
|
||||||
}
|
}
|
||||||
|
@ -260,13 +261,13 @@ impl<T: Iterator<Item = Token>> Compiler<T> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn string(&mut self) -> LoxResult<()> {
|
fn string(&mut self) -> LoxResult<()> {
|
||||||
match &self.previous().kind {
|
let val = match &self.previous().kind {
|
||||||
TokenKind::String(s) => {
|
TokenKind::String(s) => s.clone(),
|
||||||
let s = s.clone();
|
|
||||||
self.emit_constant(Value::String(s));
|
|
||||||
}
|
|
||||||
_ => unreachable!("only called for strings"),
|
_ => unreachable!("only called for strings"),
|
||||||
}
|
};
|
||||||
|
|
||||||
|
let id = self.strings.intern(val);
|
||||||
|
self.emit_constant(Value::String(id.into()));
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -353,7 +354,7 @@ impl<T: Iterator<Item = Token>> Compiler<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn compile(code: &str) -> Result<Chunk, Vec<Error>> {
|
pub fn compile(code: &str) -> Result<(Interner, Chunk), Vec<Error>> {
|
||||||
let chars = code.chars().collect::<Vec<char>>();
|
let chars = code.chars().collect::<Vec<char>>();
|
||||||
let tokens = scanner::scan(&chars).map_err(|errors| {
|
let tokens = scanner::scan(&chars).map_err(|errors| {
|
||||||
errors.into_iter().map(Into::into).collect::<Vec<Error>>()
|
errors.into_iter().map(Into::into).collect::<Vec<Error>>()
|
||||||
|
@ -364,6 +365,7 @@ pub fn compile(code: &str) -> Result<Chunk, Vec<Error>> {
|
||||||
chunk: Default::default(),
|
chunk: Default::default(),
|
||||||
panic: false,
|
panic: false,
|
||||||
errors: vec![],
|
errors: vec![],
|
||||||
|
strings: Interner::with_capacity(1024),
|
||||||
current: None,
|
current: None,
|
||||||
previous: None,
|
previous: None,
|
||||||
};
|
};
|
||||||
|
@ -371,7 +373,7 @@ pub fn compile(code: &str) -> Result<Chunk, Vec<Error>> {
|
||||||
compiler.compile()?;
|
compiler.compile()?;
|
||||||
|
|
||||||
if compiler.errors.is_empty() {
|
if compiler.errors.is_empty() {
|
||||||
Ok(compiler.chunk)
|
Ok((compiler.strings, compiler.chunk))
|
||||||
} else {
|
} else {
|
||||||
Err(compiler.errors)
|
Err(compiler.errors)
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,7 +27,7 @@ impl crate::Lox for Interpreter {
|
||||||
&mut self,
|
&mut self,
|
||||||
code: String,
|
code: String,
|
||||||
) -> Result<Self::Value, Vec<Self::Error>> {
|
) -> Result<Self::Value, Vec<Self::Error>> {
|
||||||
let chunk = compiler::compile(&code)?;
|
let (strings, chunk) = compiler::compile(&code)?;
|
||||||
vm::interpret(chunk).map_err(|e| vec![e])
|
vm::interpret(strings, chunk).map_err(|e| vec![e])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,9 +1,29 @@
|
||||||
|
use super::interner::InternedStr;
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq)]
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
pub enum Value {
|
pub enum Value {
|
||||||
Nil,
|
Nil,
|
||||||
Bool(bool),
|
Bool(bool),
|
||||||
Number(f64),
|
Number(f64),
|
||||||
String(String),
|
String(LoxString),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, PartialEq)]
|
||||||
|
pub enum LoxString {
|
||||||
|
Heap(String),
|
||||||
|
Interned(InternedStr),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<String> for LoxString {
|
||||||
|
fn from(s: String) -> Self {
|
||||||
|
LoxString::Heap(s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<InternedStr> for LoxString {
|
||||||
|
fn from(s: InternedStr) -> Self {
|
||||||
|
LoxString::Interned(s)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Value {
|
impl Value {
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
use super::chunk;
|
use super::chunk;
|
||||||
use super::errors::*;
|
use super::errors::*;
|
||||||
|
use super::interner::Interner;
|
||||||
use super::opcode::OpCode;
|
use super::opcode::OpCode;
|
||||||
use super::value::Value;
|
use super::value::{LoxString, Value};
|
||||||
|
|
||||||
pub struct VM {
|
pub struct VM {
|
||||||
chunk: chunk::Chunk,
|
chunk: chunk::Chunk,
|
||||||
|
@ -11,6 +12,7 @@ pub struct VM {
|
||||||
ip: usize,
|
ip: usize,
|
||||||
|
|
||||||
stack: Vec<Value>,
|
stack: Vec<Value>,
|
||||||
|
strings: Interner,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl VM {
|
impl VM {
|
||||||
|
@ -69,7 +71,10 @@ impl VM {
|
||||||
self.ip += 1;
|
self.ip += 1;
|
||||||
|
|
||||||
match op {
|
match op {
|
||||||
OpCode::OpReturn => return Ok(self.pop()),
|
OpCode::OpReturn => {
|
||||||
|
let val = self.pop();
|
||||||
|
return Ok(self.return_value(val));
|
||||||
|
}
|
||||||
|
|
||||||
OpCode::OpConstant(idx) => {
|
OpCode::OpConstant(idx) => {
|
||||||
let c = self.chunk.constant(*idx).clone();
|
let c = self.chunk.constant(*idx).clone();
|
||||||
|
@ -114,9 +119,9 @@ impl VM {
|
||||||
|
|
||||||
match (a, b) {
|
match (a, b) {
|
||||||
(Value::String(s_a), Value::String(s_b)) => {
|
(Value::String(s_a), Value::String(s_b)) => {
|
||||||
let mut new_s = s_a.clone();
|
let mut new_s = self.resolve_str(&s_a).to_string();
|
||||||
new_s.push_str(&s_b);
|
new_s.push_str(self.resolve_str(&s_b));
|
||||||
self.push(Value::String(new_s));
|
self.push(Value::String(new_s.into()));
|
||||||
}
|
}
|
||||||
|
|
||||||
(Value::Number(n_a), Value::Number(n_b)) =>
|
(Value::Number(n_a), Value::Number(n_b)) =>
|
||||||
|
@ -136,11 +141,30 @@ impl VM {
|
||||||
println!("=> {:?}", self.stack);
|
println!("=> {:?}", self.stack);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For some types of values (e.g. interned strings), returns
|
||||||
|
// should no longer include any references into the interpreter.
|
||||||
|
fn return_value(&self, val: Value) -> Value {
|
||||||
|
match val {
|
||||||
|
Value::String(string @ LoxString::Interned(_)) => {
|
||||||
|
Value::String(self.resolve_str(&string).to_string().into())
|
||||||
|
}
|
||||||
|
_ => val,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn resolve_str<'a>(&'a self, string: &'a LoxString) -> &'a str {
|
||||||
|
match string {
|
||||||
|
LoxString::Heap(s) => s.as_str(),
|
||||||
|
LoxString::Interned(id) => self.strings.lookup(*id),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn interpret(chunk: chunk::Chunk) -> LoxResult<Value> {
|
pub fn interpret(strings: Interner, chunk: chunk::Chunk) -> LoxResult<Value> {
|
||||||
let mut vm = VM {
|
let mut vm = VM {
|
||||||
chunk,
|
chunk,
|
||||||
|
strings,
|
||||||
ip: 0,
|
ip: 0,
|
||||||
stack: vec![],
|
stack: vec![],
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue