feat(tazjin/rlox): Synchronise parser state after errors

This lets the parser collect multiple errors instead of returning
after the first one, with some optimistic synchronisation after
encountering something that looks wonky.

Change-Id: Ie9d0ce8de9dcc7a3d1e7aa2abe15f74cab0ab96b
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2236
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
This commit is contained in:
Vincent Ambo 2020-12-06 18:03:09 +01:00 committed by tazjin
parent 1835b2be99
commit 42405bfa24
3 changed files with 67 additions and 10 deletions

View file

@ -3,6 +3,7 @@ pub enum ErrorKind {
UnexpectedChar(char), UnexpectedChar(char),
UnterminatedString, UnterminatedString,
UnmatchedParens, UnmatchedParens,
ExpectedExpression(String),
} }
#[derive(Debug)] #[derive(Debug)]

View file

@ -11,7 +11,7 @@ pub fn run(code: &str) {
print_tokens(&tokens); print_tokens(&tokens);
match parser::parse(tokens) { match parser::parse(tokens) {
Ok(expr) => println!("Expression:\n{:?}", expr), Ok(expr) => println!("Expression:\n{:?}", expr),
Err(error) => report_errors(vec![error]), Err(errors) => report_errors(errors),
} }
} }
Err(errors) => report_errors(errors), Err(errors) => report_errors(errors),

View file

@ -100,7 +100,7 @@ impl<'a> Parser<'a> {
fn unary(&mut self) -> ExprResult<'a> { fn unary(&mut self) -> ExprResult<'a> {
if self.match_token(&[TokenKind::Bang, TokenKind::Minus]) { if self.match_token(&[TokenKind::Bang, TokenKind::Minus]) {
return Ok(Expr::Unary(Unary { return Ok(Expr::Unary(Unary {
operator: self.previous(), operator: self.previous().clone(),
right: Box::new(self.unary()?), right: Box::new(self.unary()?),
})); }));
} }
@ -123,8 +123,13 @@ impl<'a> Parser<'a> {
return Ok(Expr::Grouping(Grouping(Box::new(expr)))); return Ok(Expr::Grouping(Grouping(Box::new(expr))));
} }
// This branch indicates a parser bug, not invalid input. unexpected => {
unexpected => panic!("Parser encountered unexpected token '{:?}'", unexpected), eprintln!("encountered {:?}", unexpected);
return Err(Error {
line: next.line,
kind: ErrorKind::ExpectedExpression(next.lexeme.into_iter().collect()),
});
}
}; };
Ok(Expr::Literal(literal)) Ok(Expr::Literal(literal))
@ -150,7 +155,7 @@ impl<'a> Parser<'a> {
self.current += 1; self.current += 1;
} }
return self.previous(); return self.previous().clone();
} }
fn is_at_end(&self) -> bool { fn is_at_end(&self) -> bool {
@ -166,8 +171,8 @@ impl<'a> Parser<'a> {
&self.tokens[self.current] &self.tokens[self.current]
} }
fn previous(&self) -> Token<'a> { fn previous(&self) -> &Token<'a> {
self.tokens[self.current - 1].clone() &self.tokens[self.current - 1]
} }
fn consume(&mut self, kind: &TokenKind, err: ErrorKind) -> Result<(), Error> { fn consume(&mut self, kind: &TokenKind, err: ErrorKind) -> Result<(), Error> {
@ -182,6 +187,31 @@ impl<'a> Parser<'a> {
}) })
} }
/// Discards tokens after a parse error until a plausible point to
/// resume parsing is reached.
///
/// One token is always skipped first. After that, skipping stops as
/// soon as any of the following holds:
///
/// * the end of the token stream has been reached,
/// * the token that was just consumed is a semicolon, or
/// * the upcoming token is one of the keywords `class`, `fun`, `var`,
///   `for`, `if`, `while`, `print` or `return`.
fn synchronise(&mut self) {
    // Unconditionally step past the current token before scanning.
    self.advance();

    while !self.is_at_end() {
        // `||` short-circuits, so the upcoming token is only inspected
        // when the previous one was not a semicolon.
        if self.previous().kind == TokenKind::Semicolon
            || matches!(
                self.peek().kind,
                TokenKind::Class
                    | TokenKind::Fun
                    | TokenKind::Var
                    | TokenKind::For
                    | TokenKind::If
                    | TokenKind::While
                    | TokenKind::Print
                    | TokenKind::Return
            )
        {
            return;
        }

        // Neither stop condition matched: drop this token and keep going.
        self.advance();
    }
}
fn binary_operator( fn binary_operator(
&mut self, &mut self,
oneof: &[TokenKind], oneof: &[TokenKind],
@ -192,7 +222,7 @@ impl<'a> Parser<'a> {
while self.match_token(oneof) { while self.match_token(oneof) {
expr = Expr::Binary(Binary { expr = Expr::Binary(Binary {
left: Box::new(expr), left: Box::new(expr),
operator: self.previous(), operator: self.previous().clone(),
right: Box::new(each(self)?), right: Box::new(each(self)?),
}) })
} }
@ -201,8 +231,34 @@ impl<'a> Parser<'a> {
} }
} }
pub fn parse<'a>(tokens: Vec<Token<'a>>) -> ExprResult<'a> { pub fn parse<'a>(tokens: Vec<Token<'a>>) -> Result<Expr<'a>, Vec<Error>> {
let mut parser = Parser { tokens, current: 0 }; let mut parser = Parser { tokens, current: 0 };
let mut errors: Vec<Error> = vec![];
parser.expression() while !parser.is_at_end() {
match parser.expression() {
Err(err) => {
errors.push(err);
parser.synchronise();
}
Ok(expr) => {
if !parser.is_at_end() {
// TODO(tazjin): This isn't a functional language
// - multiple statements should be allowed, at
// some point.
let current = &parser.tokens[parser.current];
errors.push(Error {
line: current.line,
kind: ErrorKind::UnexpectedChar(current.lexeme[0]),
});
}
if errors.is_empty() {
return Ok(expr);
}
}
}
}
return Err(errors);
} }