feat(tazjin/rlox): Synchronise parser state after errors
This lets the parser collect multiple errors instead of returning after the first one, with some optimistic synchronisation after encountering something that looks wonky.

Change-Id: Ie9d0ce8de9dcc7a3d1e7aa2abe15f74cab0ab96b
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2236
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
This commit is contained in:
parent
1835b2be99
commit
42405bfa24
3 changed files with 67 additions and 10 deletions
|
@ -3,6 +3,7 @@ pub enum ErrorKind {
|
||||||
UnexpectedChar(char),
|
UnexpectedChar(char),
|
||||||
UnterminatedString,
|
UnterminatedString,
|
||||||
UnmatchedParens,
|
UnmatchedParens,
|
||||||
|
ExpectedExpression(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
|
|
@ -11,7 +11,7 @@ pub fn run(code: &str) {
|
||||||
print_tokens(&tokens);
|
print_tokens(&tokens);
|
||||||
match parser::parse(tokens) {
|
match parser::parse(tokens) {
|
||||||
Ok(expr) => println!("Expression:\n{:?}", expr),
|
Ok(expr) => println!("Expression:\n{:?}", expr),
|
||||||
Err(error) => report_errors(vec![error]),
|
Err(errors) => report_errors(errors),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(errors) => report_errors(errors),
|
Err(errors) => report_errors(errors),
|
||||||
|
|
|
@ -100,7 +100,7 @@ impl<'a> Parser<'a> {
|
||||||
fn unary(&mut self) -> ExprResult<'a> {
|
fn unary(&mut self) -> ExprResult<'a> {
|
||||||
if self.match_token(&[TokenKind::Bang, TokenKind::Minus]) {
|
if self.match_token(&[TokenKind::Bang, TokenKind::Minus]) {
|
||||||
return Ok(Expr::Unary(Unary {
|
return Ok(Expr::Unary(Unary {
|
||||||
operator: self.previous(),
|
operator: self.previous().clone(),
|
||||||
right: Box::new(self.unary()?),
|
right: Box::new(self.unary()?),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
@ -123,8 +123,13 @@ impl<'a> Parser<'a> {
|
||||||
return Ok(Expr::Grouping(Grouping(Box::new(expr))));
|
return Ok(Expr::Grouping(Grouping(Box::new(expr))));
|
||||||
}
|
}
|
||||||
|
|
||||||
// This branch indicates a parser bug, not invalid input.
|
unexpected => {
|
||||||
unexpected => panic!("Parser encountered unexpected token '{:?}'", unexpected),
|
eprintln!("encountered {:?}", unexpected);
|
||||||
|
return Err(Error {
|
||||||
|
line: next.line,
|
||||||
|
kind: ErrorKind::ExpectedExpression(next.lexeme.into_iter().collect()),
|
||||||
|
});
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(Expr::Literal(literal))
|
Ok(Expr::Literal(literal))
|
||||||
|
@ -150,7 +155,7 @@ impl<'a> Parser<'a> {
|
||||||
self.current += 1;
|
self.current += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return self.previous();
|
return self.previous().clone();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_at_end(&self) -> bool {
|
fn is_at_end(&self) -> bool {
|
||||||
|
@ -166,8 +171,8 @@ impl<'a> Parser<'a> {
|
||||||
&self.tokens[self.current]
|
&self.tokens[self.current]
|
||||||
}
|
}
|
||||||
|
|
||||||
fn previous(&self) -> Token<'a> {
|
fn previous(&self) -> &Token<'a> {
|
||||||
self.tokens[self.current - 1].clone()
|
&self.tokens[self.current - 1]
|
||||||
}
|
}
|
||||||
|
|
||||||
fn consume(&mut self, kind: &TokenKind, err: ErrorKind) -> Result<(), Error> {
|
fn consume(&mut self, kind: &TokenKind, err: ErrorKind) -> Result<(), Error> {
|
||||||
|
@ -182,6 +187,31 @@ impl<'a> Parser<'a> {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn synchronise(&mut self) {
|
||||||
|
self.advance();
|
||||||
|
|
||||||
|
while !self.is_at_end() {
|
||||||
|
if self.previous().kind == TokenKind::Semicolon {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
match self.peek().kind {
|
||||||
|
TokenKind::Class
|
||||||
|
| TokenKind::Fun
|
||||||
|
| TokenKind::Var
|
||||||
|
| TokenKind::For
|
||||||
|
| TokenKind::If
|
||||||
|
| TokenKind::While
|
||||||
|
| TokenKind::Print
|
||||||
|
| TokenKind::Return => return,
|
||||||
|
|
||||||
|
_ => {
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn binary_operator(
|
fn binary_operator(
|
||||||
&mut self,
|
&mut self,
|
||||||
oneof: &[TokenKind],
|
oneof: &[TokenKind],
|
||||||
|
@ -192,7 +222,7 @@ impl<'a> Parser<'a> {
|
||||||
while self.match_token(oneof) {
|
while self.match_token(oneof) {
|
||||||
expr = Expr::Binary(Binary {
|
expr = Expr::Binary(Binary {
|
||||||
left: Box::new(expr),
|
left: Box::new(expr),
|
||||||
operator: self.previous(),
|
operator: self.previous().clone(),
|
||||||
right: Box::new(each(self)?),
|
right: Box::new(each(self)?),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -201,8 +231,34 @@ impl<'a> Parser<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse<'a>(tokens: Vec<Token<'a>>) -> ExprResult<'a> {
|
pub fn parse<'a>(tokens: Vec<Token<'a>>) -> Result<Expr<'a>, Vec<Error>> {
|
||||||
let mut parser = Parser { tokens, current: 0 };
|
let mut parser = Parser { tokens, current: 0 };
|
||||||
|
let mut errors: Vec<Error> = vec![];
|
||||||
|
|
||||||
parser.expression()
|
while !parser.is_at_end() {
|
||||||
|
match parser.expression() {
|
||||||
|
Err(err) => {
|
||||||
|
errors.push(err);
|
||||||
|
parser.synchronise();
|
||||||
|
}
|
||||||
|
Ok(expr) => {
|
||||||
|
if !parser.is_at_end() {
|
||||||
|
// TODO(tazjin): This isn't a functional language
|
||||||
|
// - multiple statements should be allowed, at
|
||||||
|
// some point.
|
||||||
|
let current = &parser.tokens[parser.current];
|
||||||
|
errors.push(Error {
|
||||||
|
line: current.line,
|
||||||
|
kind: ErrorKind::UnexpectedChar(current.lexeme[0]),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if errors.is_empty() {
|
||||||
|
return Ok(expr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Err(errors);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue