tvl-depot/users/tazjin/rlox/src/parser.rs

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

611 lines
16 KiB
Rust
Raw Normal View History

// This implements the grammar of Lox as described starting in the
// Crafting Interpreters chapter "Representing Code". Note that the
// upstream Java implementation works around Java being bad at value
// classes by writing a code generator for Java.
//
// My Rust implementation skips this step because it's unnecessary, we
// have real types.
use crate::errors::{Error, ErrorKind};
use crate::scanner::{Token, TokenKind};
// AST
#[derive(Debug)]
pub struct Assign<'a> {
pub name: Token<'a>,
pub value: Box<Expr<'a>>,
}
#[derive(Debug)]
pub struct Binary<'a> {
pub left: Box<Expr<'a>>,
pub operator: Token<'a>,
pub right: Box<Expr<'a>>,
}
#[derive(Debug)]
pub struct Logical<'a> {
pub left: Box<Expr<'a>>,
pub operator: Token<'a>,
pub right: Box<Expr<'a>>,
}
#[derive(Debug)]
pub struct Grouping<'a>(pub Box<Expr<'a>>);
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
Boolean(bool),
Number(f64),
String(String),
Nil,
}
#[derive(Debug)]
pub struct Unary<'a> {
pub operator: Token<'a>,
pub right: Box<Expr<'a>>,
}
#[derive(Debug)]
pub struct Call<'a> {
pub callee: Box<Expr<'a>>,
pub paren: Token<'a>,
pub args: Vec<Expr<'a>>,
}
// Not to be confused with `Var`, which is for assignment.
#[derive(Debug)]
pub struct Variable<'a>(pub Token<'a>);
#[derive(Debug)]
pub enum Expr<'a> {
Assign(Assign<'a>),
Binary(Binary<'a>),
Grouping(Grouping<'a>),
Literal(Literal),
Unary(Unary<'a>),
Call(Call<'a>),
Variable(Variable<'a>),
Logical(Logical<'a>),
}
// Variable assignment. Not to be confused with `Variable`, which is
// for access.
#[derive(Debug)]
pub struct Var<'a> {
pub name: Token<'a>,
pub initialiser: Option<Expr<'a>>,
}
#[derive(Debug)]
pub struct If<'a> {
pub condition: Expr<'a>,
pub then_branch: Box<Statement<'a>>,
pub else_branch: Option<Box<Statement<'a>>>,
}
#[derive(Debug)]
pub struct While<'a> {
pub condition: Expr<'a>,
pub body: Box<Statement<'a>>,
}
pub type Block<'a> = Vec<Statement<'a>>;
#[derive(Debug)]
pub enum Statement<'a> {
Expr(Expr<'a>),
Print(Expr<'a>),
Var(Var<'a>),
Block(Block<'a>),
If(If<'a>),
While(While<'a>),
}
// Parser
/*
program declaration* EOF ;
declaration varDecl
| statement ;
statement exprStmt
| forStmt
| ifStmt
| printStmt
| whileStmt
| block ;
forStmt "for" "(" ( varDecl | exprStmt | ";" )
expression? ";"
expression? ")" statement ;
whileStmt "while" "(" expression ")" statement ;
exprStmt expression ";" ;
ifStmt "if" "(" expression ")" statement
( "else" statement )? ;
printStmt "print" expression ";" ;
expression assignment ;
assignment IDENTIFIER "=" assignment
| logic_or ;
logic_or logic_and ( "or" logic_and )* ;
logic_and equality ( "and" equality )* ;
equality comparison ( ( "!=" | "==" ) comparison )* ;
comparison term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
term factor ( ( "-" | "+" ) factor )* ;
factor unary ( ( "/" | "*" ) unary )* ;
unary ( "!" | "-" ) unary | call ;
call primary ( "(" arguments? ")" )* ;
arguments expression ( "," expression )* ;
primary NUMBER | STRING | "true" | "false" | "nil"
| "(" expression ")" ;
*/
struct Parser<'a> {
tokens: Vec<Token<'a>>,
current: usize,
}
type ExprResult<'a> = Result<Expr<'a>, Error>;
type StmtResult<'a> = Result<Statement<'a>, Error>;
impl<'a> Parser<'a> {
// recursive-descent parser functions
fn declaration(&mut self) -> StmtResult<'a> {
if self.match_token(&[TokenKind::Var]) {
return self.var_declaration();
}
self.statement()
}
fn var_declaration(&mut self) -> StmtResult<'a> {
// Since `TokenKind::Identifier` carries data, we can't use
// `consume`.
if let TokenKind::Identifier(_) = self.peek().kind {
let mut var = Var {
name: self.advance(),
initialiser: None,
};
if self.match_token(&[TokenKind::Equal]) {
var.initialiser = Some(self.expression()?);
}
self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?;
return Ok(Statement::Var(var));
}
return Err(Error {
line: self.peek().line,
kind: ErrorKind::ExpectedVariableName,
});
}
fn statement(&mut self) -> StmtResult<'a> {
if self.match_token(&[TokenKind::Print]) {
self.print_statement()
} else if self.match_token(&[TokenKind::LeftBrace]) {
self.block_statement()
} else if self.match_token(&[TokenKind::If]) {
self.if_statement()
} else if self.match_token(&[TokenKind::While]) {
self.while_statement()
} else if self.match_token(&[TokenKind::For]) {
self.for_statement()
} else {
self.expr_statement()
}
}
fn print_statement(&mut self) -> StmtResult<'a> {
let expr = self.expression()?;
self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?;
Ok(Statement::Print(expr))
}
fn block_statement(&mut self) -> StmtResult<'a> {
let mut block: Block<'a> = vec![];
while !self.check_token(&TokenKind::RightBrace) && !self.is_at_end() {
block.push(self.declaration()?);
}
self.consume(&TokenKind::RightBrace, ErrorKind::ExpectedClosingBrace)?;
Ok(Statement::Block(block))
}
fn if_statement(&mut self) -> StmtResult<'a> {
self.consume(
&TokenKind::LeftParen,
ErrorKind::ExpectedToken("Expected '(' after 'if'"),
)?;
let condition = self.expression()?;
self.consume(
&TokenKind::RightParen,
ErrorKind::ExpectedToken("Expected ')' after condition"),
)?;
let then_branch = Box::new(self.statement()?);
let mut stmt = If {
condition,
then_branch,
else_branch: Option::None,
};
if self.match_token(&[TokenKind::Else]) {
stmt.else_branch = Some(Box::new(self.statement()?));
}
Ok(Statement::If(stmt))
}
fn while_statement(&mut self) -> StmtResult<'a> {
self.consume(
&TokenKind::LeftParen,
ErrorKind::ExpectedToken("Expected '(' after 'while'"),
)?;
let condition = self.expression()?;
self.consume(
&TokenKind::RightParen,
ErrorKind::ExpectedToken("Expected ')' after 'while'"),
)?;
Ok(Statement::While(While {
condition,
body: Box::new(self.statement()?),
}))
}
fn for_statement(&mut self) -> StmtResult<'a> {
// Parsing of clauses ...
self.consume(
&TokenKind::LeftParen,
ErrorKind::ExpectedToken("Expected '(' after 'for'"),
)?;
let initialiser = if self.match_token(&[TokenKind::Semicolon]) {
None
} else if self.match_token(&[TokenKind::Var]) {
Some(self.var_declaration()?)
} else {
Some(self.expr_statement()?)
};
let condition = if self.check_token(&TokenKind::Semicolon) {
// unspecified condition => infinite loop
Expr::Literal(Literal::Boolean(true))
} else {
self.expression()?
};
self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?;
let increment = if self.check_token(&TokenKind::RightParen) {
None
} else {
Some(self.expression()?)
};
self.consume(
&TokenKind::RightParen,
ErrorKind::ExpectedToken("Expected ')' after for clauses"),
)?;
let mut body = self.statement()?;
// ... desugaring to while
if let Some(inc) = increment {
body = Statement::Block(vec![body, Statement::Expr(inc)]);
}
body = Statement::While(While {
condition,
body: Box::new(body),
});
if let Some(init) = initialiser {
body = Statement::Block(vec![init, body]);
}
Ok(body)
}
fn expr_statement(&mut self) -> StmtResult<'a> {
let expr = self.expression()?;
self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?;
Ok(Statement::Expr(expr))
}
fn expression(&mut self) -> ExprResult<'a> {
self.assignment()
}
fn assignment(&mut self) -> ExprResult<'a> {
let expr = self.logic_or()?;
if self.match_token(&[TokenKind::Equal]) {
let equals = self.previous().clone();
let value = self.assignment()?;
if let Expr::Variable(Variable(name)) = expr {
return Ok(Expr::Assign(Assign {
name,
value: Box::new(value),
}));
}
return Err(Error {
line: equals.line,
kind: ErrorKind::InvalidAssignmentTarget(format!("{:?}", equals)),
});
}
Ok(expr)
}
fn logic_or(&mut self) -> ExprResult<'a> {
let mut expr = self.logic_and()?;
while self.match_token(&[TokenKind::Or]) {
expr = Expr::Logical(Logical {
left: Box::new(expr),
operator: self.previous().clone(),
right: Box::new(self.logic_and()?),
})
}
Ok(expr)
}
fn logic_and(&mut self) -> ExprResult<'a> {
let mut expr = self.equality()?;
while self.match_token(&[TokenKind::And]) {
expr = Expr::Logical(Logical {
left: Box::new(expr),
operator: self.previous().clone(),
right: Box::new(self.equality()?),
})
}
Ok(expr)
}
fn equality(&mut self) -> ExprResult<'a> {
self.binary_operator(
&[TokenKind::BangEqual, TokenKind::EqualEqual],
Self::comparison,
)
}
fn comparison(&mut self) -> ExprResult<'a> {
self.binary_operator(
&[
TokenKind::Greater,
TokenKind::GreaterEqual,
TokenKind::Less,
TokenKind::LessEqual,
],
Self::term,
)
}
fn term(&mut self) -> ExprResult<'a> {
self.binary_operator(&[TokenKind::Minus, TokenKind::Plus], Self::factor)
}
fn factor(&mut self) -> ExprResult<'a> {
self.binary_operator(&[TokenKind::Slash, TokenKind::Star], Self::unary)
}
fn unary(&mut self) -> ExprResult<'a> {
if self.match_token(&[TokenKind::Bang, TokenKind::Minus]) {
return Ok(Expr::Unary(Unary {
operator: self.previous().clone(),
right: Box::new(self.unary()?),
}));
}
return self.call();
}
fn call(&mut self) -> ExprResult<'a> {
let mut expr = self.primary()?;
loop {
if self.match_token(&[TokenKind::LeftParen]) {
expr = self.finish_call(expr)?;
} else {
break;
}
}
Ok(expr)
}
fn finish_call(&mut self, callee: Expr<'a>) -> ExprResult<'a> {
let mut args = vec![];
if !self.check_token(&TokenKind::RightParen) {
loop {
// TODO(tazjin): Check for max args count
args.push(self.expression()?);
if !self.match_token(&[TokenKind::Comma]) {
break;
}
}
}
let paren = self.consume(
&TokenKind::RightParen,
ErrorKind::ExpectedToken("Expect ')' after arguments."),
)?;
Ok(Expr::Call(Call {
args,
callee: Box::new(callee),
paren,
}))
}
fn primary(&mut self) -> ExprResult<'a> {
let next = self.advance();
let literal = match next.kind {
TokenKind::True => Literal::Boolean(true),
TokenKind::False => Literal::Boolean(false),
TokenKind::Nil => Literal::Nil,
TokenKind::Number(num) => Literal::Number(num),
TokenKind::String(string) => Literal::String(string),
TokenKind::LeftParen => {
let expr = self.expression()?;
self.consume(&TokenKind::RightParen, ErrorKind::UnmatchedParens)?;
return Ok(Expr::Grouping(Grouping(Box::new(expr))));
}
TokenKind::Identifier(_) => return Ok(Expr::Variable(Variable(next))),
unexpected => {
eprintln!("encountered {:?}", unexpected);
return Err(Error {
line: next.line,
kind: ErrorKind::ExpectedExpression(next.lexeme.into_iter().collect()),
});
}
};
Ok(Expr::Literal(literal))
}
// internal helpers
/// Check if the next token is in `oneof`, and advance if it is.
fn match_token(&mut self, oneof: &[TokenKind]) -> bool {
for token in oneof {
if self.check_token(token) {
self.advance();
return true;
}
}
return false;
}
/// Return the next token and advance parser state.
fn advance(&mut self) -> Token<'a> {
if !self.is_at_end() {
self.current += 1;
}
return self.previous().clone();
}
fn is_at_end(&self) -> bool {
self.check_token(&TokenKind::Eof)
}
/// Is the next token `token`?
fn check_token(&self, token: &TokenKind) -> bool {
self.peek().kind == *token
}
fn peek(&self) -> &Token<'a> {
&self.tokens[self.current]
}
fn previous(&self) -> &Token<'a> {
&self.tokens[self.current - 1]
}
fn consume(&mut self, kind: &TokenKind, err: ErrorKind) -> Result<Token<'a>, Error> {
if self.check_token(kind) {
return Ok(self.advance());
}
Err(Error {
line: self.peek().line,
kind: err,
})
}
fn synchronise(&mut self) {
self.advance();
while !self.is_at_end() {
if self.previous().kind == TokenKind::Semicolon {
return;
}
match self.peek().kind {
TokenKind::Class
| TokenKind::Fun
| TokenKind::Var
| TokenKind::For
| TokenKind::If
| TokenKind::While
| TokenKind::Print
| TokenKind::Return => return,
_ => {
self.advance();
}
}
}
}
fn binary_operator(
&mut self,
oneof: &[TokenKind],
each: fn(&mut Parser<'a>) -> ExprResult<'a>,
) -> ExprResult<'a> {
let mut expr = each(self)?;
while self.match_token(oneof) {
expr = Expr::Binary(Binary {
left: Box::new(expr),
operator: self.previous().clone(),
right: Box::new(each(self)?),
})
}
return Ok(expr);
}
}
pub fn parse<'a>(tokens: Vec<Token<'a>>) -> Result<Block<'a>, Vec<Error>> {
let mut parser = Parser { tokens, current: 0 };
let mut program: Block<'a> = vec![];
let mut errors: Vec<Error> = vec![];
while !parser.is_at_end() {
match parser.declaration() {
Err(err) => {
errors.push(err);
parser.synchronise();
}
Ok(decl) => {
program.push(decl);
}
}
}
if errors.is_empty() {
Ok(program)
} else {
Err(errors)
}
}