feat(tazjin/rlox): Implement parsing up to unary expressions

... with the exception of parenthesised expressions, because error
threading is not implemented yet.

Change-Id: I8d455d85e647548d5b71cbfd3d078f4970dab7fb
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2232
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
This commit is contained in:
Vincent Ambo 2020-12-06 14:21:06 +01:00 committed by tazjin
parent a8371b01df
commit 5fcff11eae
2 changed files with 109 additions and 15 deletions

View file

@ -1,6 +1,6 @@
// This implements the grammar of Lox as described starting in the // This implements the grammar of Lox as described starting in the
// Crafting Interpreters chapter "Representing Code". Note that the // Crafting Interpreters chapter "Representing Code". Note that the
// upstream Java implementation works about Java being bad at value // upstream Java implementation works around Java being bad at value
// classes by writing a code generator for Java. // classes by writing a code generator for Java.
// //
// My Rust implementation skips this step because it's unnecessary, we // My Rust implementation skips this step because it's unnecessary, we
@ -12,21 +12,33 @@ use crate::scanner::{Token, TokenKind};
#[derive(Debug)] #[derive(Debug)]
struct Binary<'a> { struct Binary<'a> {
left: Box<Expr<'a>>, left: Box<Expr<'a>>,
right: Box<Expr<'a>>,
operator: Token<'a>, operator: Token<'a>,
right: Box<Expr<'a>>,
} }
#[derive(Debug)] #[derive(Debug)]
struct Grouping<'a>(Box<Expr<'a>>); struct Grouping<'a>(Box<Expr<'a>>);
#[derive(Debug)] #[derive(Debug)]
struct Literal(TokenKind); enum Literal {
Boolean(bool),
Number(f64),
String(String),
Nil,
}
#[derive(Debug)]
struct Unary<'a> {
operator: Token<'a>,
right: Box<Expr<'a>>,
}
#[derive(Debug)] #[derive(Debug)]
enum Expr<'a> { enum Expr<'a> {
Binary(Binary<'a>), Binary(Binary<'a>),
Grouping(Grouping<'a>), Grouping(Grouping<'a>),
Literal(Literal), Literal(Literal),
Unary(Unary<'a>),
} }
// Parser // Parser
@ -56,15 +68,76 @@ impl<'a> Parser<'a> {
} }
fn equality(&mut self) -> Expr<'a> { fn equality(&mut self) -> Expr<'a> {
let expr = self.comparison(); self.binary_operator(
unimplemented!() &[TokenKind::BangEqual, TokenKind::EqualEqual],
Self::comparison,
Self::comparison,
)
} }
fn comparison(&mut self) -> Expr<'a> { fn comparison(&mut self) -> Expr<'a> {
unimplemented!() self.binary_operator(
&[
TokenKind::Greater,
TokenKind::GreaterEqual,
TokenKind::Less,
TokenKind::LessEqual,
],
Self::term,
Self::term,
)
}
fn term(&mut self) -> Expr<'a> {
self.binary_operator(
&[TokenKind::Minus, TokenKind::Plus],
Self::factor,
Self::factor,
)
}
fn factor(&mut self) -> Expr<'a> {
self.binary_operator(
&[TokenKind::Slash, TokenKind::Star],
Self::unary,
Self::unary,
)
}
fn unary(&mut self) -> Expr<'a> {
if self.match_token(&[TokenKind::Bang, TokenKind::Minus]) {
return Expr::Unary(Unary {
operator: self.previous(),
right: Box::new(self.unary()),
});
}
return self.primary();
}
fn primary(&mut self) -> Expr<'a> {
let next = self.advance();
let literal = match next.kind {
TokenKind::True => Literal::Boolean(true),
TokenKind::False => Literal::Boolean(false),
TokenKind::Nil => Literal::Nil,
TokenKind::Number(num) => Literal::Number(num),
TokenKind::String(string) => Literal::String(string),
TokenKind::LeftParen => {
unimplemented!("need error handling to deal with unbalanced parens");
}
// This branch indicates a parser bug, not invalid input.
unexpected => panic!("Parser encountered unexpected token '{:?}'", unexpected),
};
Expr::Literal(literal)
} }
// internal helpers // internal helpers
/// Check if the next token is in `oneof`, and advance if it is.
fn match_token(&mut self, oneof: &[TokenKind]) -> bool { fn match_token(&mut self, oneof: &[TokenKind]) -> bool {
for token in oneof { for token in oneof {
if self.check_token(token) { if self.check_token(token) {
@ -76,7 +149,8 @@ impl<'a> Parser<'a> {
return false; return false;
} }
fn advance(&mut self) -> &Token { /// Return the next token and advance parser state.
fn advance(&mut self) -> Token<'a> {
if !self.is_at_end() { if !self.is_at_end() {
self.current += 1; self.current += 1;
} }
@ -88,15 +162,35 @@ impl<'a> Parser<'a> {
self.check_token(&TokenKind::Eof) self.check_token(&TokenKind::Eof)
} }
/// Is the next token `token`?
fn check_token(&self, token: &TokenKind) -> bool { fn check_token(&self, token: &TokenKind) -> bool {
self.peek().kind == *token self.peek().kind == *token
} }
fn peek(&self) -> &Token { fn peek(&self) -> &Token<'a> {
&self.tokens[self.current] &self.tokens[self.current]
} }
fn previous(&self) -> &Token { fn previous(&self) -> Token<'a> {
&self.tokens[self.current - 1] self.tokens[self.current - 1].clone()
}
fn binary_operator(
&mut self,
oneof: &[TokenKind],
left: fn(&mut Parser<'a>) -> Expr<'a>,
right: fn(&mut Parser<'a>) -> Expr<'a>,
) -> Expr<'a> {
let mut expr = left(self);
while self.match_token(oneof) {
expr = Expr::Binary(Binary {
left: Box::new(expr),
operator: self.previous(),
right: Box::new(right(self)),
})
}
return expr;
} }
} }

View file

@ -1,6 +1,6 @@
use crate::errors::{Error, ErrorKind}; use crate::errors::{Error, ErrorKind};
#[derive(Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]
pub enum TokenKind { pub enum TokenKind {
// Single-character tokens. // Single-character tokens.
LeftParen, LeftParen,
@ -29,22 +29,22 @@ pub enum TokenKind {
Identifier(String), Identifier(String),
String(String), String(String),
Number(f64), Number(f64),
True,
False,
Nil,
// Keywords. // Keywords.
And, And,
Class, Class,
Else, Else,
False,
Fun, Fun,
For, For,
If, If,
Nil,
Or, Or,
Print, Print,
Return, Return,
Super, Super,
This, This,
True,
Var, Var,
While, While,
@ -52,7 +52,7 @@ pub enum TokenKind {
Eof, Eof,
} }
#[derive(Debug)] #[derive(Clone, Debug)]
pub struct Token<'a> { pub struct Token<'a> {
pub kind: TokenKind, pub kind: TokenKind,
pub lexeme: &'a [char], pub lexeme: &'a [char],