feat(tazjin/rlox): Bootstrap recursive-descent parser for Lox
... mostly some AST boilerplate and a first top-level rule, plus boilerplate similar to that set up in the Scanner. Change-Id: I605d1de23c47a3b3702ab4f62cd3371bc3988c7d Reviewed-on: https://cl.tvl.fyi/c/depot/+/2194 Reviewed-by: tazjin <mail@tazj.in> Tested-by: BuildkiteCI
This commit is contained in:
parent
754edb4616
commit
349583d5a9
3 changed files with 107 additions and 5 deletions
|
@ -6,6 +6,7 @@ use std::process;
|
||||||
|
|
||||||
mod errors;
|
mod errors;
|
||||||
mod interpreter;
|
mod interpreter;
|
||||||
|
mod parser;
|
||||||
mod scanner;
|
mod scanner;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
|
|
102
users/tazjin/rlox/src/parser.rs
Normal file
102
users/tazjin/rlox/src/parser.rs
Normal file
|
@ -0,0 +1,102 @@
|
||||||
|
// This implements the grammar of Lox as described starting in the
|
||||||
|
// Crafting Interpreters chapter "Representing Code". Note that the
|
||||||
|
// upstream Java implementation works around Java being bad at value
|
||||||
|
// classes by writing a code generator for Java.
|
||||||
|
//
|
||||||
|
// My Rust implementation skips this step because it's unnecessary, we
|
||||||
|
// have real types.
|
||||||
|
use crate::scanner::{Token, TokenKind};
|
||||||
|
|
||||||
|
// AST
|
||||||
|
|
||||||
|
/// AST node for a binary expression: two boxed operand expressions and the
/// operator token that joins them.
#[derive(Debug)]
|
||||||
|
struct Binary<'a> {
|
||||||
|
// Left-hand operand; boxed because `Expr` is recursive.
left: Box<Expr<'a>>,
|
||||||
|
// Right-hand operand; boxed for the same reason.
right: Box<Expr<'a>>,
|
||||||
|
// The operator, kept as the full token rather than only its kind.
operator: Token<'a>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// AST node wrapping a single boxed inner expression; per the grammar
/// comment in this file this corresponds to `"(" expression ")"`.
#[derive(Debug)]
|
||||||
|
struct Grouping<'a>(Box<Expr<'a>>);
|
||||||
|
|
||||||
|
/// AST node for a literal, represented by the `TokenKind` it was parsed from.
// NOTE(review): presumably the literal's value is carried inside the
// `TokenKind` variant itself - confirm against the scanner.
#[derive(Debug)]
|
||||||
|
struct Literal(TokenKind);
|
||||||
|
|
||||||
|
/// An expression in the AST: one variant per node type defined in this file.
#[derive(Debug)]
|
||||||
|
enum Expr<'a> {
|
||||||
|
// Two operands joined by an operator token.
Binary(Binary<'a>),
|
||||||
|
// A wrapped inner expression.
Grouping(Grouping<'a>),
|
||||||
|
// A literal, identified by its token kind.
Literal(Literal),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parser
|
||||||
|
|
||||||
|
/*
|
||||||
|
expression → equality ;
|
||||||
|
equality → comparison ( ( "!=" | "==" ) comparison )* ;
|
||||||
|
comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
|
||||||
|
term → factor ( ( "-" | "+" ) factor )* ;
|
||||||
|
factor → unary ( ( "/" | "*" ) unary )* ;
|
||||||
|
unary → ( "!" | "-" ) unary
|
||||||
|
| primary ;
|
||||||
|
primary → NUMBER | STRING | "true" | "false" | "nil"
|
||||||
|
| "(" expression ")" ;
|
||||||
|
*/
|
||||||
|
|
||||||
|
/// Parser state: the token list being parsed and a cursor into it.
struct Parser<'a> {
|
||||||
|
// Tokens to parse. The helper methods test for `TokenKind::Eof` to detect
// the end, so the list is presumably Eof-terminated - confirm against the
// scanner.
tokens: Vec<Token<'a>>,
|
||||||
|
// Index into `tokens` of the token that `peek` returns.
current: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Parser<'a> {
|
||||||
|
// recursive-descent parser functions
|
||||||
|
|
||||||
|
fn expression(&mut self) -> Expr<'a> {
|
||||||
|
self.equality()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn equality(&mut self) -> Expr<'a> {
|
||||||
|
let expr = self.comparison();
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn comparison(&mut self) -> Expr<'a> {
|
||||||
|
unimplemented!()
|
||||||
|
}
|
||||||
|
|
||||||
|
// internal helpers
|
||||||
|
fn match_token(&mut self, oneof: &[TokenKind]) -> bool {
|
||||||
|
for token in oneof {
|
||||||
|
if self.check_token(token) {
|
||||||
|
self.advance();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn advance(&mut self) -> &Token {
|
||||||
|
if !self.is_at_end() {
|
||||||
|
self.current += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return self.previous();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_at_end(&self) -> bool {
|
||||||
|
self.check_token(&TokenKind::Eof)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn check_token(&self, token: &TokenKind) -> bool {
|
||||||
|
self.peek().kind == *token
|
||||||
|
}
|
||||||
|
|
||||||
|
fn peek(&self) -> &Token {
|
||||||
|
&self.tokens[self.current]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn previous(&self) -> &Token {
|
||||||
|
&self.tokens[self.current - 1]
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,6 +1,6 @@
|
||||||
use crate::errors::{Error, ErrorKind};
|
use crate::errors::{Error, ErrorKind};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub enum TokenKind {
|
pub enum TokenKind {
|
||||||
// Single-character tokens.
|
// Single-character tokens.
|
||||||
LeftParen,
|
LeftParen,
|
||||||
|
@ -54,10 +54,9 @@ pub enum TokenKind {
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Token<'a> {
|
pub struct Token<'a> {
|
||||||
kind: TokenKind,
|
pub kind: TokenKind,
|
||||||
lexeme: &'a [char],
|
pub lexeme: &'a [char],
|
||||||
// literal: Object, // TODO(tazjin): Uhh?
|
pub line: usize,
|
||||||
line: usize,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Scanner<'a> {
|
struct Scanner<'a> {
|
||||||
|
|
Loading…
Reference in a new issue