feat(tazjin/rlox): Implement single-character scanning
... still not that interesting, but at this point slightly divergent from
the book: The book embraces mutability for interpreter state, initially for
tracking whether an error condition has occurred. I avoid this by instead
defining an error type and collecting the error values, to be handled later
on.

Notes: So far nothing special, but this is just the beginning of the book. I
like the style it is written in, and it has pointed to some interesting
resources, such as a 1965 paper titled "The Next 700 Programming Languages".

Change-Id: I030b38438fec9eb55372bf547af225138908230a
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2144
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
parent 9d2b001c4c
commit 3d1b116f7f

3 changed files with 139 additions and 0 deletions
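For contrast with the divergence described in the commit message above, the book's approach rendered in Rust would look roughly like the sketch below. This is purely illustrative; nothing like it exists in this change, and the type and method names are made up.

// Roughly the book's style, translated to Rust for comparison: every error
// path flips a mutable flag on the interpreter state, which is checked once
// after processing finishes.
struct Lox {
    had_error: bool,
}

impl Lox {
    fn error(&mut self, line: usize, message: &str) {
        eprintln!("[line {}] Error: {}", line, message);
        self.had_error = true;
    }
}

The diff that follows avoids this by giving the scanner an errors field of type Vec<Error> and pushing error values into it as they are encountered.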
users/tazjin/rlox/src/errors.rs (new file, 14 lines)

@@ -0,0 +1,14 @@
+#[derive(Debug)]
+pub enum ErrorKind {
+    UnexpectedChar(char),
+}
+
+#[derive(Debug)]
+pub struct Error {
+    pub line: usize,
+    pub kind: ErrorKind,
+}
+
+pub fn report(loc: &str, err: &Error) {
+    eprintln!("[line {}] Error {}: {:?}", err.line, loc, err.kind);
+}
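A minimal sketch of the "handled later on" part of the commit message, assuming the caller ends up holding the scanner's collected errors: the report_all helper and its "while scanning" location string are invented for illustration and are not part of this commit.

use crate::errors::{self, Error};

// Hypothetical caller-side handling: report every collected error through
// errors::report and return whether any occurred, which is the information
// the book tracks with its mutable hadError flag.
fn report_all(errs: &[Error]) -> bool {
    for err in errs {
        errors::report("while scanning", err);
    }
    !errs.is_empty()
}

With the Error type above, a single entry would print a line such as
[line 3] Error while scanning: UnexpectedChar('@') to stderr.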
users/tazjin/rlox/src/main.rs

@@ -4,7 +4,9 @@ use std::io;
 use std::io::Write;
 use std::process;
 
+mod errors;
 mod interpreter;
+mod scanner;
 
 fn main() {
     let mut args = env::args();
users/tazjin/rlox/src/scanner.rs (new file, 123 lines)

@@ -0,0 +1,123 @@
+use crate::errors::{Error, ErrorKind};
+
+#[derive(Debug)]
+pub enum TokenKind {
+    // Single-character tokens.
+    LeftParen,
+    RightParen,
+    LeftBrace,
+    RightBrace,
+    Comma,
+    Dot,
+    Minus,
+    Plus,
+    Semicolon,
+    Slash,
+    Star,
+
+    // One or two character tokens.
+    Bang,
+    BangEqual,
+    Equal,
+    EqualEqual,
+    Greater,
+    GreaterEqual,
+    Less,
+    LessEqual,
+
+    // Literals.
+    Identifier,
+    String,
+    Number,
+
+    // Keywords.
+    And,
+    Class,
+    Else,
+    False,
+    Fun,
+    For,
+    If,
+    Nil,
+    Or,
+    Print,
+    Return,
+    Super,
+    This,
+    True,
+    Var,
+    While,
+
+    // Special things
+    Eof,
+}
+
+#[derive(Debug)]
+pub struct Token<'a> {
+    kind: TokenKind,
+    lexeme: &'a str,
+    // literal: Object, // TODO(tazjin): Uhh?
+    line: usize,
+}
+
+struct Scanner<'a> {
+    source: &'a str,
+    tokens: Vec<Token<'a>>,
+    errors: Vec<Error>,
+    start: usize,   // offset of first character in current lexeme
+    current: usize, // current offset into source
+    line: usize,    // current line in source
+}
+
+impl<'a> Scanner<'a> {
+    fn is_at_end(&self) -> bool {
+        return self.current >= self.source.len();
+    }
+
+    fn advance(&mut self) -> char {
+        self.current += 1;
+
+        // TODO(tazjin): Due to utf8-safety, this is a bit annoying.
+        // Since string iteration is not the point here I'm just
+        // leaving this as is for now.
+        self.source.chars().nth(self.current - 1).unwrap()
+    }
+
+    fn add_token(&mut self, kind: TokenKind) {
+        let lexeme = &self.source[self.start..self.current];
+        self.tokens.push(Token {
+            kind,
+            lexeme,
+            line: self.line,
+        })
+    }
+
+    fn scan_token(&mut self) {
+        match self.advance() {
+            '(' => self.add_token(TokenKind::LeftParen),
+            ')' => self.add_token(TokenKind::RightParen),
+            '{' => self.add_token(TokenKind::LeftBrace),
+            '}' => self.add_token(TokenKind::RightBrace),
+            ',' => self.add_token(TokenKind::Comma),
+            '.' => self.add_token(TokenKind::Dot),
+            '-' => self.add_token(TokenKind::Minus),
+            '+' => self.add_token(TokenKind::Plus),
+            ';' => self.add_token(TokenKind::Semicolon),
+            '*' => self.add_token(TokenKind::Star),
+
+            unexpected => self.errors.push(Error {
+                line: self.line,
+                kind: ErrorKind::UnexpectedChar(unexpected),
+            }),
+        };
+    }
+
+    fn scan_tokens(mut self) -> Vec<Token<'a>> {
+        while !self.is_at_end() {
+            self.start = self.current;
+            self.scan_token();
+        }
+
+        return self.tokens;
+    }
+}
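On the TODO in advance: chars().nth() walks the string from the beginning on every call, and it treats current as a character count even though add_token slices the source by byte offset, so the two only agree for ASCII input. One possible direction, sketched here as a standalone function that is not part of this commit, is to keep the offset in bytes and step by each character's encoded length:

// Illustrative sketch only: read the character at a byte offset and advance
// the offset by that character's UTF-8 length, so the same offset works for
// both iteration and slicing.
fn next_char(source: &str, offset: &mut usize) -> char {
    let c = source[*offset..]
        .chars()
        .next()
        .expect("offset past end of source");
    *offset += c.len_utf8();
    c
}

fn main() {
    let source = "(+);";
    let mut offset = 0;
    assert_eq!(next_char(source, &mut offset), '(');
    assert_eq!(next_char(source, &mut offset), '+');
    // offset is now a byte index pointing at ')' and can slice source directly.
    assert_eq!(&source[..offset], "(+");
}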