2020-12-06 14:21:06 +01:00
|
|
|
/// Every category of token the scanner can emit.
///
/// Variants that carry data hold the value extracted from the source text;
/// all other variants are fully described by their name.
#[derive(Clone, Debug, PartialEq)]
pub enum TokenKind {
    // Single-character tokens.
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `{`
    LeftBrace,
    /// `}`
    RightBrace,
    /// `,`
    Comma,
    /// `.`
    Dot,
    /// `-`
    Minus,
    /// `+`
    Plus,
    /// `;`
    Semicolon,
    /// `/`
    Slash,
    /// `*`
    Star,

    // One or two character tokens.
    /// `!`
    Bang,
    /// `!=`
    BangEqual,
    /// `=`
    Equal,
    /// `==`
    EqualEqual,
    /// `>`
    Greater,
    /// `>=`
    GreaterEqual,
    /// `<`
    Less,
    /// `<=`
    LessEqual,

    // Literals.
    /// A name that is not one of the reserved words; carries the name itself.
    Identifier(String),
    /// A string literal; carries the text between the surrounding quotes.
    String(String),
    /// A numeric literal; carries the parsed value.
    Number(f64),
    /// The word `true`.
    True,
    /// The word `false`.
    False,
    /// The word `nil`.
    Nil,

    // Keywords.
    /// The word `and`.
    And,
    /// The word `class`.
    Class,
    /// The word `else`.
    Else,
    /// The word `fun`.
    Fun,
    /// The word `for`.
    For,
    /// The word `if`.
    If,
    /// The word `or`.
    Or,
    /// The word `print`.
    Print,
    /// The word `return`.
    Return,
    /// The word `super`.
    Super,
    /// The word `this`.
    This,
    /// The word `var`.
    Var,
    /// The word `while`.
    While,

    // Special things
    /// Marks the end of the token stream.
    Eof,
}
|
|
|
|
|
2020-12-06 14:21:06 +01:00
|
|
|
#[derive(Clone, Debug)]
|
2021-01-14 16:36:06 +01:00
|
|
|
pub struct Token {
|
2020-11-28 19:53:51 +01:00
|
|
|
pub kind: TokenKind,
|
2021-01-14 16:36:06 +01:00
|
|
|
pub lexeme: String,
|
2020-11-28 19:53:51 +01:00
|
|
|
pub line: usize,
|
2020-11-23 02:00:02 +01:00
|
|
|
}
|
|
|
|
|
2021-01-18 18:27:14 +01:00
|
|
|
/// Problems the scanner can record while tokenizing.
///
/// Derives `Debug`, `Clone` and `PartialEq` so that results containing these
/// errors can be unwrapped, printed with `{:?}` and compared in tests.
#[derive(Clone, Debug, PartialEq)]
pub enum ScannerError {
    /// A character was read that does not begin any known token.
    UnexpectedChar {
        /// Value of the scanner's line counter when the character was read.
        line: usize,
        /// The offending character.
        unexpected: char,
    },
    /// The input ended before the closing `"` of a string literal was found.
    UnterminatedString {
        /// Value of the scanner's line counter when the end of input was hit.
        line: usize,
    },
}
|
|
|
|
|
2020-11-23 02:00:02 +01:00
|
|
|
struct Scanner<'a> {
|
2020-11-27 17:55:38 +01:00
|
|
|
source: &'a [char],
|
2021-01-14 16:36:06 +01:00
|
|
|
tokens: Vec<Token>,
|
2021-01-18 18:27:14 +01:00
|
|
|
errors: Vec<ScannerError>,
|
2020-11-23 02:00:02 +01:00
|
|
|
start: usize, // offset of first character in current lexeme
|
|
|
|
current: usize, // current offset into source
|
|
|
|
line: usize, // current line in source
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> Scanner<'a> {
|
|
|
|
fn is_at_end(&self) -> bool {
|
|
|
|
return self.current >= self.source.len();
|
|
|
|
}
|
|
|
|
|
|
|
|
fn advance(&mut self) -> char {
|
|
|
|
self.current += 1;
|
2020-11-28 17:18:48 +01:00
|
|
|
self.source[self.current - 1]
|
2020-11-23 02:00:02 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
fn add_token(&mut self, kind: TokenKind) {
|
|
|
|
let lexeme = &self.source[self.start..self.current];
|
|
|
|
self.tokens.push(Token {
|
|
|
|
kind,
|
2021-01-14 16:36:06 +01:00
|
|
|
lexeme: lexeme.into_iter().collect(),
|
2020-11-23 02:00:02 +01:00
|
|
|
line: self.line,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
fn scan_token(&mut self) {
|
|
|
|
match self.advance() {
|
2020-11-27 18:10:32 +01:00
|
|
|
// simple single-character tokens
|
2020-11-23 02:00:02 +01:00
|
|
|
'(' => self.add_token(TokenKind::LeftParen),
|
|
|
|
')' => self.add_token(TokenKind::RightParen),
|
|
|
|
'{' => self.add_token(TokenKind::LeftBrace),
|
|
|
|
'}' => self.add_token(TokenKind::RightBrace),
|
|
|
|
',' => self.add_token(TokenKind::Comma),
|
|
|
|
'.' => self.add_token(TokenKind::Dot),
|
|
|
|
'-' => self.add_token(TokenKind::Minus),
|
|
|
|
'+' => self.add_token(TokenKind::Plus),
|
|
|
|
';' => self.add_token(TokenKind::Semicolon),
|
|
|
|
'*' => self.add_token(TokenKind::Star),
|
|
|
|
|
2020-11-27 18:10:32 +01:00
|
|
|
// possible multi-character tokens
|
|
|
|
'!' => self.add_if_next('=', TokenKind::BangEqual, TokenKind::Bang),
|
2021-02-27 10:27:52 +01:00
|
|
|
'=' => {
|
|
|
|
self.add_if_next('=', TokenKind::EqualEqual, TokenKind::Equal)
|
|
|
|
}
|
2020-11-27 18:10:32 +01:00
|
|
|
'<' => self.add_if_next('=', TokenKind::LessEqual, TokenKind::Less),
|
2021-02-27 10:27:52 +01:00
|
|
|
'>' => self.add_if_next(
|
|
|
|
'=',
|
|
|
|
TokenKind::GreaterEqual,
|
|
|
|
TokenKind::Greater,
|
|
|
|
),
|
2020-11-27 18:10:32 +01:00
|
|
|
|
2020-11-28 16:52:13 +01:00
|
|
|
'/' => {
|
|
|
|
// support comments until EOL by discarding characters
|
|
|
|
if self.match_next('/') {
|
|
|
|
while self.peek() != '\n' && !self.is_at_end() {
|
|
|
|
self.advance();
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
self.add_token(TokenKind::Slash);
|
|
|
|
}
|
2020-11-28 17:18:48 +01:00
|
|
|
}
|
2020-11-28 16:52:13 +01:00
|
|
|
|
2020-11-28 16:54:59 +01:00
|
|
|
// ignore whitespace
|
2020-11-28 17:51:44 +01:00
|
|
|
ws if ws.is_whitespace() => {
|
|
|
|
if ws == '\n' {
|
|
|
|
self.line += 1
|
|
|
|
}
|
|
|
|
}
|
2020-11-28 16:54:59 +01:00
|
|
|
|
2020-11-28 17:18:48 +01:00
|
|
|
'"' => self.scan_string(),
|
|
|
|
|
2020-11-28 17:32:44 +01:00
|
|
|
digit if digit.is_digit(10) => self.scan_number(),
|
|
|
|
|
2020-11-28 17:51:44 +01:00
|
|
|
chr if chr.is_alphabetic() || chr == '_' => self.scan_identifier(),
|
|
|
|
|
2021-01-18 18:27:14 +01:00
|
|
|
unexpected => self.errors.push(ScannerError::UnexpectedChar {
|
2020-11-23 02:00:02 +01:00
|
|
|
line: self.line,
|
2021-01-18 18:27:14 +01:00
|
|
|
unexpected,
|
2020-11-23 02:00:02 +01:00
|
|
|
}),
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2020-11-28 16:52:13 +01:00
|
|
|
fn match_next(&mut self, expected: char) -> bool {
|
2020-11-27 18:10:32 +01:00
|
|
|
if self.is_at_end() || self.source[self.current] != expected {
|
2020-11-28 16:52:13 +01:00
|
|
|
false
|
2020-11-27 18:10:32 +01:00
|
|
|
} else {
|
|
|
|
self.current += 1;
|
2020-11-28 16:52:13 +01:00
|
|
|
true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn add_if_next(&mut self, expected: char, then: TokenKind, or: TokenKind) {
|
|
|
|
if self.match_next(expected) {
|
2020-11-27 18:10:32 +01:00
|
|
|
self.add_token(then);
|
2020-11-28 16:52:13 +01:00
|
|
|
} else {
|
|
|
|
self.add_token(or);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn peek(&self) -> char {
|
|
|
|
if self.is_at_end() {
|
|
|
|
return '\0';
|
|
|
|
} else {
|
|
|
|
return self.source[self.current];
|
2020-11-27 18:10:32 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-11-28 17:32:44 +01:00
|
|
|
fn peek_next(&self) -> char {
|
2020-11-28 17:51:44 +01:00
|
|
|
if self.current + 1 >= self.source.len() {
|
2020-11-28 17:32:44 +01:00
|
|
|
return '\0';
|
|
|
|
} else {
|
|
|
|
return self.source[self.current + 1];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-11-28 17:18:48 +01:00
|
|
|
fn scan_string(&mut self) {
|
2020-11-28 17:32:44 +01:00
|
|
|
while self.peek() != '"' && !self.is_at_end() {
|
2020-11-28 17:18:48 +01:00
|
|
|
if self.peek() == '\n' {
|
|
|
|
self.line += 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
self.advance();
|
|
|
|
}
|
|
|
|
|
|
|
|
if self.is_at_end() {
|
2021-01-18 18:27:14 +01:00
|
|
|
self.errors
|
|
|
|
.push(ScannerError::UnterminatedString { line: self.line });
|
2020-11-28 17:18:48 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// closing '"'
|
|
|
|
self.advance();
|
|
|
|
|
|
|
|
// add token without surrounding quotes
|
|
|
|
let string: String = self.source[(self.start + 1)..(self.current - 1)]
|
|
|
|
.iter()
|
|
|
|
.collect();
|
|
|
|
self.add_token(TokenKind::String(string));
|
|
|
|
}
|
|
|
|
|
2020-11-28 17:32:44 +01:00
|
|
|
fn scan_number(&mut self) {
|
|
|
|
while self.peek().is_digit(10) {
|
|
|
|
self.advance();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Look for a fractional part
|
|
|
|
if self.peek() == '.' && self.peek_next().is_digit(10) {
|
|
|
|
// consume '.'
|
|
|
|
self.advance();
|
|
|
|
|
|
|
|
while self.peek().is_digit(10) {
|
|
|
|
self.advance();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
let num: f64 = self.source[self.start..self.current]
|
|
|
|
.iter()
|
|
|
|
.collect::<String>()
|
|
|
|
.parse()
|
|
|
|
.expect("float parsing should always work");
|
|
|
|
|
|
|
|
self.add_token(TokenKind::Number(num));
|
|
|
|
}
|
|
|
|
|
2020-11-28 17:51:44 +01:00
|
|
|
fn scan_identifier(&mut self) {
|
|
|
|
while self.peek().is_alphanumeric() || self.peek() == '_' {
|
|
|
|
self.advance();
|
|
|
|
}
|
|
|
|
|
2021-02-27 10:27:52 +01:00
|
|
|
let ident: String =
|
|
|
|
self.source[self.start..self.current].iter().collect();
|
2020-11-28 17:51:44 +01:00
|
|
|
|
|
|
|
// Determine whether this is an identifier, or a keyword:
|
|
|
|
let token_kind = match ident.as_str() {
|
|
|
|
"and" => TokenKind::And,
|
|
|
|
"class" => TokenKind::Class,
|
|
|
|
"else" => TokenKind::Else,
|
|
|
|
"false" => TokenKind::False,
|
|
|
|
"for" => TokenKind::For,
|
|
|
|
"fun" => TokenKind::Fun,
|
|
|
|
"if" => TokenKind::If,
|
|
|
|
"nil" => TokenKind::Nil,
|
|
|
|
"or" => TokenKind::Or,
|
|
|
|
"print" => TokenKind::Print,
|
|
|
|
"return" => TokenKind::Return,
|
|
|
|
"super" => TokenKind::Super,
|
|
|
|
"this" => TokenKind::This,
|
|
|
|
"true" => TokenKind::True,
|
|
|
|
"var" => TokenKind::Var,
|
|
|
|
"while" => TokenKind::While,
|
|
|
|
_ => TokenKind::Identifier(ident),
|
|
|
|
};
|
|
|
|
|
|
|
|
self.add_token(token_kind);
|
|
|
|
}
|
|
|
|
|
2020-11-28 18:20:10 +01:00
|
|
|
fn scan_tokens(&mut self) {
|
2020-11-23 02:00:02 +01:00
|
|
|
while !self.is_at_end() {
|
|
|
|
self.start = self.current;
|
|
|
|
self.scan_token();
|
|
|
|
}
|
|
|
|
|
2020-11-28 17:51:44 +01:00
|
|
|
self.add_token(TokenKind::Eof);
|
2020-11-23 02:00:02 +01:00
|
|
|
}
|
|
|
|
}
|
2020-11-27 18:16:42 +01:00
|
|
|
|
2021-01-18 18:27:14 +01:00
|
|
|
pub fn scan<'a>(input: &'a [char]) -> Result<Vec<Token>, Vec<ScannerError>> {
|
2020-11-28 18:20:10 +01:00
|
|
|
let mut scanner = Scanner {
|
2020-11-27 18:16:42 +01:00
|
|
|
source: &input,
|
|
|
|
tokens: vec![],
|
|
|
|
errors: vec![],
|
|
|
|
start: 0,
|
|
|
|
current: 0,
|
|
|
|
line: 0,
|
|
|
|
};
|
|
|
|
|
2020-11-28 18:20:10 +01:00
|
|
|
scanner.scan_tokens();
|
|
|
|
|
|
|
|
if !scanner.errors.is_empty() {
|
|
|
|
return Err(scanner.errors);
|
|
|
|
}
|
|
|
|
|
|
|
|
return Ok(scanner.tokens);
|
2020-11-27 18:16:42 +01:00
|
|
|
}
|