Lexer: reduce code duplication in Rule mapping
produce_token renamed to map_rule
- Skips leading whitespace before any Rule evaluations
- Handles creation and destruction of Rule
This commit is contained in:
parent
d4245844ce
commit
0661789d42
@ -83,10 +83,12 @@ pub mod lexer {
|
|||||||
self.cursor += len
|
self.cursor += len
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/// Advances the cursor and produces a token
|
/// Advances the cursor and produces a token from a provided [Rule] function
|
||||||
fn produce_token(&mut self, ty: Type, len: usize) -> Option<Token> {
|
fn map_rule<F>(&mut self, rule: F, ty: Type) -> Option<Token>
|
||||||
|
where F: Fn(Rule) -> Rule {
|
||||||
|
self.skip_whitespace();
|
||||||
let start = self.cursor;
|
let start = self.cursor;
|
||||||
self.cursor += len;
|
self.cursor += Rule::new(self.text()).and(rule).end()?;
|
||||||
Some(Token::new(ty, start, self.cursor))
|
Some(Token::new(ty, start, self.cursor))
|
||||||
}
|
}
|
||||||
/// Gets a slice of text beginning at the cursor
|
/// Gets a slice of text beginning at the cursor
|
||||||
@ -126,85 +128,66 @@ pub mod lexer {
|
|||||||
}
|
}
|
||||||
// functions for lexing individual tokens
|
// functions for lexing individual tokens
|
||||||
pub fn invalid(&mut self) -> Option<Token> {
|
pub fn invalid(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.invalid(), Type::Invalid)
|
||||||
self.produce_token(Type::Invalid, Rule::new(self.text()).invalid().end()?)
|
|
||||||
}
|
}
|
||||||
// comments
|
// comments
|
||||||
pub fn comment(&mut self) -> Option<Token> {
|
pub fn comment(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.comment(), Type::Comment)
|
||||||
self.produce_token(Type::Comment, Rule::new(self.text()).comment().end()?)
|
|
||||||
}
|
}
|
||||||
// keywords
|
// keywords
|
||||||
pub fn kw_else(&mut self) -> Option<Token> {
|
pub fn kw_else(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.str("else"), Type::KwElse)
|
||||||
self.produce_token(Type::KwElse, Rule::new(self.text()).str("else").end()?)
|
|
||||||
}
|
}
|
||||||
pub fn kw_for(&mut self) -> Option<Token> {
|
pub fn kw_for(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.str("for"), Type::KwFor)
|
||||||
self.produce_token(Type::KwFor, Rule::new(self.text()).str("for").end()?)
|
|
||||||
}
|
}
|
||||||
pub fn kw_fn(&mut self) -> Option<Token> {
|
pub fn kw_fn(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.str("fn"), Type::KwFn)
|
||||||
self.produce_token(Type::KwFn, Rule::new(self.text()).str("fn").end()?)
|
|
||||||
}
|
}
|
||||||
pub fn kw_if(&mut self) -> Option<Token> {
|
pub fn kw_if(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.str("if"), Type::KwIf)
|
||||||
self.produce_token(Type::KwIf, Rule::new(self.text()).str("if").end()?)
|
|
||||||
}
|
}
|
||||||
pub fn kw_in(&mut self) -> Option<Token> {
|
pub fn kw_in(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.str("in"), Type::KwIn)
|
||||||
self.produce_token(Type::KwIn, Rule::new(self.text()).str("in").end()?)
|
|
||||||
}
|
}
|
||||||
pub fn kw_let(&mut self) -> Option<Token> {
|
pub fn kw_let(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.str("let"), Type::KwLet)
|
||||||
self.produce_token(Type::KwLet, Rule::new(self.text()).str("let").end()?)
|
|
||||||
}
|
}
|
||||||
pub fn kw_while(&mut self) -> Option<Token> {
|
pub fn kw_while(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.str("while"), Type::KwWhile)
|
||||||
self.produce_token(Type::KwWhile, Rule::new(self.text()).str("while").end()?)
|
|
||||||
}
|
}
|
||||||
// identifiers
|
// identifiers
|
||||||
pub fn identifier(&mut self) -> Option<Token> {
|
pub fn identifier(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.identifier(), Type::Identifier)
|
||||||
self.produce_token(Type::Identifier, Rule::new(self.text()).identifier().end()?)
|
|
||||||
}
|
}
|
||||||
// literals
|
// literals
|
||||||
pub fn lit_integer(&mut self) -> Option<Token> {
|
pub fn lit_integer(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.integer(), Type::LitInteger)
|
||||||
self.produce_token(Type::LitInteger, Rule::new(self.text()).integer().end()?)
|
|
||||||
}
|
}
|
||||||
pub fn lit_float(&mut self) -> Option<Token> {
|
pub fn lit_float(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.float(), Type::LitFloat)
|
||||||
self.produce_token(Type::LitFloat, Rule::new(self.text()).float().end()?)
|
|
||||||
}
|
}
|
||||||
pub fn lit_string(&mut self) -> Option<Token> {
|
pub fn lit_string(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.string(), Type::LitString)
|
||||||
self.produce_token(Type::LitString, Rule::new(self.text()).string().end()?)
|
|
||||||
}
|
}
|
||||||
// delimiters
|
// delimiters
|
||||||
pub fn l_brack(&mut self) -> Option<Token> {
|
pub fn l_brack(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.char('['), Type::LBrack)
|
||||||
self.produce_token(Type::LBrack, Rule::new(self.text()).char('[').end()?)
|
|
||||||
}
|
}
|
||||||
pub fn r_brack(&mut self) -> Option<Token> {
|
pub fn r_brack(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.char(']'), Type::RBrack)
|
||||||
self.produce_token(Type::RBrack, Rule::new(self.text()).char(']').end()?)
|
|
||||||
}
|
}
|
||||||
pub fn l_curly(&mut self) -> Option<Token> {
|
pub fn l_curly(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.char('{'), Type::LCurly)
|
||||||
self.produce_token(Type::LCurly, Rule::new(self.text()).char('{').end()?)
|
|
||||||
}
|
}
|
||||||
pub fn r_curly(&mut self) -> Option<Token> {
|
pub fn r_curly(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.char('}'), Type::RCurly)
|
||||||
self.produce_token(Type::RCurly, Rule::new(self.text()).char('}').end()?)
|
|
||||||
}
|
}
|
||||||
pub fn l_paren(&mut self) -> Option<Token> {
|
pub fn l_paren(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.char('('), Type::LParen)
|
||||||
self.produce_token(Type::LParen, Rule::new(self.text()).char('(').end()?)
|
|
||||||
}
|
}
|
||||||
pub fn r_paren(&mut self) -> Option<Token> {
|
pub fn r_paren(&mut self) -> Option<Token> {
|
||||||
self.skip_whitespace();
|
self.map_rule(|r| r.char(')'), Type::RParen)
|
||||||
self.produce_token(Type::RParen, Rule::new(self.text()).char(')').end()?)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -546,76 +529,76 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
mod integer {
|
mod integer {
|
||||||
use super::*;
|
use super::*;
|
||||||
#[test]
|
#[test]
|
||||||
fn bare() {
|
fn bare() {
|
||||||
assert_whole_input_is_token("10010110", Lexer::lit_integer, Type::LitInteger);
|
assert_whole_input_is_token("10010110", Lexer::lit_integer, Type::LitInteger);
|
||||||
assert_whole_input_is_token("12345670", Lexer::lit_integer, Type::LitInteger);
|
assert_whole_input_is_token("12345670", Lexer::lit_integer, Type::LitInteger);
|
||||||
assert_whole_input_is_token("1234567890", Lexer::lit_integer, Type::LitInteger);
|
assert_whole_input_is_token("1234567890", Lexer::lit_integer, Type::LitInteger);
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn base16() {
|
||||||
|
assert_has_type_and_range("0x1234", Lexer::lit_integer, Type::LitInteger, 0..6);
|
||||||
|
assert_has_type_and_range(
|
||||||
|
"0x1234 \"hello\"",
|
||||||
|
Lexer::lit_integer,
|
||||||
|
Type::LitInteger,
|
||||||
|
0..6,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn base10() {
|
||||||
|
assert_whole_input_is_token("0d1234", Lexer::lit_integer, Type::LitInteger);
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn base8() {
|
||||||
|
assert_whole_input_is_token("0o1234", Lexer::lit_integer, Type::LitInteger);
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn base2() {
|
||||||
|
assert_whole_input_is_token("0b1010", Lexer::lit_integer, Type::LitInteger);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
mod float {
|
||||||
fn base16() {
|
use super::*;
|
||||||
assert_has_type_and_range("0x1234", Lexer::lit_integer, Type::LitInteger, 0..6);
|
#[test]
|
||||||
assert_has_type_and_range(
|
fn number_dot_number_is_float() {
|
||||||
"0x1234 \"hello\"",
|
assert_whole_input_is_token("1.0", Lexer::lit_float, Type::LitFloat);
|
||||||
Lexer::lit_integer,
|
}
|
||||||
Type::LitInteger,
|
#[test]
|
||||||
0..6,
|
fn nothing_dot_number_is_float() {
|
||||||
);
|
assert_whole_input_is_token(".0", Lexer::lit_float, Type::LitFloat);
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
#[should_panic]
|
||||||
|
fn number_dot_nothing_is_not_float() {
|
||||||
|
assert_whole_input_is_token("1.", Lexer::lit_float, Type::LitFloat);
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
#[should_panic]
|
||||||
|
fn nothing_dot_nothing_is_not_float() {
|
||||||
|
assert_whole_input_is_token(".", Lexer::lit_float, Type::LitFloat);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
mod string {
|
||||||
fn base10() {
|
use super::*;
|
||||||
assert_whole_input_is_token("0d1234", Lexer::lit_integer, Type::LitInteger);
|
#[test]
|
||||||
|
fn empty_string() {
|
||||||
|
assert_whole_input_is_token("\"\"", Lexer::lit_string, Type::LitString);
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn unicode_string() {
|
||||||
|
assert_whole_input_is_token("\"I 💙 🦈!\"", Lexer::lit_string, Type::LitString);
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
fn escape_string() {
|
||||||
|
assert_whole_input_is_token(
|
||||||
|
r#"" \"This is a quote\" ""#,
|
||||||
|
Lexer::lit_string,
|
||||||
|
Type::LitString,
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
|
||||||
fn base8() {
|
|
||||||
assert_whole_input_is_token("0o1234", Lexer::lit_integer, Type::LitInteger);
|
|
||||||
}
|
|
||||||
#[test]
|
|
||||||
fn base2() {
|
|
||||||
assert_whole_input_is_token("0b1010", Lexer::lit_integer, Type::LitInteger);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mod float {
|
|
||||||
use super::*;
|
|
||||||
#[test]
|
|
||||||
fn number_dot_number_is_float() {
|
|
||||||
assert_whole_input_is_token("1.0", Lexer::lit_float, Type::LitFloat);
|
|
||||||
}
|
|
||||||
#[test]
|
|
||||||
fn nothing_dot_number_is_float() {
|
|
||||||
assert_whole_input_is_token(".0", Lexer::lit_float, Type::LitFloat);
|
|
||||||
}
|
|
||||||
#[test]
|
|
||||||
#[should_panic]
|
|
||||||
fn number_dot_nothing_is_not_float() {
|
|
||||||
assert_whole_input_is_token("1.", Lexer::lit_float, Type::LitFloat);
|
|
||||||
}
|
|
||||||
#[test]
|
|
||||||
#[should_panic]
|
|
||||||
fn nothing_dot_nothing_is_not_float() {
|
|
||||||
assert_whole_input_is_token(".", Lexer::lit_float, Type::LitFloat);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mod string {
|
|
||||||
use super::*;
|
|
||||||
#[test]
|
|
||||||
fn empty_string() {
|
|
||||||
assert_whole_input_is_token("\"\"", Lexer::lit_string, Type::LitString);
|
|
||||||
}
|
|
||||||
#[test]
|
|
||||||
fn unicode_string() {
|
|
||||||
assert_whole_input_is_token("\"I 💙 🦈!\"", Lexer::lit_string, Type::LitString);
|
|
||||||
}
|
|
||||||
#[test]
|
|
||||||
fn escape_string() {
|
|
||||||
assert_whole_input_is_token(
|
|
||||||
r#"" \"This is a quote\" ""#,
|
|
||||||
Lexer::lit_string,
|
|
||||||
Type::LitString,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mod delimiter {
|
mod delimiter {
|
||||||
use super::*;
|
use super::*;
|
||||||
#[test]
|
#[test]
|
||||||
|
Loading…
Reference in New Issue
Block a user