token/lexer: Keywords are identifiers. Add missing operators. Fix <<=/>>=.
This commit is contained in:
parent
9c993b31a1
commit
50bb03ae18
@ -8,20 +8,13 @@ pub mod token {
|
||||
mod token_type;
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum Type {
|
||||
// Invalid syntax
|
||||
Invalid,
|
||||
// Any kind of comment
|
||||
Comment,
|
||||
// Any identifier
|
||||
Identifier,
|
||||
// Keywords
|
||||
KwBreak,
|
||||
KwElse,
|
||||
KwFalse,
|
||||
KwFor,
|
||||
KwFn,
|
||||
KwIf,
|
||||
KwIn,
|
||||
KwLet,
|
||||
KwTrue,
|
||||
KwWhile,
|
||||
Keyword(Keyword),
|
||||
// Literals
|
||||
Integer,
|
||||
Float,
|
||||
@ -37,14 +30,17 @@ pub mod token {
|
||||
// Compound punctuation
|
||||
Lsh,
|
||||
Rsh,
|
||||
AndAnd,
|
||||
OrOr,
|
||||
AmpAmp,
|
||||
BarBar,
|
||||
NotNot,
|
||||
CatEar,
|
||||
EqEq,
|
||||
GtEq,
|
||||
LtEq,
|
||||
NotEq,
|
||||
StarEq,
|
||||
DivEq,
|
||||
RemEq,
|
||||
AddEq,
|
||||
SubEq,
|
||||
AndEq,
|
||||
@ -79,6 +75,43 @@ pub mod token {
|
||||
Grave,
|
||||
}
|
||||
|
||||
/// Represents a reserved word.
///
/// Each variant corresponds to exactly one source spelling, shown on the variant.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Keyword {
    /// `break`
    Break,
    /// `continue`
    Continue,
    /// `else`
    Else,
    /// `false`
    False,
    /// `for`
    For,
    /// `fn`
    Fn,
    /// `if`
    If,
    /// `in`
    In,
    /// `let`
    Let,
    /// `return`
    Return,
    /// `true`
    True,
    /// `while`
    While,
}
|
||||
impl std::str::FromStr for Keyword {
|
||||
type Err = ();
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Ok(match s {
|
||||
"break" => Self::Break,
|
||||
"continue" => Self::Continue,
|
||||
"else" => Self::Else,
|
||||
"false" => Self::False,
|
||||
"for" => Self::For,
|
||||
"fn" => Self::Fn,
|
||||
"if" => Self::If,
|
||||
"in" => Self::In,
|
||||
"let" => Self::Let,
|
||||
"return" => Self::Return,
|
||||
"true" => Self::True,
|
||||
"while" => Self::While,
|
||||
_ => Err(())?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub struct Token {
|
||||
ty: Type,
|
||||
@ -91,6 +124,12 @@ pub mod token {
|
||||
pub fn new(ty: Type, head: usize, tail: usize, line: usize, col: usize) -> Self {
|
||||
Self { ty, head, tail, line, col }
|
||||
}
|
||||
pub fn cast(self, ty: Type) -> Self {
|
||||
Self { ty, ..self }
|
||||
}
|
||||
pub fn rebound(self, head: usize, tail: usize) -> Self {
|
||||
Self { head, tail, ..self }
|
||||
}
|
||||
pub fn line(&self) -> usize {
|
||||
self.line
|
||||
}
|
||||
@ -179,27 +218,14 @@ pub mod lexer {
|
||||
/// Returns the result of the rule with the highest precedence, if any matches
|
||||
pub fn any(&mut self) -> Option<Token> {
|
||||
None.or_else(|| self.comment())
|
||||
.or_else(|| self.keyword())
|
||||
.or_else(|| self.identifier())
|
||||
.or_else(|| self.literal())
|
||||
.or_else(|| self.delimiter())
|
||||
.or_else(|| self.punctuation())
|
||||
.or_else(|| self.invalid())
|
||||
}
|
||||
/// Attempts to produce a Keyword
|
||||
pub fn keyword(&mut self) -> Option<Token> {
|
||||
None.or_else(|| self.kw_break())
|
||||
.or_else(|| self.kw_else())
|
||||
.or_else(|| self.kw_false())
|
||||
.or_else(|| self.kw_for())
|
||||
.or_else(|| self.kw_fn())
|
||||
.or_else(|| self.kw_if())
|
||||
.or_else(|| self.kw_in())
|
||||
.or_else(|| self.kw_let())
|
||||
.or_else(|| self.kw_true())
|
||||
.or_else(|| self.kw_while())
|
||||
}
|
||||
/// Attempts to produce a [Type::LitString], [Type::LitFloat], or [Type::LitInteger]
|
||||
/// Attempts to produce a [Type::String], [Type::Character], [Type::Float], or [Type::Integer]
|
||||
pub fn literal(&mut self) -> Option<Token> {
|
||||
None.or_else(|| self.string())
|
||||
.or_else(|| self.character())
|
||||
@ -217,23 +243,26 @@ pub mod lexer {
|
||||
}
|
||||
/// Evaluates punctuation rules
|
||||
pub fn punctuation(&mut self) -> Option<Token> {
|
||||
None.or_else(|| self.lsh())
|
||||
.or_else(|| self.rsh())
|
||||
.or_else(|| self.and_and())
|
||||
.or_else(|| self.or_or())
|
||||
None.or_else(|| self.amp_amp())
|
||||
.or_else(|| self.bar_bar())
|
||||
.or_else(|| self.not_not())
|
||||
.or_else(|| self.cat_ear())
|
||||
.or_else(|| self.eq_eq())
|
||||
.or_else(|| self.gt_eq())
|
||||
.or_else(|| self.lt_eq())
|
||||
.or_else(|| self.not_eq())
|
||||
.or_else(|| self.lsh_eq())
|
||||
.or_else(|| self.rsh_eq())
|
||||
.or_else(|| self.star_eq())
|
||||
.or_else(|| self.div_eq())
|
||||
.or_else(|| self.rem_eq())
|
||||
.or_else(|| self.add_eq())
|
||||
.or_else(|| self.sub_eq())
|
||||
.or_else(|| self.and_eq())
|
||||
.or_else(|| self.or_eq())
|
||||
.or_else(|| self.xor_eq())
|
||||
.or_else(|| self.lsh_eq())
|
||||
.or_else(|| self.rsh_eq())
|
||||
.or_else(|| self.lsh())
|
||||
.or_else(|| self.rsh())
|
||||
.or_else(|| self.arrow())
|
||||
.or_else(|| self.fatarrow())
|
||||
.or_else(|| self.semi())
|
||||
@ -270,40 +299,13 @@ pub mod lexer {
|
||||
pub fn comment(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.comment(), Type::Comment)
|
||||
}
|
||||
// keywords
|
||||
pub fn kw_break(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("break"), Type::KwBreak)
|
||||
}
|
||||
pub fn kw_else(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("else"), Type::KwElse)
|
||||
}
|
||||
pub fn kw_false(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("false"), Type::KwFalse)
|
||||
}
|
||||
pub fn kw_for(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("for"), Type::KwFor)
|
||||
}
|
||||
pub fn kw_fn(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("fn"), Type::KwFn)
|
||||
}
|
||||
pub fn kw_if(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("if"), Type::KwIf)
|
||||
}
|
||||
pub fn kw_in(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("in"), Type::KwIn)
|
||||
}
|
||||
pub fn kw_let(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("let"), Type::KwLet)
|
||||
}
|
||||
pub fn kw_true(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("true"), Type::KwTrue)
|
||||
}
|
||||
pub fn kw_while(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("while"), Type::KwWhile)
|
||||
}
|
||||
// identifiers
|
||||
pub fn identifier(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.identifier(), Type::Identifier)
|
||||
.map(|token| match self.text[token.range()].parse() {
|
||||
Ok(kw) => token.cast(Type::Keyword(kw)),
|
||||
Err(_) => token,
|
||||
})
|
||||
}
|
||||
// literals
|
||||
pub fn integer(&mut self) -> Option<Token> {
|
||||
@ -313,10 +315,13 @@ pub mod lexer {
|
||||
self.map_rule(|r| r.float(), Type::Float)
|
||||
}
|
||||
pub fn string(&mut self) -> Option<Token> {
|
||||
// TODO: count lines and columns properly within string
|
||||
self.map_rule(|r| r.string(), Type::String)
|
||||
.map(|t| t.rebound(t.head + 1, t.tail - 1))
|
||||
}
|
||||
pub fn character(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.character(), Type::Character)
|
||||
.map(|t| t.rebound(t.head + 1, t.tail - 1))
|
||||
}
|
||||
// delimiters
|
||||
pub fn l_brack(&mut self) -> Option<Token> {
|
||||
@ -344,11 +349,11 @@ pub mod lexer {
|
||||
pub fn rsh(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str(">>"), Type::Rsh)
|
||||
}
|
||||
pub fn and_and(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("&&"), Type::AndAnd)
|
||||
pub fn amp_amp(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("&&"), Type::AmpAmp)
|
||||
}
|
||||
pub fn or_or(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("||"), Type::OrOr)
|
||||
pub fn bar_bar(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("||"), Type::BarBar)
|
||||
}
|
||||
pub fn not_not(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("!!"), Type::NotNot)
|
||||
@ -359,6 +364,12 @@ pub mod lexer {
|
||||
pub fn eq_eq(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("=="), Type::EqEq)
|
||||
}
|
||||
pub fn gt_eq(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str(">="), Type::GtEq)
|
||||
}
|
||||
pub fn lt_eq(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("<="), Type::LtEq)
|
||||
}
|
||||
pub fn not_eq(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("!="), Type::NotEq)
|
||||
}
|
||||
@ -368,6 +379,9 @@ pub mod lexer {
|
||||
pub fn div_eq(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("/="), Type::DivEq)
|
||||
}
|
||||
pub fn rem_eq(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("%="), Type::RemEq)
|
||||
}
|
||||
pub fn add_eq(&mut self) -> Option<Token> {
|
||||
self.map_rule(|r| r.str("+="), Type::AddEq)
|
||||
}
|
||||
@ -464,6 +478,7 @@ pub mod lexer {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: use real, functional parser-combinators here to produce tokens
|
||||
/// A lexer [Rule] matches patterns in text in a declarative manner
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct Rule<'t> {
|
||||
@ -761,50 +776,6 @@ mod tests {
|
||||
assert_whole_input_is_token("fn main() {}", Lexer::comment, Type::Comment);
|
||||
}
|
||||
}
|
||||
mod keyword {
|
||||
use super::*;
|
||||
#[test]
|
||||
fn kw_break() {
|
||||
assert_whole_input_is_token("break", Lexer::kw_break, Type::KwBreak);
|
||||
}
|
||||
#[test]
|
||||
fn kw_else() {
|
||||
assert_whole_input_is_token("else", Lexer::kw_else, Type::KwElse);
|
||||
assert_has_type_and_range(" else ", Lexer::kw_else, Type::KwElse, 2..6);
|
||||
}
|
||||
#[test]
|
||||
fn kw_false() {
|
||||
assert_whole_input_is_token("false", Lexer::kw_false, Type::KwFalse);
|
||||
}
|
||||
#[test]
|
||||
fn kw_for() {
|
||||
assert_whole_input_is_token("for", Lexer::kw_for, Type::KwFor);
|
||||
}
|
||||
#[test]
|
||||
fn kw_fn() {
|
||||
assert_whole_input_is_token("fn", Lexer::kw_fn, Type::KwFn);
|
||||
}
|
||||
#[test]
|
||||
fn kw_if() {
|
||||
assert_whole_input_is_token("if", Lexer::kw_if, Type::KwIf);
|
||||
}
|
||||
#[test]
|
||||
fn kw_in() {
|
||||
assert_whole_input_is_token("in", Lexer::kw_in, Type::KwIn);
|
||||
}
|
||||
#[test]
|
||||
fn kw_let() {
|
||||
assert_whole_input_is_token("let", Lexer::kw_let, Type::KwLet);
|
||||
}
|
||||
#[test]
|
||||
fn kw_true() {
|
||||
assert_whole_input_is_token("true", Lexer::kw_true, Type::KwTrue);
|
||||
}
|
||||
#[test]
|
||||
fn kw_while() {
|
||||
assert_whole_input_is_token("while", Lexer::kw_while, Type::KwWhile);
|
||||
}
|
||||
}
|
||||
mod identifier {
|
||||
use super::*;
|
||||
|
||||
@ -835,8 +806,8 @@ mod tests {
|
||||
fn literal_class() {
|
||||
assert_whole_input_is_token("1_00000", Lexer::literal, Type::Integer);
|
||||
assert_whole_input_is_token("1.00000", Lexer::literal, Type::Float);
|
||||
assert_whole_input_is_token("\"1.0\"", Lexer::literal, Type::String);
|
||||
assert_whole_input_is_token("'\"'", Lexer::literal, Type::Character);
|
||||
assert_has_type_and_range("\"1.0\"", Lexer::literal, Type::String, 1..4);
|
||||
assert_has_type_and_range("'\"'", Lexer::literal, Type::Character, 1..2);
|
||||
}
|
||||
mod integer {
|
||||
use super::*;
|
||||
@ -894,18 +865,19 @@ mod tests {
|
||||
use super::*;
|
||||
#[test]
|
||||
fn empty_string() {
|
||||
assert_whole_input_is_token("\"\"", Lexer::string, Type::String);
|
||||
assert_has_type_and_range("\"\"", Lexer::string, Type::String, 1..1);
|
||||
}
|
||||
#[test]
|
||||
fn unicode_string() {
|
||||
assert_whole_input_is_token("\"I 💙 🦈!\"", Lexer::string, Type::String);
|
||||
assert_has_type_and_range("\"I 💙 🦈!\"", Lexer::string, Type::String, 1..13);
|
||||
}
|
||||
#[test]
|
||||
fn escape_string() {
|
||||
assert_whole_input_is_token(
|
||||
assert_has_type_and_range(
|
||||
"\" \\\"This is a quote\\\" \"",
|
||||
Lexer::string,
|
||||
Type::String,
|
||||
1..22
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -913,22 +885,22 @@ mod tests {
|
||||
use super::*;
|
||||
#[test]
|
||||
fn plain_char() {
|
||||
assert_whole_input_is_token("'A'", Lexer::character, Type::Character);
|
||||
assert_whole_input_is_token("'a'", Lexer::character, Type::Character);
|
||||
assert_whole_input_is_token("'#'", Lexer::character, Type::Character);
|
||||
assert_has_type_and_range("'A'", Lexer::character, Type::Character, 1..2);
|
||||
assert_has_type_and_range("'a'", Lexer::character, Type::Character, 1..2);
|
||||
assert_has_type_and_range("'#'", Lexer::character, Type::Character, 1..2);
|
||||
}
|
||||
#[test]
|
||||
fn unicode_char() {
|
||||
assert_whole_input_is_token("'ε'", Lexer::character, Type::Character);
|
||||
assert_has_type_and_range("'ε'", Lexer::character, Type::Character, 1..3);
|
||||
}
|
||||
#[test]
|
||||
fn escaped_char() {
|
||||
assert_whole_input_is_token("'\\n'", Lexer::character, Type::Character);
|
||||
assert_has_type_and_range("'\\n'", Lexer::character, Type::Character, 1..3);
|
||||
}
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn no_char() {
|
||||
assert_whole_input_is_token("''", Lexer::character, Type::Character);
|
||||
assert_has_type_and_range("''", Lexer::character, Type::Character, 1..1);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -983,12 +955,12 @@ mod tests {
|
||||
assert_whole_input_is_token(">>", Lexer::rsh, Type::Rsh)
|
||||
}
|
||||
#[test]
|
||||
fn and_and() {
|
||||
assert_whole_input_is_token("&&", Lexer::and_and, Type::AndAnd)
|
||||
fn amp_amp() {
|
||||
assert_whole_input_is_token("&&", Lexer::amp_amp, Type::AmpAmp)
|
||||
}
|
||||
#[test]
|
||||
fn or_or() {
|
||||
assert_whole_input_is_token("||", Lexer::or_or, Type::OrOr)
|
||||
fn bar_bar() {
|
||||
assert_whole_input_is_token("||", Lexer::bar_bar, Type::BarBar)
|
||||
}
|
||||
#[test]
|
||||
fn not_not() {
|
||||
@ -1003,6 +975,14 @@ mod tests {
|
||||
assert_whole_input_is_token("==", Lexer::eq_eq, Type::EqEq)
|
||||
}
|
||||
#[test]
|
||||
fn gt_eq() {
|
||||
assert_whole_input_is_token(">=", Lexer::gt_eq, Type::GtEq)
|
||||
}
|
||||
#[test]
|
||||
fn lt_eq() {
|
||||
assert_whole_input_is_token("<=", Lexer::lt_eq, Type::LtEq)
|
||||
}
|
||||
#[test]
|
||||
fn not_eq() {
|
||||
assert_whole_input_is_token("!=", Lexer::not_eq, Type::NotEq)
|
||||
}
|
||||
|
@ -1,24 +1,18 @@
|
||||
use super::Type;
|
||||
//! Trait impls and helper functions for [Type] and [Keyword]
|
||||
use super::{Keyword, Type};
|
||||
use std::fmt::Display;
|
||||
|
||||
impl Display for Type {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Type::Invalid => Display::fmt("invalid", f),
|
||||
Type::Comment => Display::fmt("comment", f),
|
||||
Type::Identifier => Display::fmt("identifier", f),
|
||||
Type::KwBreak => Display::fmt("break", f),
|
||||
Type::KwElse => Display::fmt("else", f),
|
||||
Type::KwFalse => Display::fmt("false", f),
|
||||
Type::KwFor => Display::fmt("for", f),
|
||||
Type::KwFn => Display::fmt("fn", f),
|
||||
Type::KwIf => Display::fmt("if", f),
|
||||
Type::KwIn => Display::fmt("in", f),
|
||||
Type::KwLet => Display::fmt("let", f),
|
||||
Type::KwTrue => Display::fmt("true", f),
|
||||
Type::KwWhile => Display::fmt("while", f),
|
||||
Type::LitInteger => Display::fmt("integer literal", f),
|
||||
Type::LitFloat => Display::fmt("float literal", f),
|
||||
Type::LitString => Display::fmt("string literal", f),
|
||||
Type::Keyword(k) => Display::fmt(k, f),
|
||||
Type::Integer => Display::fmt("integer literal", f),
|
||||
Type::Float => Display::fmt("float literal", f),
|
||||
Type::String => Display::fmt("string literal", f),
|
||||
Type::Character => Display::fmt("char literal", f),
|
||||
Type::LCurly => Display::fmt("left curly", f),
|
||||
Type::RCurly => Display::fmt("right curly", f),
|
||||
Type::LBrack => Display::fmt("left brack", f),
|
||||
@ -27,14 +21,17 @@ impl Display for Type {
|
||||
Type::RParen => Display::fmt("right paren", f),
|
||||
Type::Lsh => Display::fmt("shift left", f),
|
||||
Type::Rsh => Display::fmt("shift right", f),
|
||||
Type::AndAnd => Display::fmt("logical and", f),
|
||||
Type::OrOr => Display::fmt("logical or", f),
|
||||
Type::AmpAmp => Display::fmt("and-and", f),
|
||||
Type::BarBar => Display::fmt("or-or", f),
|
||||
Type::NotNot => Display::fmt("not-not", f),
|
||||
Type::CatEar => Display::fmt("cat-ears", f),
|
||||
Type::EqEq => Display::fmt("equal to", f),
|
||||
Type::GtEq => Display::fmt("greater than or equal to", f),
|
||||
Type::LtEq => Display::fmt("less than or equal to", f),
|
||||
Type::NotEq => Display::fmt("not equal to", f),
|
||||
Type::StarEq => Display::fmt("star-assign", f),
|
||||
Type::DivEq => Display::fmt("div-assign", f),
|
||||
Type::RemEq => Display::fmt("rem-assign", f),
|
||||
Type::AddEq => Display::fmt("add-assign", f),
|
||||
Type::SubEq => Display::fmt("sub-assign", f),
|
||||
Type::AndEq => Display::fmt("and-assign", f),
|
||||
@ -69,3 +66,22 @@ impl Display for Type {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Keyword {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Break => Display::fmt("break", f),
|
||||
Self::Continue => Display::fmt("continue", f),
|
||||
Self::Else => Display::fmt("else", f),
|
||||
Self::False => Display::fmt("false", f),
|
||||
Self::For => Display::fmt("for", f),
|
||||
Self::Fn => Display::fmt("fn", f),
|
||||
Self::If => Display::fmt("if", f),
|
||||
Self::In => Display::fmt("in", f),
|
||||
Self::Let => Display::fmt("let", f),
|
||||
Self::Return => Display::fmt("return", f),
|
||||
Self::True => Display::fmt("true", f),
|
||||
Self::While => Display::fmt("while", f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user