lexer: Add Invalid token to aid in implementing features

This commit is contained in:
2023-09-27 21:17:35 -05:00
parent 335fea8d73
commit d4245844ce

View File

@@ -7,6 +7,7 @@ pub mod token {
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Type { pub enum Type {
Invalid,
Comment, Comment,
Identifier, Identifier,
// Keywords // Keywords
@@ -99,6 +100,7 @@ pub mod lexer {
.or_else(|| self.identifier()) .or_else(|| self.identifier())
.or_else(|| self.literal()) .or_else(|| self.literal())
.or_else(|| self.delimiter()) .or_else(|| self.delimiter())
.or_else(|| self.invalid())
} }
pub fn keyword(&mut self) -> Option<Token> { pub fn keyword(&mut self) -> Option<Token> {
None.or_else(|| self.kw_else()) None.or_else(|| self.kw_else())
@@ -123,6 +125,10 @@ pub mod lexer {
.or_else(|| self.r_paren()) .or_else(|| self.r_paren())
} }
// functions for lexing individual tokens // functions for lexing individual tokens
/// Produces a [`Type::Invalid`] token from whatever [`Rule::invalid`] matches
/// at the current position, after skipping leading whitespace.
///
/// Returns `None` when the rule yields no end position.
pub fn invalid(&mut self) -> Option<Token> {
    self.skip_whitespace();
    // Evaluate the rule first so its result is a plain value by the time the token is built.
    let matched = Rule::new(self.text()).invalid().end()?;
    self.produce_token(Type::Invalid, matched)
}
// comments // comments
pub fn comment(&mut self) -> Option<Token> { pub fn comment(&mut self) -> Option<Token> {
self.skip_whitespace(); self.skip_whitespace();
@@ -222,6 +228,10 @@ pub mod lexer {
} }
impl<'t> Rule<'t> { impl<'t> Rule<'t> {
/// Matches a run of non-whitespace characters by repeating
/// [`Self::not_whitespace`] through `and_many`.
pub fn invalid(self) -> Self {
    Self::and_many(self, Self::not_whitespace)
}
/// Matches a block, line, or shebang comment /// Matches a block, line, or shebang comment
pub fn comment(self) -> Self { pub fn comment(self) -> Self {
self.and_either(Self::line_comment, Self::block_comment) self.and_either(Self::line_comment, Self::block_comment)
@@ -324,6 +334,10 @@ pub mod lexer {
pub fn whitespace(self) -> Self { pub fn whitespace(self) -> Self {
self.char_fn(|c| c.is_whitespace()) self.char_fn(|c| c.is_whitespace())
} }
/// Matches a character for which `is_whitespace` is false
pub fn not_whitespace(self) -> Self {
    self.char_fn(|ch| {
        let is_blank = ch.is_whitespace();
        !is_blank
    })
}
/// Matches one XID_START /// Matches one XID_START
pub fn xid_start(self) -> Self { pub fn xid_start(self) -> Self {
use unicode_xid::UnicodeXID; use unicode_xid::UnicodeXID;