lexer: Add Invalid token to aid in implementing features

2023-09-27 21:17:35 -05:00
parent 335fea8d73
commit d4245844ce
1 changed files with 14 additions and 0 deletions
--- a/libconlang/src/lib.rs
+++ b/libconlang/src/lib.rs
@@ -7,6 +7,7 @@ pub mod token {

    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    pub enum Type {
+        Invalid,
        Comment,
        Identifier,
        // Keywords
@@ -99,6 +100,7 @@ pub mod lexer {
                .or_else(|| self.identifier())
                .or_else(|| self.literal())
                .or_else(|| self.delimiter())
+                .or_else(|| self.invalid())
        }
        pub fn keyword(&mut self) -> Option<Token> {
            None.or_else(|| self.kw_else())
@@ -123,6 +125,10 @@ pub mod lexer {
                .or_else(|| self.r_paren())
        }
        // functions for lexing individual tokens
+        /// Lexes a [Type::Invalid] token from whatever [Rule::invalid] matches,
+        /// after skipping leading whitespace.
+        ///
+        /// Returns `None` when the rule's `end()` yields `None`; otherwise returns
+        /// the result of `produce_token`.
+        pub fn invalid(&mut self) -> Option<Token> {
+            self.skip_whitespace();
+            // Resolve the match first so the early return is visible on its own line
+            let matched = Rule::new(self.text()).invalid().end()?;
+            self.produce_token(Type::Invalid, matched)
+        }
        // comments
        pub fn comment(&mut self) -> Option<Token> {
            self.skip_whitespace();
@@ -222,6 +228,10 @@ pub mod lexer {
    }

    impl<'t> Rule<'t> {
+        /// Matches a sequence of non-whitespace characters by applying
+        /// [`Self::not_whitespace`] through `and_many`.
+        ///
+        /// NOTE(review): whether an empty run counts as a match depends on
+        /// `and_many`, which is not visible in this hunk -- confirm.
+        pub fn invalid(self) -> Self {
+            self.and_many(Self::not_whitespace)
+        }
        /// Matches a block, line, or shebang comment
        pub fn comment(self) -> Self {
            self.and_either(Self::line_comment, Self::block_comment)
@@ -324,6 +334,10 @@ pub mod lexer {
        pub fn whitespace(self) -> Self {
            // Hand `char_fn` a predicate that accepts only whitespace characters
            self.char_fn(|ch| ch.is_whitespace())
        }
+        /// Matches any character for which `is_whitespace()` is false
+        /// (the complement of [`Self::whitespace`])
+        pub fn not_whitespace(self) -> Self {
+            // Same `char_fn` delegation as `whitespace`, with the predicate negated
+            self.char_fn(|ch| !ch.is_whitespace())
+        }
        /// Matches one XID_START
        pub fn xid_start(self) -> Self {
            use unicode_xid::UnicodeXID;