Lexer rewrite:

- Scan the input string *linearly*, without backtracking
  - Peek at most one character (unicode code-point) ahead
- Store data (unescaped string literals and chars, identifiers, integers, floats) inside Token
  - This unfortunately makes tokens non-Copy
- Refactor Parser to accommodate these changes
  - On the bright side, Parser no longer needs a reference to the text!
- Write a new set of lexer tests
  - TODO: write a new set of token tests using TokenData

Every day, we get closer to parsing `dummy.cl`!
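
A minimal sketch of the new surface (names as they appear in the diff below; the input string is made up):

    use conlang::lexer::Lexer;

    fn main() {
        // The lexer is now a fallible iterator: each item is an LResult<Token>,
        // and iteration ends cleanly once the EndOfFile "error" is reached.
        for token in Lexer::new("let x = 10;") {
            match token {
                Ok(t) => println!("{}:{}: {} │{:?}│", t.line(), t.col(), t.ty(), t.data()),
                Err(e) => eprintln!("{e}"),
            }
        }
    }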
John 2023-10-22 18:28:20 -05:00
parent feb5cc5dd0
commit b5abd2bff1
5 changed files with 716 additions and 1063 deletions

View File

@@ -13,7 +13,7 @@ fn main() -> Result<(), Box<dyn Error>> {
take_stdin()?;
} else {
for path in conf.paths.iter().map(PathBuf::as_path) {
lex_tokens(&std::fs::read_to_string(path)?, Some(path));
lex_tokens(&std::fs::read_to_string(path)?, Some(path))?;
}
}
Ok(())
@@ -32,29 +32,37 @@ impl Config {
fn take_stdin() -> Result<(), Box<dyn Error>> {
if stdin().is_terminal() {
for line in stdin().lines() {
lex_tokens(&line?, None)
lex_tokens(&line?, None)?
}
} else {
lex_tokens(&std::io::read_to_string(stdin())?, None)
lex_tokens(&std::io::read_to_string(stdin())?, None)?
}
Ok(())
}
fn lex_tokens(file: &str, path: Option<&Path>) {
fn lex_tokens(file: &str, path: Option<&Path>) -> Result<(), Box<dyn Error>> {
for token in Lexer::new(file) {
let token = match token {
Ok(t) => t,
Err(e) => {
println!("{e:?}");
break;
},
};
if let Some(path) = path {
print!("{path:?}:")
}
print_token(file, token);
print_token(token);
}
Ok(())
}
fn print_token(line: &str, t: conlang::token::Token) {
fn print_token(t: conlang::token::Token) {
println!(
"{:02}:{:02}: {:#19} │{}│",
"{:02}:{:02}: {:#19} │{:?}│",
t.line(),
t.col(),
t.ty(),
&line[t.range()]
t.data(),
)
}

View File

@@ -1,548 +1,477 @@
//! Converts a text file into tokens
use crate::token::{Token, Type};
use lerox::Combinator;
use crate::token::{Keyword, Token, TokenData, Type};
use std::{
iter::Peekable,
str::{Chars, FromStr},
};
use unicode_xid::UnicodeXID;
pub struct IntoIter<'t> {
lexer: Lexer<'t>,
}
impl<'t> Iterator for IntoIter<'t> {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
self.lexer.any()
pub mod lexer_iter {
use super::{
error::{LResult, Reason},
Lexer, Token,
};
/// Fallible iterator over a [Lexer], returning optional [LResult<Token>]s
pub struct LexerIter<'t> {
lexer: Lexer<'t>,
}
}
impl<'t> IntoIterator for Lexer<'t> {
type Item = Token;
type IntoIter = IntoIter<'t>;
fn into_iter(self) -> Self::IntoIter {
IntoIter { lexer: self }
impl<'t> Iterator for LexerIter<'t> {
type Item = LResult<Token>;
fn next(&mut self) -> Option<Self::Item> {
match self.lexer.scan() {
Ok(v) => Some(Ok(v)),
Err(e) => {
if e.reason == Reason::EndOfFile {
None
} else {
Some(Err(e))
}
}
}
}
}
impl<'t> IntoIterator for Lexer<'t> {
type Item = LResult<Token>;
type IntoIter = LexerIter<'t>;
fn into_iter(self) -> Self::IntoIter {
LexerIter { lexer: self }
}
}
}
#[derive(Clone, Debug)]
pub struct Lexer<'t> {
text: &'t str,
cursor: usize,
line: u32,
col: u32,
iter: Peekable<Chars<'t>>,
start: usize,
start_loc: (u32, u32),
current: usize,
current_loc: (u32, u32),
}
/// Implements the non-terminals of a language
impl<'t> Lexer<'t> {
pub fn new(text: &'t str) -> Self {
Self { text, cursor: 0, line: 1, col: 1 }
}
/// Consumes the entire [`Lexer`], producing a [`Vec<Token>`]
/// and returning the original string
pub fn consume(self) -> (Vec<Token>, &'t str) {
let text = self.text;
(self.into_iter().collect(), text)
}
/// Counts some length
#[inline]
fn count_len(&mut self, len: usize) -> &mut Self {
self.cursor += len;
self.col += len as u32;
self
}
/// Counts a line
#[inline]
fn count_line(&mut self, lines: u32) -> &mut Self {
self.line += lines;
self.col = 1;
self
}
/// Skips whitespace in the text
fn skip_whitespace(&mut self) {
self.count_len(
Rule::new(self.text())
.and_any(Rule::whitespace_not_newline)
.end()
.unwrap_or_default(),
);
if Rule::new(self.text()).char('\n').end().is_some() {
// recurse until all newlines are skipped
self.count_len(1).count_line(1).skip_whitespace();
Self {
iter: text.chars().peekable(),
start: 0,
start_loc: (1, 1),
current: 0,
current_loc: (1, 1),
}
}
/// Advances the cursor and produces a token from a provided [Rule] function
fn map_rule<F>(&mut self, rule: F, ty: Type) -> Option<Token>
where F: Fn(Rule) -> Rule {
self.skip_whitespace();
let (line, col, start) = (self.line, self.col, self.cursor);
self.count_len(Rule::new(self.text()).and(rule).end()?);
Some(Token::new(ty, start, self.cursor, line, col))
}
/// Gets a slice of text beginning at the cursor
fn text(&self) -> &str {
&self.text[self.cursor..]
}
// classifies a single arbitrary token
/// Returns the result of the rule with the highest precedence, if any matches
pub fn any(&mut self) -> Option<Token> {
None.or_else(|| self.comment())
.or_else(|| self.identifier())
.or_else(|| self.literal())
.or_else(|| self.delimiter())
.or_else(|| self.punctuation())
.or_else(|| self.invalid())
}
/// Attempts to produce a [Type::String], [Type::Float], or [Type::Integer]
pub fn literal(&mut self) -> Option<Token> {
None.or_else(|| self.string())
.or_else(|| self.character())
.or_else(|| self.float())
.or_else(|| self.integer())
}
/// Evaluates delimiter rules
pub fn delimiter(&mut self) -> Option<Token> {
None.or_else(|| self.l_brack())
.or_else(|| self.r_brack())
.or_else(|| self.l_curly())
.or_else(|| self.r_curly())
.or_else(|| self.l_paren())
.or_else(|| self.r_paren())
}
/// Evaluates punctuation rules
pub fn punctuation(&mut self) -> Option<Token> {
None.or_else(|| self.amp_amp()) // &&
.or_else(|| self.amp_eq()) // &=
.or_else(|| self.amp()) // &
.or_else(|| self.at()) // @
.or_else(|| self.backslash()) // \
.or_else(|| self.bang_bang()) // !!
.or_else(|| self.bang_eq()) // !=
.or_else(|| self.bang()) // !
.or_else(|| self.bar_bar()) // ||
.or_else(|| self.bar_eq()) // |=
.or_else(|| self.bar()) // |
.or_else(|| self.colon()) // :
.or_else(|| self.comma()) // ,
.or_else(|| self.dot_dot_eq()) // ..=
.or_else(|| self.dot_dot()) // ..
.or_else(|| self.dot()) // .
.or_else(|| self.eq_eq()) // ==
.or_else(|| self.fatarrow()) // =>
.or_else(|| self.eq()) // =
.or_else(|| self.grave()) // `
.or_else(|| self.gt_eq()) // >=
.or_else(|| self.gt_gt_eq()) // >>=
.or_else(|| self.gt_gt()) // >>
.or_else(|| self.gt()) // >
.or_else(|| self.hash()) // #
.or_else(|| self.lt_eq()) // <=
.or_else(|| self.lt_lt_eq()) // <<=
.or_else(|| self.lt_lt()) // <<
.or_else(|| self.lt()) // <
.or_else(|| self.minus_eq()) // -=
.or_else(|| self.arrow()) // ->
.or_else(|| self.minus()) // -
.or_else(|| self.plus_eq()) // +=
.or_else(|| self.plus()) // +
.or_else(|| self.question()) // ?
.or_else(|| self.rem_eq()) // %=
.or_else(|| self.rem()) // %
.or_else(|| self.semi()) // ;
.or_else(|| self.slash_eq()) // /=
.or_else(|| self.slash()) // /
.or_else(|| self.star_eq()) // *=
.or_else(|| self.star()) // *
.or_else(|| self.tilde()) // ~
.or_else(|| self.xor_eq()) // ^=
.or_else(|| self.xor_xor()) // ^^
.or_else(|| self.xor()) // ^
}
pub fn unary_op(&mut self) -> Option<Token> {
self.bang().or_else(|| self.minus())
}
// functions for lexing individual tokens
pub fn invalid(&mut self) -> Option<Token> {
self.map_rule(|r| r.invalid(), Type::Invalid)
}
// comments
pub fn comment(&mut self) -> Option<Token> {
self.map_rule(|r| r.comment(), Type::Comment)
}
// identifiers
pub fn identifier(&mut self) -> Option<Token> {
self.map_rule(|r| r.identifier(), Type::Identifier)
.map(|token| match self.text[token.range()].parse() {
Ok(kw) => token.cast(Type::Keyword(kw)),
Err(_) => token,
})
}
// literals
pub fn integer(&mut self) -> Option<Token> {
self.map_rule(|r| r.integer(), Type::Integer)
}
pub fn float(&mut self) -> Option<Token> {
self.map_rule(|r| r.float(), Type::Float)
}
pub fn string(&mut self) -> Option<Token> {
// TODO: count lines and columns properly within string
self.map_rule(|r| r.string(), Type::String)
.map(|t| t.rebound(t.head + 1, t.tail - 1))
}
pub fn character(&mut self) -> Option<Token> {
self.map_rule(|r| r.character(), Type::Character)
.map(|t| t.rebound(t.head + 1, t.tail - 1))
}
// delimiters
pub fn l_brack(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('['), Type::LBrack)
}
pub fn r_brack(&mut self) -> Option<Token> {
self.map_rule(|r| r.char(']'), Type::RBrack)
}
pub fn l_curly(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('{'), Type::LCurly)
}
pub fn r_curly(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('}'), Type::RCurly)
}
pub fn l_paren(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('('), Type::LParen)
}
pub fn r_paren(&mut self) -> Option<Token> {
self.map_rule(|r| r.char(')'), Type::RParen)
}
// compound punctuation
pub fn lt_lt(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("<<"), Type::LtLt)
}
pub fn gt_gt(&mut self) -> Option<Token> {
self.map_rule(|r| r.str(">>"), Type::GtGt)
}
pub fn amp_amp(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("&&"), Type::AmpAmp)
}
pub fn bar_bar(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("||"), Type::BarBar)
}
pub fn bang_bang(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("!!"), Type::BangBang)
}
pub fn xor_xor(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("^^"), Type::XorXor)
}
pub fn eq_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("=="), Type::EqEq)
}
pub fn gt_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str(">="), Type::GtEq)
}
pub fn lt_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("<="), Type::LtEq)
}
pub fn bang_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("!="), Type::BangEq)
}
pub fn star_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("*="), Type::StarEq)
}
pub fn slash_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("/="), Type::SlashEq)
}
pub fn rem_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("%="), Type::RemEq)
}
pub fn plus_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("+="), Type::PlusEq)
}
pub fn minus_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("-="), Type::MinusEq)
}
pub fn amp_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("&="), Type::AmpEq)
}
pub fn bar_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("|="), Type::BarEq)
}
pub fn xor_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("^="), Type::XorEq)
}
pub fn lt_lt_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("<<="), Type::LtLtEq)
}
pub fn gt_gt_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str(">>="), Type::GtGtEq)
}
pub fn dot_dot_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("..="), Type::DotDotEq)
}
pub fn dot_dot(&mut self) -> Option<Token> {
self.map_rule(|r| r.str(".."), Type::DotDot)
}
pub fn arrow(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("->"), Type::Arrow)
}
pub fn fatarrow(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("=>"), Type::FatArrow)
}
// simple punctuation
pub fn semi(&mut self) -> Option<Token> {
self.map_rule(|r| r.char(';'), Type::Semi)
}
pub fn dot(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('.'), Type::Dot)
}
pub fn star(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('*'), Type::Star)
}
pub fn slash(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('/'), Type::Slash)
}
pub fn plus(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('+'), Type::Plus)
}
pub fn minus(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('-'), Type::Minus)
}
pub fn rem(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('%'), Type::Rem)
}
pub fn bang(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('!'), Type::Bang)
}
pub fn eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('='), Type::Eq)
}
pub fn lt(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('<'), Type::Lt)
}
pub fn gt(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('>'), Type::Gt)
}
pub fn amp(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('&'), Type::Amp)
}
pub fn bar(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('|'), Type::Bar)
}
pub fn xor(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('^'), Type::Xor)
}
pub fn hash(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('#'), Type::Hash)
}
pub fn at(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('@'), Type::At)
}
pub fn colon(&mut self) -> Option<Token> {
self.map_rule(|r| r.char(':'), Type::Colon)
}
pub fn question(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('?'), Type::Question)
}
pub fn comma(&mut self) -> Option<Token> {
self.map_rule(|r| r.char(','), Type::Comma)
}
pub fn tilde(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('~'), Type::Tilde)
}
pub fn grave(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('`'), Type::Grave)
}
pub fn backslash(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('\\'), Type::Backslash)
pub fn scan(&mut self) -> LResult<Token> {
match self.skip_whitespace().peek()? {
'{' => self.consume()?.produce(Type::LCurly, ()),
'}' => self.consume()?.produce(Type::RCurly, ()),
'[' => self.consume()?.produce(Type::LBrack, ()),
']' => self.consume()?.produce(Type::RBrack, ()),
'(' => self.consume()?.produce(Type::LParen, ()),
')' => self.consume()?.produce(Type::RParen, ()),
'&' => self.consume()?.amp(),
'@' => self.consume()?.produce(Type::At, ()),
'\\' => self.consume()?.produce(Type::Backslash, ()),
'!' => self.consume()?.bang(),
'|' => self.consume()?.bar(),
':' => self.consume()?.produce(Type::Colon, ()),
',' => self.consume()?.produce(Type::Comma, ()),
'.' => self.consume()?.dot(),
'=' => self.consume()?.equal(),
'`' => self.consume()?.produce(Type::Grave, ()),
'>' => self.consume()?.greater(),
'#' => self.consume()?.produce(Type::Hash, ()),
'<' => self.consume()?.less(),
'-' => self.consume()?.minus(),
'+' => self.consume()?.plus(),
'?' => self.consume()?.produce(Type::Question, ()),
'%' => self.consume()?.rem(),
';' => self.consume()?.produce(Type::Semi, ()),
'/' => self.consume()?.slash(),
'*' => self.consume()?.star(),
'~' => self.consume()?.produce(Type::Tilde, ()),
'^' => self.consume()?.xor(),
'0' => self.consume()?.int_with_base(),
'1'..='9' => self.digits::<10>(),
'"' => self.consume()?.string(),
'\'' => self.consume()?.character(),
'_' => self.identifier(),
i if i.is_xid_start() => self.identifier(),
e => Err(Error::unexpected_char(e, self.line(), self.col())),
}
}
/// Gets the line of the next token
pub fn line(&self) -> u32 {
self.start_loc.0
}
/// Gets the column of the next token
pub fn col(&self) -> u32 {
self.start_loc.1
}
fn next(&mut self) -> LResult<char> {
let out = self.peek();
self.consume()?;
out
}
fn peek(&mut self) -> LResult<char> {
self.iter
.peek()
.copied()
.ok_or(Error::end_of_file(self.line(), self.col()))
}
fn produce(&mut self, ty: Type, data: impl Into<TokenData>) -> LResult<Token> {
let loc = self.start_loc;
self.start_loc = self.current_loc;
self.start = self.current;
Ok(Token::new(ty, data, loc.0, loc.1))
}
fn skip_whitespace(&mut self) -> &mut Self {
while let Ok(c) = self.peek() {
if !c.is_whitespace() {
break;
}
let _ = self.consume();
}
self.start = self.current;
self.start_loc = self.current_loc;
self
}
fn consume(&mut self) -> LResult<&mut Self> {
self.current += 1;
match self.iter.next() {
Some('\n') => {
let (line, col) = &mut self.current_loc;
*line += 1;
*col = 1;
}
Some(_) => self.current_loc.1 += 1,
None => Err(Error::end_of_file(self.line(), self.col()))?,
}
Ok(self)
}
}
// TODO: use real, functional parser-combinators here to produce tokens
/// A lexer [Rule] matches patterns in text in a declarative manner
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Rule<'t> {
text: &'t str,
taken: usize,
is_alright: bool,
}
impl<'t> Rule<'t> {
pub fn new(text: &'t str) -> Self {
Self { text, taken: 0, is_alright: true }
/// Digraphs and trigraphs
impl<'t> Lexer<'t> {
fn amp(&mut self) -> LResult<Token> {
match self.peek() {
Ok('&') => self.consume()?.produce(Type::AmpAmp, ()),
Ok('=') => self.consume()?.produce(Type::AmpEq, ()),
_ => self.produce(Type::Amp, ()),
}
}
pub fn end(self) -> Option<usize> {
self.is_alright.then_some(self.taken)
fn bang(&mut self) -> LResult<Token> {
match self.peek() {
Ok('!') => self.consume()?.produce(Type::BangBang, ()),
Ok('=') => self.consume()?.produce(Type::BangEq, ()),
_ => self.produce(Type::Bang, ()),
}
}
pub fn remaining(&self) -> &str {
self.text
fn bar(&mut self) -> LResult<Token> {
match self.peek() {
Ok('|') => self.consume()?.produce(Type::BarBar, ()),
Ok('=') => self.consume()?.produce(Type::BarEq, ()),
_ => self.produce(Type::Bar, ()),
}
}
fn dot(&mut self) -> LResult<Token> {
match self.peek() {
Ok('.') => {
if let Ok('=') = self.consume()?.peek() {
self.consume()?.produce(Type::DotDotEq, ())
} else {
self.produce(Type::DotDot, ())
}
}
_ => self.produce(Type::Dot, ()),
}
}
fn equal(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::EqEq, ()),
Ok('>') => self.consume()?.produce(Type::FatArrow, ()),
_ => self.produce(Type::Eq, ()),
}
}
fn greater(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::GtEq, ()),
Ok('>') => {
if let Ok('=') = self.consume()?.peek() {
self.consume()?.produce(Type::GtGtEq, ())
} else {
self.produce(Type::GtGt, ())
}
}
_ => self.produce(Type::Gt, ()),
}
}
fn less(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::LtEq, ()),
Ok('<') => {
if let Ok('=') = self.consume()?.peek() {
self.consume()?.produce(Type::LtLtEq, ())
} else {
self.produce(Type::LtLt, ())
}
}
_ => self.produce(Type::Lt, ()),
}
}
fn minus(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::MinusEq, ()),
Ok('>') => self.consume()?.produce(Type::Arrow, ()),
_ => self.produce(Type::Minus, ()),
}
}
fn plus(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::PlusEq, ()),
_ => self.produce(Type::Plus, ()),
}
}
fn rem(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::RemEq, ()),
_ => self.produce(Type::Rem, ()),
}
}
fn slash(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::SlashEq, ()),
Ok('/') => self.consume()?.line_comment(),
Ok('*') => self.consume()?.block_comment(),
_ => self.produce(Type::Slash, ()),
}
}
fn star(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::StarEq, ()),
_ => self.produce(Type::Star, ()),
}
}
fn xor(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::XorEq, ()),
Ok('^') => self.consume()?.produce(Type::XorXor, ()),
_ => self.produce(Type::Xor, ()),
}
}
}
impl<'t> Rule<'t> {
/// Matches any sequence of non-whitespace characters
pub fn invalid(self) -> Self {
self.and_many(Self::not_whitespace)
/// Comments
impl<'t> Lexer<'t> {
fn line_comment(&mut self) -> LResult<Token> {
while Ok('\n') != self.peek() {
self.consume()?;
}
self.produce(Type::Comment, ())
}
/// Matches a block, line, or shebang comment
pub fn comment(self) -> Self {
self.and_either(Self::line_comment, Self::block_comment)
}
/// Matches a line or shebang comment
fn line_comment(self) -> Self {
// line_comment := ("//" | "#!/") (!newline)*
self.str("//")
.or(|r| r.str("#!/"))
.and_any(|r| r.not_char('\n'))
}
/// Matches a block comment
fn block_comment(self) -> Self {
// block_comment := "/*" (block_comment | all_but("*/"))* "*/"
self.str("/*")
.and_any(|r| r.and_either(|f| f.block_comment(), |g| g.not_str("*/")))
.str("*/")
}
/// Matches a Rust-style identifier
pub fn identifier(self) -> Self {
// identifier := ('_' | XID_START) ~ XID_CONTINUE*
self.char('_')
.or(Rule::xid_start)
.and_any(Rule::xid_continue)
}
/// Matches a Rust-style base-prefixed int literal
fn integer_kind(self, prefix: &str, digit: impl Fn(Self) -> Self) -> Self {
// int_kind<Prefix, Digit> := Prefix '_'* Digit (Digit | '_')*
self.str(prefix)
.and_any(|r| r.char('_'))
.and(&digit)
.and_any(|r| r.and(&digit).or(|r| r.char('_')))
}
/// Matches a Rust-style integer literal
pub fn integer(self) -> Self {
// integer = (int_kind<0d, dec_digit> | int_kind<0x, hex_digit>
// | int_kind<0o, oct_digit> | int_kind<0b, bin_digit> | dec_digit (dec_digit | '_')*)
self.and_one_of(&[
&|rule| rule.integer_kind("0d", Rule::dec_digit),
&|rule| rule.integer_kind("0x", Rule::hex_digit),
&|rule| rule.integer_kind("0o", Rule::oct_digit),
&|rule| rule.integer_kind("0b", Rule::bin_digit),
&|rule| {
rule.dec_digit()
.and_any(|r| r.dec_digit().or(|r| r.char('_')))
},
])
}
/// Matches a float literal
// TODO: exponent form
pub fn float(self) -> Self {
self.and_any(Rule::dec_digit)
.char('.')
.and_many(Rule::dec_digit)
}
/// Matches one apostrophe-delimited char literal
pub fn character(self) -> Self {
self.char('\'').character_continue().char('\'')
}
pub fn character_continue(self) -> Self {
self.and(|rule| rule.string_escape().or(|rule| rule.not_char('\'')))
}
/// Matches one quote-delimited string literal
pub fn string(self) -> Self {
self.char('"').and_any(Rule::string_continue).char('"')
}
/// Matches one string escape sequence or non-`"` character
pub fn string_continue(self) -> Self {
self.and(Rule::string_escape).or(|rule| rule.not_char('"'))
fn block_comment(&mut self) -> LResult<Token> {
while let Ok(c) = self.next() {
if '*' == c && Ok('/') == self.next() {
break;
}
}
self.produce(Type::Comment, ())
}
}
impl<'t> Rule<'t> {
/// Matches a char lexicographically between start and end
pub fn char_between(self, start: char, end: char) -> Self {
self.char_fn(|c| start <= c && c <= end)
/// Identifiers
impl<'t> Lexer<'t> {
fn identifier(&mut self) -> LResult<Token> {
let mut out = String::from(self.xid_start()?);
while let Ok(c) = self.xid_continue() {
out.push(c)
}
if let Ok(keyword) = Keyword::from_str(&out) {
self.produce(Type::Keyword(keyword), ())
} else {
self.produce(Type::Identifier, TokenData::Identifier(out.into()))
}
}
/// Matches a single char
pub fn char(self, c: char) -> Self {
self.has(|rule| rule.text.starts_with(c), 1)
fn xid_start(&mut self) -> LResult<char> {
match self.peek()? {
xid if xid == '_' || xid.is_xid_start() => {
self.consume()?;
Ok(xid)
}
bad => Err(Error::not_identifier(bad, self.line(), self.col())),
}
}
/// Matches the entirety of a string slice
pub fn str(self, s: &str) -> Self {
self.has(|rule| rule.text.starts_with(s), s.len())
fn xid_continue(&mut self) -> LResult<char> {
match self.peek()? {
xid if xid.is_xid_continue() => {
self.consume()?;
Ok(xid)
}
bad => Err(Error::not_identifier(bad, self.line(), self.col())),
}
}
/// Matches a char based on the output of a function
pub fn char_fn(self, f: impl Fn(char) -> bool) -> Self {
self.and(|rule| match rule.text.strip_prefix(&f) {
Some(text) => Self { text, taken: rule.taken + next_utf8(rule.text, 1), ..rule },
None => Self { is_alright: false, ..rule },
}
/// Integers
impl<'t> Lexer<'t> {
fn int_with_base(&mut self) -> LResult<Token> {
match self.peek() {
Ok('x') => self.consume()?.digits::<16>(),
Ok('d') => self.consume()?.digits::<10>(),
Ok('o') => self.consume()?.digits::<8>(),
Ok('b') => self.consume()?.digits::<2>(),
Ok('0'..='9') => self.digits::<10>(),
_ => self.produce(Type::Integer, 0),
}
}
fn digits<const B: u32>(&mut self) -> LResult<Token> {
let mut value = self.digit::<B>()? as u128;
while let Ok(true) = self.peek().as_ref().map(char::is_ascii_alphanumeric) {
value = value * B as u128 + self.digit::<B>()? as u128;
}
self.produce(Type::Integer, value)
}
fn digit<const B: u32>(&mut self) -> LResult<u32> {
let digit = self.peek()?;
self.consume()?;
digit
.to_digit(B)
.ok_or(Error::invalid_digit(digit, self.line(), self.col()))
}
}
/// Strings and characters
impl<'t> Lexer<'t> {
fn string(&mut self) -> LResult<Token> {
let mut value = String::new();
while '"'
!= self
.peek()
.map_err(|e| e.mask_reason(Reason::UnmatchedDelimiters('"')))?
{
value.push(self.unescape()?)
}
self.consume()?.produce(Type::String, value)
}
fn character(&mut self) -> LResult<Token> {
let out = self.unescape()?;
match self.peek()? {
'\'' => self.consume()?.produce(Type::Character, out),
_ => Err(Error::unmatched_delimiters('\'', self.line(), self.col())),
}
}
/// Unescape a single character
fn unescape(&mut self) -> LResult<char> {
match self.next() {
Ok('\\') => (),
other => return other,
}
Ok(match self.next()? {
'a' => '\x07',
'b' => '\x08',
'f' => '\x0c',
'n' => '\n',
'r' => '\r',
't' => '\t',
'x' => self.hex_escape()?,
'u' => self.unicode_escape()?,
'0' => '\0',
chr => chr,
})
}
/// Matches a single char except c
pub fn not_char(self, c: char) -> Self {
self.has(|rule| !rule.text.starts_with(c), 1)
/// unescape a single 2-digit hex escape
fn hex_escape(&mut self) -> LResult<char> {
let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
char::from_u32(out).ok_or(Error::bad_unicode(out, self.line(), self.col()))
}
/// Matches a single char unless the text starts with s
pub fn not_str(self, s: &str) -> Self {
self.has(|rule| !rule.text.starts_with(s), 1)
}
// commonly used character classes
/// Matches one of any character
pub fn any(self) -> Self {
self.has(|_| true, 1)
}
/// Matches one whitespace
pub fn whitespace(self) -> Self {
self.char_fn(|c| c.is_whitespace())
}
/// Matches one whitespace, except `'\n'`
pub fn whitespace_not_newline(self) -> Self {
self.char_fn(|c| '\n' != c && c.is_whitespace())
}
/// Matches anything but whitespace
pub fn not_whitespace(self) -> Self {
self.char_fn(|c| !c.is_whitespace())
}
/// Matches one XID_START
pub fn xid_start(self) -> Self {
use unicode_xid::UnicodeXID;
self.char_fn(UnicodeXID::is_xid_start)
}
/// Matches one XID_CONTINUE
pub fn xid_continue(self) -> Self {
use unicode_xid::UnicodeXID;
self.char_fn(UnicodeXID::is_xid_continue)
}
/// Matches one hexadecimal digit
pub fn hex_digit(self) -> Self {
self.char_fn(|c| c.is_ascii_hexdigit())
}
/// Matches one decimal digit
pub fn dec_digit(self) -> Self {
self.char_fn(|c| c.is_ascii_digit())
}
/// Matches one octal digit
pub fn oct_digit(self) -> Self {
self.char_between('0', '7')
}
/// Matches one binary digit
pub fn bin_digit(self) -> Self {
self.char_between('0', '1')
}
/// Matches any string escape "\."
pub fn string_escape(self) -> Self {
self.char('\\').and(Rule::any)
}
/// Performs a consuming condition assertion on the input
fn has(self, condition: impl Fn(&Self) -> bool, len: usize) -> Self {
let len = next_utf8(self.text, len);
self.and(|rule| match condition(&rule) && !rule.text.is_empty() {
true => Self { text: &rule.text[len..], taken: rule.taken + len, ..rule },
false => Self { is_alright: false, ..rule },
})
/// unescape a single \u{} unicode escape
fn unicode_escape(&mut self) -> LResult<char> {
let mut out = 0;
let Ok('{') = self.peek() else {
return Err(Error::invalid_escape('u', self.line(), self.col()));
};
self.consume()?;
while let Ok(c) = self.peek() {
match c {
'}' => {
self.consume()?;
return char::from_u32(out).ok_or(Error::bad_unicode(
out,
self.line(),
self.col(),
));
}
_ => out = (out << 4) + self.digit::<16>()?,
}
}
Err(Error::invalid_escape('u', self.line(), self.col()))
}
}
impl<'t> lerox::Combinator for Rule<'t> {
fn is_alright(&self) -> bool {
self.is_alright
}
fn into_alright(self) -> Self {
Self { is_alright: true, ..self }
}
}
use error::{Error, LResult, Reason};
pub mod error {
use std::fmt::Display;
/// Returns the index of the next unicode character, rounded up
fn next_utf8(text: &str, mut index: usize) -> usize {
index = index.min(text.len());
while !text.is_char_boundary(index) {
index += 1
pub type LResult<T> = Result<T, Error>;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Error {
pub reason: Reason,
pub line: u32,
pub col: u32,
}
/// The reason for the [Error]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Reason {
UnmatchedDelimiters(char),
UnexpectedChar(char),
NotIdentifier(char),
UnknownEscape(char),
InvalidEscape(char),
InvalidDigit(char),
UnknownBase(char),
BadUnicode(u32),
EndOfFile,
}
error_impl! {
unmatched_delimiters(c: char) => Reason::UnmatchedDelimiters(c),
unexpected_char(c: char) => Reason::UnexpectedChar(c),
not_identifier(c: char) => Reason::NotIdentifier(c),
unknown_escape(e: char) => Reason::UnknownEscape(e),
invalid_escape(e: char) => Reason::InvalidEscape(e),
invalid_digit(digit: char) => Reason::InvalidDigit(digit),
unknown_base(base: char) => Reason::UnknownBase(base),
bad_unicode(value: u32) => Reason::BadUnicode(value),
end_of_file => Reason::EndOfFile,
}
impl Error {
/// Changes the [Reason] of this error
pub(super) fn mask_reason(self, reason: Reason) -> Self {
Self { reason, ..self }
}
/// Gets the (line, col) where the error happened
pub fn location(&self) -> (u32, u32) {
(self.line, self.col)
}
}
macro error_impl ($($fn:ident$(( $($p:ident: $t:ty),* ))? => $reason:expr),*$(,)?) {
#[allow(dead_code)]
impl Error {
$(pub(super) fn $fn ($($($p: $t),*,)? line: u32, col: u32) -> Self {
Self { reason: $reason, line, col }
})*
}
}
impl Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}:{}: {}", self.line, self.col, self.reason)
}
}
impl Display for Reason {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Reason::UnmatchedDelimiters(c) => write! {f, "Unmatched `{c}` in input"},
Reason::UnexpectedChar(c) => write!(f, "Character `{c}` not expected"),
Reason::NotIdentifier(c) => write!(f, "Character `{c}` not valid in identifiers"),
Reason::UnknownEscape(c) => write!(f, "`\\{c}` is not a known escape sequence"),
Reason::InvalidEscape(c) => write!(f, "Escape sequence `\\{c}`... is malformed"),
Reason::InvalidDigit(c) => write!(f, "`{c}` is not a valid digit"),
Reason::UnknownBase(c) => write!(f, "`0{c}`... is not a valid base"),
Reason::BadUnicode(c) => write!(f, "`{c}` is not a valid unicode code-point"),
Reason::EndOfFile => write!(f, "Reached end of input"),
}
}
}
index
}
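
An aside on the integer scanner above: digits::<B> folds each digit into a running value instead of slicing and re-parsing text. The same idea as a standalone sketch (hypothetical helper, not part of this diff):

    /// Positional accumulation: one multiply and one add per digit in base B.
    fn accumulate<const B: u32>(s: &str) -> Option<u128> {
        let mut value: u128 = 0;
        for c in s.chars() {
            value = value * B as u128 + c.to_digit(B)? as u128;
        }
        Some(value)
    }

    fn main() {
        assert_eq!(accumulate::<16>("2100"), Some(0x2100));
        assert_eq!(accumulate::<2>("1010"), Some(10));
    }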

View File

@@ -1,12 +1,10 @@
//! Parses [tokens](super::token) into an [AST](super::ast)
use std::vec;
use super::{
ast::preamble::*,
lexer::Lexer,
token::{Keyword, Token, Type},
token::{Keyword, Token, TokenData, Type},
};
use constr::ConstrTools;
use error::{Error, Reason::*, *};
pub mod error {
@@ -16,6 +14,7 @@ pub mod error {
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum Reason {
Expected(Type),
Unexpected(Type),
NotIdentifier,
NotOperator,
NotLiteral,
@@ -29,7 +28,6 @@ pub mod error {
IntOverflow,
NotBranch,
IncompleteBranch,
AllElseFailed,
EndOfFile,
PanicStackUnderflow,
#[default]
@@ -41,6 +39,7 @@ pub mod error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Expected(t) => write!(f, "Expected {t}"),
Self::Unexpected(t) => write!(f, "Unexpected {t} in bagging area"),
Self::NotIdentifier => "Not an identifier".fmt(f),
Self::NotOperator => "Not an operator".fmt(f),
Self::NotLiteral => "Not a literal".fmt(f),
@@ -54,7 +53,6 @@ pub mod error {
Self::IntOverflow => "Integer too large".fmt(f),
Self::IncompleteBranch => "Branch expression was incomplete".fmt(f),
Self::NotBranch => "Expected branch expression".fmt(f),
Self::AllElseFailed => "Did not match any rule".fmt(f),
Self::EndOfFile => "Got end of file".fmt(f),
Self::PanicStackUnderflow => "Could not recover from panic".fmt(f),
Self::Unspecified => {
@@ -66,7 +64,7 @@ pub mod error {
/// [Parser](super::Parser) [Result]
pub type PResult<T> = Result<T, Error>;
#[derive(Clone, Debug, Default, PartialEq, Eq)]
#[derive(Clone, Debug, Default, PartialEq)]
pub struct Error {
reason: Reason,
start: Option<Token>,
@@ -74,7 +72,7 @@ pub mod error {
impl Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if let Some(token) = self.start {
if let Some(token) = &self.start {
write!(f, "{}:{}: ", token.line(), token.col())?;
}
write!(f, "{}", self.reason)
@@ -95,14 +93,15 @@ pub mod error {
pub fn maybe_token(self, start: Option<Token>) -> Self {
Self { start, ..self }
}
pub fn start(&self) -> Option<Token> {
self.start
pub fn start(&self) -> Option<&Token> {
self.start.as_ref()
}
pub fn reason(self, reason: Reason) -> Self {
Self { reason, ..self }
}
error_impl! {
expected(e: Type): Expected,
unexpected(e: Type): Unexpected,
not_identifier: NotIdentifier,
not_operator: NotOperator,
not_literal: NotLiteral,
@@ -115,7 +114,6 @@ pub mod error {
not_int: NotInt,
int_overflow: IntOverflow,
not_branch: NotBranch,
all_else_failed: AllElseFailed,
end_of_file: EndOfFile,
panic_underflow: PanicStackUnderflow,
unspecified: Unspecified,
@@ -125,27 +123,32 @@ pub mod error {
/// The Parser performs recursive descent on the AST's grammar
/// using a provided [Lexer].
pub struct Parser<'t> {
pub struct Parser {
tokens: Vec<Token>,
panic_stack: Vec<usize>,
text: &'t str,
curr: usize,
}
impl<'t> From<Lexer<'t>> for Parser<'t> {
impl<'t> From<Lexer<'t>> for Parser {
fn from(value: Lexer<'t>) -> Self {
let (tokens, text) = value.consume();
Self::new(tokens, text)
let mut tokens = vec![];
for result in value {
match result {
Ok(t) => tokens.push(t),
Err(e) => println!("{e}"),
}
}
Self::new(tokens)
}
}
impl<'t> Parser<'t> {
impl Parser {
/// Create a new [Parser] from a list of [Tokens][1]
/// and the [text](str) used to generate that list
/// (as [Tokens][1] do not store their strings)
///
/// [1]: Token
pub fn new(tokens: Vec<Token>, text: &'t str) -> Self {
Self { tokens, text, panic_stack: vec![], curr: 0 }
pub fn new(tokens: Vec<Token>) -> Self {
Self { tokens, panic_stack: vec![], curr: 0 }
}
/// Parse the [start of an AST](Start)
pub fn parse(&mut self) -> PResult<Start> {
@@ -170,7 +173,7 @@ impl<'t> Parser<'t> {
pub fn peek(&self) -> PResult<&Token> {
self.tokens
.get(self.curr)
.ok_or(Error::end_of_file().maybe_token(self.tokens.last().copied()))
.ok_or(Error::end_of_file().maybe_token(self.tokens.last().cloned()))
}
/// Records the current position on the panic stack
pub fn mark(&mut self) -> &mut Self {
@@ -198,7 +201,7 @@ impl<'t> Parser<'t> {
}
}
/// Helpers
impl<'t> Parser<'t> {
impl Parser {
fn consume_type(&mut self, t: Type) -> PResult<&mut Self> {
self.matches(t)?;
Ok(self.consume())
@@ -207,17 +210,17 @@ impl<'t> Parser<'t> {
if self.curr < self.tokens.len() {
Ok(self)
} else {
Err(Error::end_of_file().maybe_token(self.tokens.last().copied()))
Err(Error::end_of_file().maybe_token(self.tokens.last().cloned()))
}
}
fn todo_error(&mut self, l: u32, c: u32, s: &str) -> Error {
eprintln!("TODO: {s}:{l}:{c}");
Error::unspecified().token(*self.peek().unwrap())
Error::unspecified().token(self.peek().unwrap().clone())
}
fn matches(&mut self, e: Type) -> PResult<&Token> {
let t = self.check_eof()?.peek().expect("self should not be eof");
if t.ty() != e {
Err(Error::expected(e).token(*t))?
Err(Error::expected(e).token(t.clone()))?
}
Ok(t)
}
@@ -250,51 +253,54 @@ macro ptodo($self:expr $(, $t:expr)*) {
}
/// # Terminals and Pseudo-Terminals
impl<'t> Parser<'t> {
impl Parser {
fn identifier(&mut self) -> PResult<Identifier> {
let token = *self
.matches(Type::Identifier)
.map_err(|e| Error::not_identifier().maybe_token(e.start()))?;
Ok(Identifier(self.consume().text[&token].into()))
let out = match self.matches(Type::Identifier)?.data() {
TokenData::Identifier(id) => Identifier(id.to_string()),
_ => Err(Error::not_identifier())?,
};
self.consume();
Ok(out)
}
fn literal(&mut self) -> PResult<literal::Literal> {
use literal::Literal::*;
use Keyword::{False, True};
let tok = self.peek()?;
match tok.ty() {
let token = self.peek()?;
match token.ty() {
Type::Float => self.float().map(Float),
Type::Integer => self.int().map(Int),
Type::String => self.string().map(String),
Type::Character => self.char().map(Char),
Type::Keyword(True | False) => self.bool().map(Bool),
_ => Err(Error::not_literal().token(*tok)),
_ => Err(Error::not_literal().token(token.clone())),
}
}
fn float(&mut self) -> PResult<literal::Float> {
ptodo!(self)
}
fn int(&mut self) -> PResult<u128> {
let token = *self.matches(Type::Integer)?;
self.consume().text[&token]
.chars()
.parse_int::<u128>()
.next()
.ok_or(Error::not_int().token(token))
let out = match self.matches(Type::Integer)?.data() {
TokenData::Integer(i) => *i,
_ => Err(Error::not_int())?,
};
self.consume();
Ok(out)
}
fn string(&mut self) -> PResult<String> {
let range = self
.matches(Type::String)
.map_err(|e| e.reason(NotString))?
.range();
Ok(self.consume().text[range].chars().unescape().collect())
let out = match self.matches(Type::String)?.data() {
TokenData::String(s) => s.clone(),
_ => Err(Error::not_string())?,
};
self.consume();
Ok(out)
}
fn char(&mut self) -> PResult<char> {
let token = *self.matches(Type::Character)?;
self.consume().text[&token]
.chars()
.unescape()
.next()
.ok_or(Error::not_char().token(token))
let out = match self.matches(Type::Character)?.data() {
TokenData::Character(c) => *c,
_ => Err(Error::not_char())?,
};
self.consume();
Ok(out)
}
fn bool(&mut self) -> PResult<bool> {
use Keyword::{False, True};
@@ -302,14 +308,14 @@ impl<'t> Parser<'t> {
let out = match token.ty() {
Type::Keyword(False) => false,
Type::Keyword(True) => true,
_ => Err(Error::not_bool().token(*token))?,
_ => Err(Error::not_bool().token(token.clone()))?,
};
self.consume();
Ok(out)
}
}
/// Expressions
impl<'t> Parser<'t> {
impl Parser {
fn expr(&mut self) -> PResult<expression::Expr> {
use expression::Expr;
Ok(Expr { ignore: self.ignore()? })
@@ -335,7 +341,7 @@ impl<'t> Parser<'t> {
}
fn primary(&mut self) -> PResult<expression::Primary> {
use expression::Primary;
let token = *self.peek()?;
let token = self.peek()?;
match token.ty() {
Type::Identifier => self.identifier().map(Primary::Identifier),
Type::String
@@ -346,7 +352,7 @@ impl<'t> Parser<'t> {
Type::LCurly => self.block().map(Primary::Block),
Type::LParen => self.group().map(Primary::Group),
Type::Keyword(_) => self.flow().map(Primary::Branch),
_ => Err(Error::all_else_failed().token(token))?,
e => Err(Error::unexpected(e).token(token.clone()))?,
}
}
}
@@ -377,7 +383,7 @@ macro binary ($($f:ident = $a:ident, $b:ident);*$(;)?) {$(
}
)*}
/// # [Arithmetic and Logical Subexpressions](math)
impl<'t> Parser<'t> {
impl Parser {
binary! {
//name operands operators
ignore = assign, ignore_op;
@@ -400,18 +406,19 @@ impl<'t> Parser<'t> {
}
macro operator_impl ($($(#[$m:meta])* $f:ident : {$($type:pat => $op:ident),*$(,)?})*) {
$($(#[$m])* fn $f(&mut self) -> PResult<operator::Binary> {
use operator::Binary;
let token = *self.peek()?;
let token = self.peek()?;
let out = Ok(match token.ty() {
$($type => Binary::$op,)*
_ => Err(Error::not_operator().token(token))?,
_ => Err(Error::not_operator().token(token.clone()))?,
});
self.consume();
out
})*
}
/// # [Operators](operator)
impl<'t> Parser<'t> {
impl Parser {
operator_impl! {
factor_op: {
Type::Star => Mul,
@@ -465,7 +472,7 @@ impl<'t> Parser<'t> {
/// Parse a [unary operator](operator::Unary)
fn unary_op(&mut self) -> PResult<operator::Unary> {
use operator::Unary;
let token = *self.peek()?;
let token = self.peek()?;
let out = Ok(match token.ty() {
Type::AmpAmp => Unary::RefRef,
Type::Amp => Unary::Ref,
@@ -475,18 +482,18 @@ impl<'t> Parser<'t> {
Type::At => Unary::At,
Type::Hash => Unary::Hash,
Type::Tilde => Unary::Tilde,
_ => Err(Error::not_operator().token(token))?,
_ => Err(Error::not_operator().token(token.clone()))?,
});
self.consume();
out
}
}
/// # [Control Flow](control)
impl<'t> Parser<'t> {
impl Parser {
fn flow(&mut self) -> PResult<control::Flow> {
use control::Flow;
use Keyword::{Break, Continue, For, If, Return, While};
let token = *self.peek()?;
let token = self.peek()?;
match token.ty() {
Type::Keyword(While) => self.parse_while().map(Flow::While),
Type::Keyword(For) => self.parse_for().map(Flow::For),
@@ -494,9 +501,9 @@ impl<'t> Parser<'t> {
Type::Keyword(Break) => self.parse_break().map(Flow::Break),
Type::Keyword(Return) => self.parse_return().map(Flow::Return),
Type::Keyword(Continue) => self.parse_continue().map(Flow::Continue),
_ => Err(Error::all_else_failed().token(token)),
e => Err(Error::unexpected(e).token(token.clone()))?,
}
.map_err(|e| e.reason(IncompleteBranch).token(token))
.map_err(|e| e.reason(IncompleteBranch))
}
fn parse_if(&mut self) -> PResult<control::If> {
self.keyword(Keyword::If)?;
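
The terminal parsers above all share one shape: match on the token's TokenData payload, then consume. Isolated as a sketch (hypothetical free function; the TokenData variants are the real ones):

    use conlang::token::{Token, TokenData};

    /// Read an integer's payload straight off the token,
    /// with no source text or re-parsing involved.
    fn integer_value(token: &Token) -> Option<u128> {
        match token.data() {
            TokenData::Integer(i) => Some(*i),
            _ => None, // the parser maps this case to Error::not_int()
        }
    }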

View File

@@ -1,476 +1,180 @@
mod token {
use crate::token::*;
#[test]
fn token_has_type() {
assert_eq!(Token::new(Type::Comment, 0, 10, 1, 1).ty(), Type::Comment);
assert_eq!(
Token::new(Type::Identifier, 0, 10, 1, 1).ty(),
Type::Identifier
);
}
#[test]
fn token_has_range() {
let t = Token::new(Type::Comment, 0, 10, 1, 1);
assert_eq!(t.range(), 0..10);
}
// TODO
}
mod ast {
// TODO
}
mod lexer {
use std::ops::Range;
#[allow(unused_imports)]
use crate::{
lexer::*,
token::{Token, Type},
lexer::Lexer,
token::{Token, TokenData, Keyword, Type},
};
fn assert_whole_input_is_token<'t, F>(input: &'t str, f: F, ty: Type)
where F: FnOnce(&mut Lexer<'t>) -> Option<Token> {
assert_has_type_and_range(input, f, ty, 0..input.len())
}
fn assert_has_type_and_range<'t, F>(input: &'t str, f: F, ty: Type, range: Range<usize>)
where F: FnOnce(&mut Lexer<'t>) -> Option<Token> {
let tok =
f(&mut Lexer::new(input)).unwrap_or_else(|| panic!("Should be {ty:?}, {range:?}"));
assert_eq!(ty, tok.ty());
assert_eq!(range, tok.range());
}
mod comment {
use super::*;
macro test_lexer_output_type ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$(
#[test]
fn line_comment() {
assert_whole_input_is_token("// comment!", Lexer::comment, Type::Comment);
}
#[test]
#[should_panic]
fn not_line_comment() {
assert_whole_input_is_token("fn main() {}", Lexer::comment, Type::Comment);
}
#[test]
fn block_comment() {
assert_whole_input_is_token("/* comment! */", Lexer::comment, Type::Comment);
}
#[test]
fn nested_block_comment() {
assert_whole_input_is_token(
"/* a /* nested */ comment */",
Lexer::comment,
Type::Comment,
fn $f() {$(
assert_eq!(
Lexer::new($test)
.into_iter()
.map(|t| t.unwrap().ty())
.collect::<Vec<_>>(),
dbg!($expect)
);
}
)*}
)*}
macro test_lexer_data_type ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$(
#[test]
#[should_panic]
fn unclosed_nested_comment() {
assert_whole_input_is_token(
"/* improperly /* nested */ comment",
Lexer::comment,
Type::Comment,
fn $f() {$(
assert_eq!(
Lexer::new($test)
.into_iter()
.map(|t| t.unwrap().into_data())
.collect::<Vec<_>>(),
dbg!($expect)
);
}
#[test]
#[should_panic]
fn not_block_comment() {
assert_whole_input_is_token("fn main() {}", Lexer::comment, Type::Comment);
}
#[test]
fn shebang_comment() {
assert_whole_input_is_token("#!/ comment!", Lexer::comment, Type::Comment);
}
#[test]
#[should_panic]
fn not_shebang_comment() {
assert_whole_input_is_token("fn main() {}", Lexer::comment, Type::Comment);
}
}
mod identifier {
use super::*;
)*}
)*}
#[test]
fn identifier() {
assert_whole_input_is_token("valid_identifier", Lexer::identifier, Type::Identifier);
assert_whole_input_is_token("_0", Lexer::identifier, Type::Identifier);
assert_whole_input_is_token("_", Lexer::identifier, Type::Identifier);
}
#[test]
fn unicode_identifier() {
assert_whole_input_is_token("ζ_ζζζ_ζζζ_ζζζ", Lexer::identifier, Type::Identifier);
assert_whole_input_is_token("_ζζζ_ζζζ_ζζζ_", Lexer::identifier, Type::Identifier);
}
#[test]
#[should_panic]
fn not_identifier() {
assert_whole_input_is_token("123456789", Lexer::identifier, Type::Identifier);
}
/// Convert an `[ expr, ... ]` into a `[ TokenData, ... ]`
macro td ($($id:expr),*) {
[$($id.into()),*]
}
mod literal {
use super::*;
#[test]
fn literal_class() {
assert_whole_input_is_token("1_00000", Lexer::literal, Type::Integer);
assert_whole_input_is_token("1.00000", Lexer::literal, Type::Float);
assert_has_type_and_range("\"1.0\"", Lexer::literal, Type::String, 1..4);
assert_has_type_and_range("'\"'", Lexer::literal, Type::Character, 1..2);
}
mod integer {
use super::*;
#[test]
fn bare() {
assert_whole_input_is_token("10010110", Lexer::integer, Type::Integer);
assert_whole_input_is_token("12345670", Lexer::integer, Type::Integer);
assert_whole_input_is_token("1234567890", Lexer::integer, Type::Integer);
}
#[test]
fn base16() {
assert_has_type_and_range("0x1234", Lexer::integer, Type::Integer, 0..6);
assert_has_type_and_range("0x1234 \"hello\"", Lexer::integer, Type::Integer, 0..6);
}
#[test]
fn base10() {
assert_whole_input_is_token("0d1234", Lexer::integer, Type::Integer);
}
#[test]
fn base8() {
assert_whole_input_is_token("0o1234", Lexer::integer, Type::Integer);
}
#[test]
fn base2() {
assert_whole_input_is_token("0b1010", Lexer::integer, Type::Integer);
}
}
mod float {
use super::*;
#[test]
fn number_dot_number_is_float() {
assert_whole_input_is_token("1.0", Lexer::float, Type::Float);
}
#[test]
fn nothing_dot_number_is_float() {
assert_whole_input_is_token(".0", Lexer::float, Type::Float);
}
#[test]
#[should_panic]
fn number_dot_nothing_is_not_float() {
assert_whole_input_is_token("1.", Lexer::float, Type::Float);
}
#[test]
#[should_panic]
fn nothing_dot_nothing_is_not_float() {
assert_whole_input_is_token(".", Lexer::float, Type::Float);
}
}
mod string {
use super::*;
#[test]
fn empty_string() {
assert_has_type_and_range("\"\"", Lexer::string, Type::String, 1..1);
}
#[test]
fn unicode_string() {
assert_has_type_and_range("\"I 💙 🦈!\"", Lexer::string, Type::String, 1..13);
}
#[test]
fn escape_string() {
assert_has_type_and_range(
"\" \\\"This is a quote\\\" \"",
Lexer::string,
Type::String,
1..22,
);
}
}
mod char {
use super::*;
#[test]
fn plain_char() {
assert_has_type_and_range("'A'", Lexer::character, Type::Character, 1..2);
assert_has_type_and_range("'a'", Lexer::character, Type::Character, 1..2);
assert_has_type_and_range("'#'", Lexer::character, Type::Character, 1..2);
}
#[test]
fn unicode_char() {
assert_has_type_and_range("'ε'", Lexer::character, Type::Character, 1..3);
}
#[test]
fn escaped_char() {
assert_has_type_and_range("'\\n'", Lexer::character, Type::Character, 1..3);
}
#[test]
#[should_panic]
fn no_char() {
assert_has_type_and_range("''", Lexer::character, Type::Character, 1..1);
}
}
}
mod delimiter {
use super::*;
#[test]
fn delimiter_class() {
assert_whole_input_is_token("[", Lexer::delimiter, Type::LBrack);
assert_whole_input_is_token("]", Lexer::delimiter, Type::RBrack);
assert_whole_input_is_token("{", Lexer::delimiter, Type::LCurly);
assert_whole_input_is_token("}", Lexer::delimiter, Type::RCurly);
assert_whole_input_is_token("(", Lexer::delimiter, Type::LParen);
assert_whole_input_is_token(")", Lexer::delimiter, Type::RParen);
}
#[test]
fn l_brack() {
assert_whole_input_is_token("[", Lexer::l_brack, Type::LBrack);
}
#[test]
fn r_brack() {
assert_whole_input_is_token("]", Lexer::r_brack, Type::RBrack);
}
#[test]
fn l_curly() {
assert_whole_input_is_token("{", Lexer::l_curly, Type::LCurly);
}
#[test]
fn r_curly() {
assert_whole_input_is_token("}", Lexer::r_curly, Type::RCurly);
}
#[test]
fn l_paren() {
assert_whole_input_is_token("(", Lexer::l_paren, Type::LParen);
mod ident {
use super::*;
macro ident ($($id:literal),*) {
[$(TokenData::Identifier($id.into())),*]
}
#[test]
fn r_paren() {
assert_whole_input_is_token(")", Lexer::r_paren, Type::RParen);
test_lexer_data_type! {
underscore { "_ _" => ident!["_", "_"] }
unicode { "_ε ε_" => ident!["_ε", "ε_"] }
many_underscore { "____________________________________" =>
ident!["____________________________________"] }
}
}
mod punctuation {
mod keyword {
use super::*;
mod compound {
use super::*;
#[test]
fn dot_dot() {
assert_whole_input_is_token("..", Lexer::dot_dot, Type::DotDot)
macro kw($($k:ident),*) {
[ $(Type::Keyword(Keyword::$k),)* ]
}
test_lexer_output_type! {
kw_break { "break break" => kw![Break, Break] }
kw_continue { "continue continue" => kw![Continue, Continue] }
kw_else { "else else" => kw![Else, Else] }
kw_false { "false false" => kw![False, False] }
kw_for { "for for" => kw![For, For] }
kw_fn { "fn fn" => kw![Fn, Fn] }
kw_if { "if if" => kw![If, If] }
kw_in { "in in" => kw![In, In] }
kw_let { "let let" => kw![Let, Let] }
kw_return { "return return" => kw![Return, Return] }
kw_true { "true true" => kw![True, True] }
kw_while { "while while" => kw![While, While] }
keywords { "break continue else false for fn if in let return true while" =>
kw![Break, Continue, Else, False, For, Fn, If, In, Let, Return, True, While] }
}
}
mod integer {
use super::*;
test_lexer_data_type! {
hex {
"0x0 0x1 0x15 0x2100 0x8000" =>
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
}
#[test]
fn dot_dot_eq() {
assert_whole_input_is_token("..=", Lexer::dot_dot_eq, Type::DotDotEq)
dec {
"0d0 0d1 0d21 0d8448 0d32768" =>
td![0, 0x1, 0x15, 0x2100, 0x8000]
}
#[test]
fn lt_lt() {
assert_whole_input_is_token("<<", Lexer::lt_lt, Type::LtLt)
oct {
"0o0 0o1 0o25 0o20400 0o100000" =>
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
}
#[test]
fn gt_gt() {
assert_whole_input_is_token(">>", Lexer::gt_gt, Type::GtGt)
bin {
"0b0 0b1 0b10101 0b10000100000000 0b1000000000000000" =>
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
}
#[test]
fn amp_amp() {
assert_whole_input_is_token("&&", Lexer::amp_amp, Type::AmpAmp)
}
#[test]
fn bar_bar() {
assert_whole_input_is_token("||", Lexer::bar_bar, Type::BarBar)
}
#[test]
fn bang_bang() {
assert_whole_input_is_token("!!", Lexer::bang_bang, Type::BangBang)
}
#[test]
fn xor_xor() {
assert_whole_input_is_token("^^", Lexer::xor_xor, Type::XorXor)
}
#[test]
fn eq_eq() {
assert_whole_input_is_token("==", Lexer::eq_eq, Type::EqEq)
}
#[test]
fn gt_eq() {
assert_whole_input_is_token(">=", Lexer::gt_eq, Type::GtEq)
}
#[test]
fn lt_eq() {
assert_whole_input_is_token("<=", Lexer::lt_eq, Type::LtEq)
}
#[test]
fn bang_eq() {
assert_whole_input_is_token("!=", Lexer::bang_eq, Type::BangEq)
}
#[test]
fn star_eq() {
assert_whole_input_is_token("*=", Lexer::star_eq, Type::StarEq)
}
#[test]
fn slash_eq() {
assert_whole_input_is_token("/=", Lexer::slash_eq, Type::SlashEq)
}
#[test]
fn plus_eq() {
assert_whole_input_is_token("+=", Lexer::plus_eq, Type::PlusEq)
}
#[test]
fn minus_eq() {
assert_whole_input_is_token("-=", Lexer::minus_eq, Type::MinusEq)
}
#[test]
fn amp_eq() {
assert_whole_input_is_token("&=", Lexer::amp_eq, Type::AmpEq)
}
#[test]
fn bar_eq() {
assert_whole_input_is_token("|=", Lexer::bar_eq, Type::BarEq)
}
#[test]
fn xor_eq() {
assert_whole_input_is_token("^=", Lexer::xor_eq, Type::XorEq)
}
#[test]
fn lt_lt_eq() {
assert_whole_input_is_token("<<=", Lexer::lt_lt_eq, Type::LtLtEq)
}
#[test]
fn gt_gt_eq() {
assert_whole_input_is_token(">>=", Lexer::gt_gt_eq, Type::GtGtEq)
baseless {
"0 1 21 8448 32768" =>
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
}
}
mod simple {
use super::*;
#[test]
fn punctuation_class() {
// go from least to most specific
assert_whole_input_is_token(";", Lexer::punctuation, Type::Semi);
assert_whole_input_is_token(".", Lexer::punctuation, Type::Dot);
assert_whole_input_is_token("*", Lexer::punctuation, Type::Star);
assert_whole_input_is_token("/", Lexer::punctuation, Type::Slash);
assert_whole_input_is_token("+", Lexer::punctuation, Type::Plus);
assert_whole_input_is_token("-", Lexer::punctuation, Type::Minus);
assert_whole_input_is_token("%", Lexer::punctuation, Type::Rem);
assert_whole_input_is_token("!", Lexer::punctuation, Type::Bang);
assert_whole_input_is_token("=", Lexer::punctuation, Type::Eq);
assert_whole_input_is_token("<", Lexer::punctuation, Type::Lt);
assert_whole_input_is_token(">", Lexer::punctuation, Type::Gt);
assert_whole_input_is_token("&", Lexer::punctuation, Type::Amp);
assert_whole_input_is_token("|", Lexer::punctuation, Type::Bar);
assert_whole_input_is_token("^", Lexer::punctuation, Type::Xor);
assert_whole_input_is_token("#", Lexer::punctuation, Type::Hash);
assert_whole_input_is_token("@", Lexer::punctuation, Type::At);
assert_whole_input_is_token(":", Lexer::punctuation, Type::Colon);
assert_whole_input_is_token("?", Lexer::punctuation, Type::Question);
assert_whole_input_is_token(",", Lexer::punctuation, Type::Comma);
assert_whole_input_is_token("~", Lexer::punctuation, Type::Tilde);
assert_whole_input_is_token("`", Lexer::punctuation, Type::Grave);
assert_whole_input_is_token("\\", Lexer::punctuation, Type::Backslash);
assert_whole_input_is_token("<<", Lexer::punctuation, Type::LtLt);
assert_whole_input_is_token(">>", Lexer::punctuation, Type::GtGt);
assert_whole_input_is_token("&&", Lexer::punctuation, Type::AmpAmp);
assert_whole_input_is_token("||", Lexer::punctuation, Type::BarBar);
assert_whole_input_is_token("!!", Lexer::punctuation, Type::BangBang);
assert_whole_input_is_token("^^", Lexer::punctuation, Type::XorXor);
assert_whole_input_is_token("==", Lexer::punctuation, Type::EqEq);
assert_whole_input_is_token(">=", Lexer::punctuation, Type::GtEq);
assert_whole_input_is_token("<=", Lexer::punctuation, Type::LtEq);
assert_whole_input_is_token("!=", Lexer::punctuation, Type::BangEq);
assert_whole_input_is_token("*=", Lexer::punctuation, Type::StarEq);
assert_whole_input_is_token("/=", Lexer::punctuation, Type::SlashEq);
assert_whole_input_is_token("+=", Lexer::punctuation, Type::PlusEq);
assert_whole_input_is_token("-=", Lexer::punctuation, Type::MinusEq);
assert_whole_input_is_token("&=", Lexer::punctuation, Type::AmpEq);
assert_whole_input_is_token("|=", Lexer::punctuation, Type::BarEq);
assert_whole_input_is_token("^=", Lexer::punctuation, Type::XorEq);
assert_whole_input_is_token("..", Lexer::punctuation, Type::DotDot);
assert_whole_input_is_token("..=", Lexer::punctuation, Type::DotDotEq);
assert_whole_input_is_token("<<=", Lexer::punctuation, Type::LtLtEq);
assert_whole_input_is_token(">>=", Lexer::punctuation, Type::GtGtEq);
}
mod string {
use super::*;
test_lexer_data_type! {
empty_string {
"\"\"" =>
td![String::from("")]
}
// individual functions below
#[test]
fn semi() {
assert_whole_input_is_token(";", Lexer::semi, Type::Semi)
unicode_string {
"\"I 💙 🦈!\"" =>
td![String::from("I 💙 🦈!")]
}
#[test]
fn dot() {
assert_whole_input_is_token(".", Lexer::dot, Type::Dot)
}
#[test]
fn star() {
assert_whole_input_is_token("*", Lexer::star, Type::Star)
}
#[test]
fn slash() {
assert_whole_input_is_token("/", Lexer::slash, Type::Slash)
}
#[test]
fn plus() {
assert_whole_input_is_token("+", Lexer::plus, Type::Plus)
}
#[test]
fn minus() {
assert_whole_input_is_token("-", Lexer::minus, Type::Minus)
}
#[test]
fn rem() {
assert_whole_input_is_token("%", Lexer::rem, Type::Rem)
}
#[test]
fn bang() {
assert_whole_input_is_token("!", Lexer::bang, Type::Bang)
}
#[test]
fn eq() {
assert_whole_input_is_token("=", Lexer::eq, Type::Eq)
}
#[test]
fn lt() {
assert_whole_input_is_token("<", Lexer::lt, Type::Lt)
}
#[test]
fn gt() {
assert_whole_input_is_token(">", Lexer::gt, Type::Gt)
}
#[test]
fn amp() {
assert_whole_input_is_token("&", Lexer::amp, Type::Amp)
}
#[test]
fn bar() {
assert_whole_input_is_token("|", Lexer::bar, Type::Bar)
}
#[test]
fn xor() {
assert_whole_input_is_token("^", Lexer::xor, Type::Xor)
}
#[test]
fn hash() {
assert_whole_input_is_token("#", Lexer::hash, Type::Hash)
}
#[test]
fn at() {
assert_whole_input_is_token("@", Lexer::at, Type::At)
}
#[test]
fn colon() {
assert_whole_input_is_token(":", Lexer::colon, Type::Colon)
}
#[test]
fn backslash() {
assert_whole_input_is_token("\\", Lexer::backslash, Type::Backslash)
}
#[test]
fn question() {
assert_whole_input_is_token("?", Lexer::question, Type::Question)
}
#[test]
fn comma() {
assert_whole_input_is_token(",", Lexer::comma, Type::Comma)
}
#[test]
fn tilde() {
assert_whole_input_is_token("~", Lexer::tilde, Type::Tilde)
}
#[test]
fn grave() {
assert_whole_input_is_token("`", Lexer::grave, Type::Grave)
escape_string {
" \"This is a shark: \\u{1f988}\" " =>
td![String::from("This is a shark: 🦈")]
}
}
}
mod punct {
use super::*;
test_lexer_output_type! {
l_curly { "{ {" => [ Type::LCurly, Type::LCurly ] }
r_curly { "} }" => [ Type::RCurly, Type::RCurly ] }
l_brack { "[ [" => [ Type::LBrack, Type::LBrack ] }
r_brack { "] ]" => [ Type::RBrack, Type::RBrack ] }
l_paren { "( (" => [ Type::LParen, Type::LParen ] }
r_paren { ") )" => [ Type::RParen, Type::RParen ] }
amp { "& &" => [ Type::Amp, Type::Amp ] }
amp_amp { "&& &&" => [ Type::AmpAmp, Type::AmpAmp ] }
amp_eq { "&= &=" => [ Type::AmpEq, Type::AmpEq ] }
arrow { "-> ->" => [ Type::Arrow, Type::Arrow] }
at { "@ @" => [ Type::At, Type::At] }
backslash { "\\ \\" => [ Type::Backslash, Type::Backslash] }
bang { "! !" => [ Type::Bang, Type::Bang] }
bangbang { "!! !!" => [ Type::BangBang, Type::BangBang] }
bangeq { "!= !=" => [ Type::BangEq, Type::BangEq] }
bar { "| |" => [ Type::Bar, Type::Bar] }
barbar { "|| ||" => [ Type::BarBar, Type::BarBar] }
bareq { "|= |=" => [ Type::BarEq, Type::BarEq] }
colon { ": :" => [ Type::Colon, Type::Colon] }
comma { ", ," => [ Type::Comma, Type::Comma] }
dot { ". ." => [ Type::Dot, Type::Dot] }
dotdot { ".. .." => [ Type::DotDot, Type::DotDot] }
dotdoteq { "..= ..=" => [ Type::DotDotEq, Type::DotDotEq] }
eq { "= =" => [ Type::Eq, Type::Eq] }
eqeq { "== ==" => [ Type::EqEq, Type::EqEq] }
fatarrow { "=> =>" => [ Type::FatArrow, Type::FatArrow] }
grave { "` `" => [ Type::Grave, Type::Grave] }
gt { "> >" => [ Type::Gt, Type::Gt] }
gteq { ">= >=" => [ Type::GtEq, Type::GtEq] }
gtgt { ">> >>" => [ Type::GtGt, Type::GtGt] }
gtgteq { ">>= >>=" => [ Type::GtGtEq, Type::GtGtEq] }
hash { "# #" => [ Type::Hash, Type::Hash] }
lt { "< <" => [ Type::Lt, Type::Lt] }
lteq { "<= <=" => [ Type::LtEq, Type::LtEq] }
ltlt { "<< <<" => [ Type::LtLt, Type::LtLt] }
ltlteq { "<<= <<=" => [ Type::LtLtEq, Type::LtLtEq] }
minus { "- -" => [ Type::Minus, Type::Minus] }
minuseq { "-= -=" => [ Type::MinusEq, Type::MinusEq] }
plus { "+ +" => [ Type::Plus, Type::Plus] }
pluseq { "+= +=" => [ Type::PlusEq, Type::PlusEq] }
question { "? ?" => [ Type::Question, Type::Question] }
rem { "% %" => [ Type::Rem, Type::Rem] }
remeq { "%= %=" => [ Type::RemEq, Type::RemEq] }
semi { "; ;" => [ Type::Semi, Type::Semi] }
slash { "/ /" => [ Type::Slash, Type::Slash] }
slasheq { "/= /=" => [ Type::SlashEq, Type::SlashEq] }
star { "* *" => [ Type::Star, Type::Star] }
stareq { "*= *=" => [ Type::StarEq, Type::StarEq] }
tilde { "~ ~" => [ Type::Tilde, Type::Tilde] }
xor { "^ ^" => [ Type::Xor, Type::Xor] }
xoreq { "^= ^=" => [ Type::XorEq, Type::XorEq] }
xorxor { "^^ ^^" => [ Type::XorXor, Type::XorXor] }
}
}
}
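
The token-data tests the commit message marks TODO could reuse the same macros; one possible shape (hypothetical test, relying on the char → TokenData conversion defined in the token module):

    mod char_data {
        use super::*;
        test_lexer_data_type! {
            escaped_char { "'\\n' '\\u{1f988}'" => td!['\n', '🦈'] }
        }
    }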
mod parser {
// TODO

View File

@@ -1,5 +1,4 @@
//! Stores a component of a file as a type and span
use std::ops::Range;
mod token_type;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
@@ -88,54 +87,60 @@ pub enum Keyword {
While,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[derive(Clone, Debug, PartialEq)]
pub enum TokenData {
Identifier(Box<str>),
String(String),
Character(char),
Integer(u128),
Float(f64),
None,
}
from! {
value: &str => Self::Identifier(value.into()),
value: String => Self::String(value),
value: u128 => Self::Integer(value),
value: f64 => Self::Float(value),
value: char => Self::Character(value),
_v: () => Self::None,
}
macro from($($value:ident: $src:ty => $dst:expr),*$(,)?) {
$(impl From<$src> for TokenData {
fn from($value: $src) -> Self { $dst }
})*
}
#[derive(Clone, Debug, PartialEq)]
pub struct Token {
ty: Type,
pub head: usize,
pub tail: usize,
data: TokenData,
line: u32,
col: u32,
}
impl Token {
pub fn new(ty: Type, head: usize, tail: usize, line: u32, col: u32) -> Self {
Self { ty, head, tail, line, col }
/// Creates a new [Token] out of a [Type], [TokenData], line, and column.
pub fn new(ty: Type, data: impl Into<TokenData>, line: u32, col: u32) -> Self {
Self { ty, data: data.into(), line, col }
}
/// Cast this [Token] to a new [Type]
/// Casts this token to a new [Type]
pub fn cast(self, ty: Type) -> Self {
Self { ty, ..self }
}
/// Hack to work around the current [lexer's design limitations](crate::lexer)
pub fn rebound(self, head: usize, tail: usize) -> Self {
Self { head, tail, ..self }
}
/// Gets the line from this token
pub fn line(&self) -> u32 {
self.line
}
/// Gets the column from this token
pub fn col(&self) -> u32 {
self.col
}
pub fn is_empty(&self) -> bool {
self.tail == self.head
}
/// Gets the length of the token, in bytes
pub fn len(&self) -> usize {
self.tail - self.head
}
/// Gets the [Type] of the token
/// Gets the [Type] of this token
pub fn ty(&self) -> Type {
self.ty
}
/// Gets the exclusive range of the token
pub fn range(&self) -> Range<usize> {
self.head..self.tail
}
}
impl std::ops::Index<&Token> for str {
type Output = str;
fn index(&self, index: &Token) -> &Self::Output {
&self[index.range()]
/// Gets the [TokenData] of this token
pub fn data(&self) -> &TokenData {
&self.data
}
pub fn into_data(self) -> TokenData {
self.data
}
pub fn line(&self) -> u32 {
self.line
}
pub fn col(&self) -> u32 {
self.col
}
}
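
Since Token::new now takes `impl Into<TokenData>`, the from! conversions above keep construction terse; a minimal sketch:

    use conlang::token::{Token, TokenData, Type};

    fn main() {
        // Payloads convert via from!; () stands in for "no data".
        let int = Token::new(Type::Integer, 42u128, 1, 1);
        let semi = Token::new(Type::Semi, (), 1, 3);
        assert_eq!(int.data(), &TokenData::Integer(42));
        assert_eq!(semi.into_data(), TokenData::None);
    }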