cl-lexer: Bring over new lexer

- Different flow, similar action
- Consolidated all the single-purpose di- and trigraph functions
- Gave the lexer explicit access to its entire string, which it can slice.
2025-09-14 23:02:04 -04:00 · 2025-09-14 23:02:04 -04:00 · 1fe796dda7
commit 1fe796dda7
parent f0c871711c
1 changed files with 289 additions and 358 deletions
--- a/compiler/cl-lexer/src/lib.rs
+++ b/compiler/cl-lexer/src/lib.rs
@ -5,7 +5,7 @@ use cl_structures::span::Loc;
 use cl_token::{TokenKind as Kind, *};
 use std::{
    iter::Peekable,
-    str::{Chars, FromStr},
+    str::{CharIndices, FromStr},
 };
 use unicode_ident::*;
@ -76,409 +76,370 @@ pub mod lexer_iter {
 /// ```
 #[derive(Clone, Debug)]
 pub struct Lexer<'t> {
-    iter: Peekable<Chars<'t>>,
+    /// The source text
-    start: usize,
+    text: &'t str,
-    start_loc: (u32, u32),
+    /// A peekable iterator over the source text
-    current: usize,
+    iter: Peekable<CharIndices<'t>>,
-    current_loc: (u32, u32),
+    /// The end of the current token
    head: usize,
    /// The (line, col) end of the current token
    head_loc: (u32, u32),
    /// The start of the current token
    tail: usize,
    /// The (line, col) start of the current token
    tail_loc: (u32, u32),
 }
 impl<'t> Lexer<'t> {
    /// Creates a new [Lexer] over a [str]
    pub fn new(text: &'t str) -> Self {
        Self {
-            iter: text.chars().peekable(),
+            text,
-            start: 0,
+            iter: text.char_indices().peekable(),
-            start_loc: (1, 1),
+            head: 0,
-            current: 0,
+            head_loc: (1, 1),
-            current_loc: (1, 1),
+            tail: 0,
-        }
+            tail_loc: (1, 1),
    }
    /// Scans through the text, searching for the next [Token]
    pub fn scan(&mut self) -> LResult<Token> {
        match self.skip_whitespace().peek()? {
            '{' => self.consume()?.produce_op(Kind::LCurly),
            '}' => self.consume()?.produce_op(Kind::RCurly),
            '[' => self.consume()?.produce_op(Kind::LBrack),
            ']' => self.consume()?.produce_op(Kind::RBrack),
            '(' => self.consume()?.produce_op(Kind::LParen),
            ')' => self.consume()?.produce_op(Kind::RParen),
            '&' => self.consume()?.amp(),
            '@' => self.consume()?.produce_op(Kind::At),
            '\\' => self.consume()?.produce_op(Kind::Backslash),
            '!' => self.consume()?.bang(),
            '|' => self.consume()?.bar(),
            ':' => self.consume()?.colon(),
            ',' => self.consume()?.produce_op(Kind::Comma),
            '.' => self.consume()?.dot(),
            '=' => self.consume()?.equal(),
            '`' => self.consume()?.produce_op(Kind::Grave),
            '>' => self.consume()?.greater(),
            '#' => self.consume()?.hash(),
            '<' => self.consume()?.less(),
            '-' => self.consume()?.minus(),
            '+' => self.consume()?.plus(),
            '?' => self.consume()?.produce_op(Kind::Question),
            '%' => self.consume()?.rem(),
            ';' => self.consume()?.produce_op(Kind::Semi),
            '/' => self.consume()?.slash(),
            '*' => self.consume()?.star(),
            '~' => self.consume()?.produce_op(Kind::Tilde),
            '^' => self.consume()?.xor(),
            '0' => self.consume()?.int_with_base(),
            '1'..='9' => self.digits::<10>(),
            '"' => self.consume()?.string(),
            '\'' => self.consume()?.character(),
            '_' => self.identifier(),
            i if is_xid_start(i) => self.identifier(),
            e => {
                let err = Err(Error::unexpected_char(e, self.line(), self.col()));
                let _ = self.consume();
                err
            }
        }
    }
    /// Returns the current line
    pub fn line(&self) -> u32 {
-        self.start_loc.0
+        self.tail_loc.0
    }
    /// Returns the current column
    pub fn col(&self) -> u32 {
-        self.start_loc.1
+        self.tail_loc.1
    }
-    fn next(&mut self) -> LResult<char> {
+
-        let out = self.peek();
+    /// Returns the current token's lexeme
-        self.consume()?;
+    fn lexeme(&mut self) -> &'t str {
-        out
+        &self.text[self.tail..self.head]
    }
-    fn peek(&mut self) -> LResult<char> {
+
-        self.iter
+    /// Peeks the next character without advancing the lexer
-            .peek()
+    fn peek(&mut self) -> Option<char> {
-            .copied()
+        self.iter.peek().map(|(_, c)| *c)
            .ok_or(Error::end_of_file(self.line(), self.col()))
    }
-    fn produce(&mut self, kind: Kind, data: impl Into<TokenData>) -> LResult<Token> {
+
-        let loc = self.start_loc;
+    /// Advances the 'tail' (current position)
-        self.start_loc = self.current_loc;
+    fn advance_tail(&mut self) {
-        self.start = self.current;
+        let (idx, c) = self.iter.peek().copied().unwrap_or((self.text.len(), '\0'));
-        Ok(Token::new(kind, data, loc.0, loc.1))
+        let (line, col) = &mut self.head_loc;
-    }
+        let diff = idx - self.head;
-    fn produce_op(&mut self, kind: Kind) -> LResult<Token> {
+
-        self.produce(kind, ())
+        self.head = idx;
-    }
+        match c {
-    fn skip_whitespace(&mut self) -> &mut Self {
+            '\n' => {
        while let Ok(c) = self.peek() {
            if !c.is_whitespace() {
                break;
            }
            let _ = self.consume();
        }
        self.start = self.current;
        self.start_loc = self.current_loc;
        self
    }
    fn consume(&mut self) -> LResult<&mut Self> {
        self.current += 1;
        match self.iter.next() {
            Some('\n') => {
                let (line, col) = &mut self.current_loc;
                *line += 1;
                *col = 1;
            }
-            Some(_) => self.current_loc.1 += 1,
+            _ => *col += diff as u32,
            None => Err(Error::end_of_file(self.line(), self.col()))?,
        }
        Ok(self)
    }
 }
 /// Digraphs and trigraphs
 impl Lexer<'_> {
    /// Finishes a token that began with `&`: produces `&&`, `&=`, or a lone `&`.
    fn amp(&mut self) -> LResult<Token> {
        let kind = match self.peek() {
            Ok('&') => Kind::AmpAmp,
            Ok('=') => Kind::AmpEq,
            _ => return self.produce_op(Kind::Amp),
        };
        self.consume()?.produce_op(kind)
    }
    /// Finishes a token that began with `!`: produces `!!`, `!=`, or a lone `!`.
    fn bang(&mut self) -> LResult<Token> {
        let kind = match self.peek() {
            Ok('!') => Kind::BangBang,
            Ok('=') => Kind::BangEq,
            _ => return self.produce_op(Kind::Bang),
        };
        self.consume()?.produce_op(kind)
    }
    /// Finishes a token that began with `|`: produces `||`, `|=`, or a lone `|`.
    fn bar(&mut self) -> LResult<Token> {
        let kind = match self.peek() {
            Ok('|') => Kind::BarBar,
            Ok('=') => Kind::BarEq,
            _ => return self.produce_op(Kind::Bar),
        };
        self.consume()?.produce_op(kind)
    }
    /// Finishes a token that began with `:`: produces `::` or a lone `:`.
    fn colon(&mut self) -> LResult<Token> {
        if matches!(self.peek(), Ok(':')) {
            return self.consume()?.produce_op(Kind::ColonColon);
        }
        self.produce_op(Kind::Colon)
    }
    /// Finishes a token that began with `.`: produces `..=`, `..`, or a lone `.`.
    fn dot(&mut self) -> LResult<Token> {
        let Ok('.') = self.peek() else {
            return self.produce_op(Kind::Dot);
        };
        // Second `.` seen: consume it, then check for a trailing `=`
        match self.consume()?.peek() {
            Ok('=') => self.consume()?.produce_op(Kind::DotDotEq),
            _ => self.produce_op(Kind::DotDot),
        }
    }
    /// Finishes a token that began with `=`: produces `==`, `=>`, or a lone `=`.
    fn equal(&mut self) -> LResult<Token> {
        let kind = match self.peek() {
            Ok('=') => Kind::EqEq,
            Ok('>') => Kind::FatArrow,
            _ => return self.produce_op(Kind::Eq),
        };
        self.consume()?.produce_op(kind)
    }
    /// Finishes a token that began with `>`: produces `>=`, `>>=`, `>>`, or a lone `>`.
    fn greater(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('=') => return self.consume()?.produce_op(Kind::GtEq),
            Ok('>') => {}
            _ => return self.produce_op(Kind::Gt),
        }
        // Second `>` seen: consume it, then check for a trailing `=`
        match self.consume()?.peek() {
            Ok('=') => self.consume()?.produce_op(Kind::GtGtEq),
            _ => self.produce_op(Kind::GtGt),
        }
    }
    /// Finishes a token that began with `#`: hands `#!` over to
    /// [hashbang](Lexer::hashbang), otherwise produces a lone `#`.
    fn hash(&mut self) -> LResult<Token> {
        if let Ok('!') = self.peek() {
            self.consume()?.hashbang()
        } else {
            self.produce_op(Kind::Hash)
        }
    }
    /// Finishes a token that began with `#!`: when the next character is `/` or `'`,
    /// the rest is lexed as a line comment; otherwise produces `#!`.
    fn hashbang(&mut self) -> LResult<Token> {
        if matches!(self.peek(), Ok('/' | '\'')) {
            self.line_comment()
        } else {
            self.produce_op(Kind::HashBang)
        }
    }
    /// Finishes a token that began with `<`: produces `<=`, `<<=`, `<<`, or a lone `<`.
    fn less(&mut self) -> LResult<Token> {
        match self.peek() {
            Ok('=') => return self.consume()?.produce_op(Kind::LtEq),
            Ok('<') => {}
            _ => return self.produce_op(Kind::Lt),
        }
        // Second `<` seen: consume it, then check for a trailing `=`
        match self.consume()?.peek() {
            Ok('=') => self.consume()?.produce_op(Kind::LtLtEq),
            _ => self.produce_op(Kind::LtLt),
        }
    }
    /// Finishes a token that began with `-`: produces `-=`, `->`, or a lone `-`.
    fn minus(&mut self) -> LResult<Token> {
        let kind = match self.peek() {
            Ok('=') => Kind::MinusEq,
            Ok('>') => Kind::Arrow,
            _ => return self.produce_op(Kind::Minus),
        };
        self.consume()?.produce_op(kind)
    }
    /// Finishes a token that began with `+`: produces `+=` or a lone `+`.
    fn plus(&mut self) -> LResult<Token> {
        if let Ok('=') = self.peek() {
            self.consume()?.produce_op(Kind::PlusEq)
        } else {
            self.produce_op(Kind::Plus)
        }
    }
    /// Finishes a token that began with `%`: produces `%=` or a lone `%`.
    fn rem(&mut self) -> LResult<Token> {
        if let Ok('=') = self.peek() {
            self.consume()?.produce_op(Kind::RemEq)
        } else {
            self.produce_op(Kind::Rem)
        }
    }
    /// Finishes a token that began with `/`: produces `/=`, lexes a line comment (`//`)
    /// or a block comment (`/*`), or produces a lone `/`.
    fn slash(&mut self) -> LResult<Token> {
        let Ok(next @ ('=' | '/' | '*')) = self.peek() else {
            return self.produce_op(Kind::Slash);
        };
        // All three continuations consume the peeked character before proceeding
        let lexer = self.consume()?;
        match next {
            '/' => lexer.line_comment(),
            '*' => lexer.block_comment(),
            _ => lexer.produce_op(Kind::SlashEq),
        }
    }
    /// Finishes a token that began with `*`: produces `*=` or a lone `*`.
    fn star(&mut self) -> LResult<Token> {
        if matches!(self.peek(), Ok('=')) {
            return self.consume()?.produce_op(Kind::StarEq);
        }
        self.produce_op(Kind::Star)
    }
    /// Finishes a token that began with `^`: produces `^=`, `^^`, or a lone `^`.
    fn xor(&mut self) -> LResult<Token> {
        let kind = match self.peek() {
            Ok('=') => Kind::XorEq,
            Ok('^') => Kind::XorXor,
            _ => return self.produce_op(Kind::Xor),
        };
        self.consume()?.produce_op(kind)
    }
    /// Removes the next character from the underlying iterator and returns it.
    ///
    /// The lexer's position is updated only when a character was actually taken;
    /// returns [None] when the iterator is exhausted.
    pub fn take(&mut self) -> Option<char> {
        self.iter.next().map(|(_, taken)| {
            self.advance_tail();
            taken
        })
    }
    /// Takes the next character only when it equals `expected`.
    ///
    /// Returns [None], leaving the lexer untouched, when the next character differs
    /// or no characters remain.
    pub fn next_if(&mut self, expected: char) -> Option<char> {
        match self.iter.next_if(|&(_, candidate)| candidate == expected) {
            Some((_, matched)) => {
                self.advance_tail();
                Some(matched)
            }
            None => None,
        }
    }
    /// Discards the next character (if any) and updates the lexer's position.
    ///
    /// Returns `self` so that calls can be chained.
    pub fn consume(&mut self) -> &mut Self {
        // The character itself is not needed; the position update happens regardless
        let _discarded = self.iter.next();
        self.advance_tail();
        self
    }
    /// Produces an [Error] at the start of the current token
    fn error(&self, reason: Reason) -> Error {
        Error { reason, line: self.line(), col: self.col() }
    }
    /// Produces a token of the given `kind`, using an owned copy of the current
    /// [lexeme](Lexer::lexeme) as its data
    fn produce(&mut self, kind: Kind) -> LResult<Token> {
        let data = String::from(self.lexeme());
        self.produce_with(kind, data)
    }
    /// Produces a token of the given `kind` carrying the provided `data`.
    ///
    /// The token is located at the start of the current token; afterwards the token
    /// start is moved up to the current end, ready for the next token.
    fn produce_with(&mut self, kind: Kind, data: impl Into<TokenData>) -> LResult<Token> {
        let (line, col) = self.tail_loc;
        // The next token begins where this one ended
        (self.tail, self.tail_loc) = (self.head, self.head_loc);
        Ok(Token::new(kind, data, line, col))
    }
    /// Produces a token of the given `kind` whose data is the unit value `()`
    fn produce_op(&mut self, kind: Kind) -> LResult<Token> {
        let no_data = ();
        self.produce_with(kind, no_data)
    }
    /// Consumes characters for as long as the next one is whitespace.
    ///
    /// Stops at the first non-whitespace character or at the end of input.
    fn skip_whitespace(&mut self) -> &mut Self {
        while let Some(next) = self.peek() {
            if !next.is_whitespace() {
                break;
            }
            self.consume();
        }
        self
    }
    /// Starts a new token by moving the token start (`tail`, `tail_loc`) up to the
    /// current end (`head`, `head_loc`)
    fn start_token(&mut self) -> &mut Self {
        (self.tail, self.tail_loc) = (self.head, self.head_loc);
        self
    }
    /// Scans through the text, searching for the next [Token]
    ///
    /// Skips leading whitespace, marks the start of a new token, then classifies the
    /// token in up to three stages: by its first character, then (for operators and
    /// `0`-prefixed literals) by its second character, then by its third.
    ///
    /// # Errors
    /// - [Reason::EndOfFile] when no characters remain after skipping whitespace.
    /// - [Reason::UnexpectedChar] when the first character cannot start any token; the
    ///   offending character is consumed before the error is returned.
    /// - Any error returned by the string, character, digit, identifier, or comment
    ///   lexers this function delegates to.
    pub fn scan(&mut self) -> LResult<Token> {
        use TokenKind::*;
        // Stage 1: classify by the first character, which is only peeked here.
        // The arms below follow the ASCII order of the punctuation characters:
        // !"#%&'()*+,-./:;<=>?@[\\]^`{|}~
        let tok = match self
            .skip_whitespace()
            .start_token()
            .peek()
            .ok_or_else(|| self.error(Reason::EndOfFile))?
        {
            '!' => Bang,
            // Strings and characters are handed off without consuming the opening quote
            '"' => return self.string(),
            '#' => Hash,
            '%' => Rem,
            '&' => Amp,
            '\'' => return self.character(),
            '(' => LParen,
            ')' => RParen,
            '*' => Star,
            '+' => Plus,
            ',' => Comma,
            '-' => Minus,
            '.' => Dot,
            '/' => Slash,
            // `Literal` acts as a marker here: stage 2 checks for a base prefix after the `0`
            '0' => TokenKind::Literal,
            '1'..='9' => return self.digits::<10>(),
            ':' => Colon,
            ';' => Semi,
            '<' => Lt,
            '=' => Eq,
            '>' => Gt,
            '?' => Question,
            '@' => At,
            '[' => LBrack,
            '\\' => Backslash,
            ']' => RBrack,
            '^' => Xor,
            '`' => Grave,
            '{' => LCurly,
            '|' => Bar,
            '}' => RCurly,
            '~' => Tilde,
            '_' => return self.identifier(),
            c if is_xid_start(c) => return self.identifier(),
            e => {
                // Build the error before consuming the character, then consume it
                let err = Err(self.error(Reason::UnexpectedChar(e)));
                let _ = self.consume();
                err?
            }
        };
        // Stage 2: consume the first character, then peek the second.
        // Handle digraphs
        let tok = match (tok, self.consume().peek()) {
            // `0` followed by a base character: consume the base character, then lex the digits
            (Literal, Some('b')) => return self.consume().digits::<2>(),
            (Literal, Some('d')) => return self.consume().digits::<10>(),
            (Literal, Some('o')) => return self.consume().digits::<8>(),
            (Literal, Some('x')) => return self.consume().digits::<16>(),
            (Literal, Some('~')) => return self.consume().digits::<36>(),
            // No base character after the `0`: lex the rest as base 10
            (Literal, _) => return self.digits::<10>(),
            (Amp, Some('&')) => AmpAmp,
            (Amp, Some('=')) => AmpEq,
            (Bang, Some('!')) => BangBang,
            (Bang, Some('=')) => BangEq,
            (Bar, Some('|')) => BarBar,
            (Bar, Some('=')) => BarEq,
            (Colon, Some(':')) => ColonColon,
            (Dot, Some('.')) => DotDot,
            (Eq, Some('=')) => EqEq,
            (Eq, Some('>')) => FatArrow,
            (Gt, Some('=')) => GtEq,
            (Gt, Some('>')) => GtGt,
            (Hash, Some('!')) => HashBang,
            (Lt, Some('=')) => LtEq,
            (Lt, Some('<')) => LtLt,
            (Minus, Some('=')) => MinusEq,
            (Minus, Some('>')) => Arrow,
            (Plus, Some('=')) => PlusEq,
            (Rem, Some('=')) => RemEq,
            // Comments are lexed by their own functions; the second character is still unconsumed
            (Slash, Some('*')) => return self.block_comment()?.produce(Kind::Comment),
            (Slash, Some('/')) => return self.line_comment(),
            (Slash, Some('=')) => SlashEq,
            (Star, Some('=')) => StarEq,
            (Xor, Some('=')) => XorEq,
            (Xor, Some('^')) => XorXor,
            // The second character does not extend the token: produce the single-character token
            _ => return self.produce_op(tok),
        };
        // Stage 3: consume the second character, then peek the third.
        // Handle trigraphs
        let tok = match (tok, self.consume().peek()) {
            // `#!/` is lexed as a line comment
            (HashBang, Some('/')) => return self.line_comment(),
            (DotDot, Some('=')) => DotDotEq,
            (GtGt, Some('=')) => GtGtEq,
            (LtLt, Some('=')) => LtLtEq,
            // The third character does not extend the token: produce the digraph
            _ => return self.produce_op(tok),
        };
        // Consume the third character and produce the trigraph
        self.consume().produce_op(tok)
    }
 }
 /// Comments
 impl Lexer<'_> {
    /// Consumes until the next newline '\n', producing a [Comment](Kind::Comment)
    fn line_comment(&mut self) -> LResult<Token> {
-        let mut comment = String::new();
+        while self.consume().peek().is_some_and(|c| c != '\n') {}
-        while Ok('\n') != self.peek() {
+        self.produce(Kind::Comment)
            comment.push(self.next()?);
    }
-        self.produce(Kind::Comment, comment)
+
    /// Consumes nested block-comments. Does not produce by itself.
    fn block_comment(&mut self) -> LResult<&mut Self> {
        self.consume();
        while let Some(c) = self.take() {
            match (c, self.peek()) {
                ('/', Some('*')) => self.block_comment()?,
                ('*', Some('/')) => return Ok(self.consume()),
                _ => continue,
            };
        }
-    fn block_comment(&mut self) -> LResult<Token> {
+        Err(self.error(Reason::UnmatchedDelimiters('/')))
        let mut comment = String::new();
        while let Ok(c) = self.next() {
            if '*' == c && Ok('/') == self.peek() {
                break;
            }
            comment.push(c);
        }
        self.consume()?.produce(Kind::Comment, comment)
    }
 }
 /// Identifiers
 impl Lexer<'_> {
    /// Produces an [Identifier](Kind::Identifier) or keyword
    fn identifier(&mut self) -> LResult<Token> {
-        let mut out = String::from(self.xid_start()?);
+        while self.consume().peek().is_some_and(is_xid_continue) {}
-        while let Ok(c) = self.xid_continue() {
+        if let Ok(keyword) = Kind::from_str(self.lexeme()) {
-            out.push(c)
+            self.produce_with(keyword, ())
        }
        if let Ok(keyword) = Kind::from_str(&out) {
            self.produce(keyword, ())
        } else {
-            self.produce(Kind::Identifier, TokenData::String(out))
+            self.produce(Kind::Identifier)
        }
    }
    fn xid_start(&mut self) -> LResult<char> {
        match self.peek()? {
            xid if xid == '_' || is_xid_start(xid) => {
                self.consume()?;
                Ok(xid)
            }
            bad => Err(Error::not_identifier(bad, self.line(), self.col())),
        }
    }
    fn xid_continue(&mut self) -> LResult<char> {
        match self.peek()? {
            xid if is_xid_continue(xid) => {
                self.consume()?;
                Ok(xid)
            }
            bad => Err(Error::not_identifier(bad, self.line(), self.col())),
        }
    }
 }
 /// Integers
 impl Lexer<'_> {
-    fn int_with_base(&mut self) -> LResult<Token> {
+    /// Produces a [Literal](Kind::Literal) with an integer or float value.
        match self.peek() {
            Ok('~') => self.consume()?.digits::<36>(),
            Ok('x') => self.consume()?.digits::<16>(),
            Ok('d') => self.consume()?.digits::<10>(),
            Ok('o') => self.consume()?.digits::<8>(),
            Ok('b') => self.consume()?.digits::<2>(),
            Ok('0'..='9' | '.') => self.digits::<10>(),
            _ => self.produce(Kind::Literal, 0),
        }
    }
    fn digits<const B: u32>(&mut self) -> LResult<Token> {
        let mut value = 0;
-        while let Ok(true) = self.peek().as_ref().map(char::is_ascii_alphanumeric) {
+        while let Some(true) = self.peek().as_ref().map(char::is_ascii_alphanumeric) {
            value = value * B as u128 + self.digit::<B>()? as u128;
        }
        // TODO: find a better way to handle floats in the tokenizer
        match self.peek() {
-            Ok('.') => {
+            Some('.') => {
                // FIXME: hack: 0.. is not [0.0, '.']
-                if let Ok('.') = self.clone().consume()?.next() {
+                if let Some('.') = self.clone().consume().take() {
-                    return self.produce(Kind::Literal, value);
+                    return self.produce_with(Kind::Literal, value);
                }
                let mut float = format!("{value}.");
-                self.consume()?;
+                self.consume();
-                while let Ok(true) = self.peek().as_ref().map(char::is_ascii_digit) {
+                while let Some(true) = self.peek().as_ref().map(char::is_ascii_digit) {
-                    float.push(self.iter.next().unwrap_or_default());
+                    float.push(self.iter.next().map(|(_, c)| c).unwrap_or_default());
                }
                let float = f64::from_str(&float).expect("must be parsable as float");
-                self.produce(Kind::Literal, float)
+                self.produce_with(Kind::Literal, float)
            }
-            _ => self.produce(Kind::Literal, value),
+            _ => self.produce_with(Kind::Literal, value),
        }
    }
    /// Consumes a single digit of base [B](Lexer::digit)
    fn digit<const B: u32>(&mut self) -> LResult<u32> {
-        let digit = self.peek()?;
+        let digit = self.take().ok_or_else(|| self.error(Reason::EndOfFile))?;
        self.consume()?;
        digit
            .to_digit(B)
-            .ok_or(Error::invalid_digit(digit, self.line(), self.col()))
+            .ok_or_else(|| self.error(Reason::InvalidDigit(digit)))
    }
 }
 /// Strings and characters
 impl Lexer<'_> {
-    fn string(&mut self) -> LResult<Token> {
+    /// Produces a [Literal](Kind::Literal) with a pre-escaped [String]
-        let mut value = String::new();
+    pub fn string(&mut self) -> Result<Token, Error> {
-        while '"'
+        let mut lexeme = String::new();
-            != self
+        self.consume();
-                .peek()
+        loop {
-                .map_err(|e| e.mask_reason(Reason::UnmatchedDelimiters('"')))?
+            lexeme.push(match self.take() {
-        {
+                None => Err(self.error(Reason::UnmatchedDelimiters('"')))?,
-            value.push(self.unescape()?)
+                Some('\\') => self.unescape()?,
                Some('"') => break,
                Some(c) => c,
            })
        }
-        self.consume()?.produce(Kind::Literal, value)
+        lexeme.shrink_to_fit();
        self.produce_with(Kind::Literal, lexeme)
    }
-    fn character(&mut self) -> LResult<Token> {
+
-        let out = self.unescape()?;
+    /// Produces a [Literal](Kind::Literal) with a pre-escaped [char]
-        match self.peek()? {
+    fn character(&mut self) -> Result<Token, Error> {
-            '\'' => self.consume()?.produce(Kind::Literal, out),
+        let c = match self.consume().take() {
-            _ => Err(Error::unmatched_delimiters('\'', self.line(), self.col())),
+            Some('\\') => self.unescape()?,
            Some(c) => c,
            None => '\0',
        };
        if self.take().is_some_and(|c| c == '\'') {
            self.produce_with(Kind::Literal, c)
        } else {
            Err(self.error(Reason::UnmatchedDelimiters('\'')))
        }
    }
-    /// Unescape a single character
+
    /// Unescapes a single character
    #[rustfmt::skip]
    fn unescape(&mut self) -> LResult<char> {
-        match self.next() {
+        Ok(match self.take().ok_or_else(|| self.error(Reason::EndOfFile))? {
-            Ok('\\') => (),
+            ' ' => '\u{a0}',
-            other => return other,
+            '0' => '\0',
        }
        Ok(match self.next()? {
            'a' => '\x07',
            'b' => '\x08',
            'e' => '\x1b',
            'f' => '\x0c',
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            'x' => self.hex_escape()?,
            'u' => self.unicode_escape()?,
-            '0' => '\0',
+            'x' => self.hex_escape()?,
            chr => chr,
        })
    }
-    /// unescape a single 2-digit hex escape
+    /// Unescapes a single 2-digit hex escape
    fn hex_escape(&mut self) -> LResult<char> {
        let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
-        char::from_u32(out).ok_or(Error::bad_unicode(out, self.line(), self.col()))
+        char::from_u32(out).ok_or_else(|| self.error(Reason::BadUnicode(out)))
    }
-    /// unescape a single \u{} unicode escape
+
-    fn unicode_escape(&mut self) -> LResult<char> {
+    /// Unescapes a single \u{} unicode escape
    pub fn unicode_escape(&mut self) -> Result<char, Error> {
        self.next_if('{')
            .ok_or_else(|| self.error(Reason::InvalidEscape('u')))?;
        let mut out = 0;
-        let Ok('{') = self.peek() else {
+        while let Some(c) = self.take() {
-            return Err(Error::invalid_escape('u', self.line(), self.col()));
+            if c == '}' {
-        };
+                return char::from_u32(out).ok_or_else(|| self.error(Reason::BadUnicode(out)));
        self.consume()?;
        while let Ok(c) = self.peek() {
            match c {
                '}' => {
                    self.consume()?;
                    return char::from_u32(out).ok_or(Error::bad_unicode(
                        out,
                        self.line(),
                        self.col(),
                    ));
            }
-                _ => out = (out << 4) + self.digit::<16>()?,
+            out = out * 16
                + c.to_digit(16)
                    .ok_or_else(|| self.error(Reason::InvalidDigit(c)))?;
        }
-        }
+        Err(self.error(Reason::UnmatchedDelimiters('}')))
        Err(Error::invalid_escape('u', self.line(), self.col()))
    }
 }
@ -508,8 +469,6 @@ pub mod error {
        UnmatchedDelimiters(char),
        /// Found a character that doesn't belong to any [TokenKind](cl_token::TokenKind)
        UnexpectedChar(char),
        /// Found a character that's not valid in identifiers while looking for an identifier
        NotIdentifier(char),
        /// Found a character that's not valid in an escape sequence while looking for an escape
        /// sequence
        UnknownEscape(char),
@ -517,30 +476,12 @@ pub mod error {
        InvalidEscape(char),
        /// Character is not a valid digit in the requested base
        InvalidDigit(char),
        /// Base conversion requested, but the base character was not in the set of known
        /// characters
        UnknownBase(char),
        /// Unicode escape does not map to a valid unicode code-point
        BadUnicode(u32),
        /// Reached end of input
        EndOfFile,
    }
    // Declares one constructor per entry via `error_impl`: the function on the left
    // builds an error whose reason is the expression on the right.
    error_impl! {
        unmatched_delimiters(c: char) => Reason::UnmatchedDelimiters(c),
        unexpected_char(c: char) => Reason::UnexpectedChar(c),
        not_identifier(c: char) => Reason::NotIdentifier(c),
        unknown_escape(e: char) => Reason::UnknownEscape(e),
        invalid_escape(e: char) => Reason::InvalidEscape(e),
        invalid_digit(digit: char) => Reason::InvalidDigit(digit),
        unknown_base(base: char) => Reason::UnknownBase(base),
        bad_unicode(value: u32) => Reason::BadUnicode(value),
        end_of_file => Reason::EndOfFile,
    }
    impl Error {
        /// Replaces the [Reason] of this error with `reason`, keeping every other field
        pub(super) fn mask_reason(self, reason: Reason) -> Self {
            let mut masked = self;
            masked.reason = reason;
            masked
        }
        /// Returns the [Reason] for this error
        pub fn reason(&self) -> &Reason {
            &self.reason
@ -550,14 +491,6 @@ pub mod error {
            (self.line, self.col)
        }
    }
    // Generates `pub(super)` constructor functions on `Error`.
    //
    // Each comma-separated entry has the form `name(param: Type, ...) => reason` (the
    // parenthesized parameter list is optional) and expands to a function `name` which
    // takes those parameters followed by `line: u32, col: u32`, and returns an `Error`
    // with the given reason at that location.
    macro error_impl ($($fn:ident$(( $($p:ident: $t:ty),* ))? => $reason:expr),*$(,)?) {
        // Not every generated constructor is necessarily called
        #[allow(dead_code)]
        impl Error {
            $(pub(super) fn $fn ($($($p: $t),*,)? line: u32, col: u32) -> Self {
                Self { reason: $reason, line, col }
            })*
        }
    }
    impl std::error::Error for Error {}
    impl Display for Error {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@ -567,14 +500,12 @@ pub mod error {
    impl Display for Reason {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            match self {
-                Reason::UnmatchedDelimiters(c) => write! {f, "Unmatched `{c}` in input"},
+                Reason::UnmatchedDelimiters(c) => write! {f, "Unmatched `{c:?}` in input"},
-                Reason::UnexpectedChar(c) => write!(f, "Character `{c}` not expected"),
+                Reason::UnexpectedChar(c) => write!(f, "Character `{c:?}` not expected"),
                Reason::NotIdentifier(c) => write!(f, "Character `{c}` not valid in identifiers"),
                Reason::UnknownEscape(c) => write!(f, "`\\{c}` is not a known escape sequence"),
                Reason::InvalidEscape(c) => write!(f, "Escape sequence `\\{c}`... is malformed"),
-                Reason::InvalidDigit(c) => write!(f, "`{c}` is not a valid digit"),
+                Reason::InvalidDigit(c) => write!(f, "`{c:?}` is not a valid digit"),
-                Reason::UnknownBase(c) => write!(f, "`0{c}`... is not a valid base"),
+                Reason::BadUnicode(c) => write!(f, "`\\u{{{c:x}}}` is not valid unicode"),
                Reason::BadUnicode(c) => write!(f, "`{c}` is not a valid unicode code-point"),
                Reason::EndOfFile => write!(f, "Reached end of input"),
            }
        }