lexer: Document stuff
This commit is contained in:
		
							
								
								
									
										57
									
								
								src/lexer.rs
									
									
									
									
									
								
							
							
						
						
									
										57
									
								
								src/lexer.rs
									
									
									
									
									
								
							@@ -43,6 +43,7 @@ impl<'t> Lexer<'t> {
 | 
				
			|||||||
        self.iter.peek().map(|&(_, c)| c)
 | 
					        self.iter.peek().map(|&(_, c)| c)
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Advances the tail to the current character index
 | 
				
			||||||
    fn advance_tail(&mut self) {
 | 
					    fn advance_tail(&mut self) {
 | 
				
			||||||
        match self.iter.peek() {
 | 
					        match self.iter.peek() {
 | 
				
			||||||
            Some(&(idx, _)) => self.tail = idx as u32,
 | 
					            Some(&(idx, _)) => self.tail = idx as u32,
 | 
				
			||||||
@@ -51,44 +52,45 @@ impl<'t> Lexer<'t> {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Takes the last character
 | 
					    /// Takes the last character
 | 
				
			||||||
    pub fn take(&mut self) -> Option<char> {
 | 
					    fn take(&mut self) -> Option<char> {
 | 
				
			||||||
        let (_, c) = self.iter.next()?;
 | 
					        let (_, c) = self.iter.next()?;
 | 
				
			||||||
        self.advance_tail();
 | 
					        self.advance_tail();
 | 
				
			||||||
        Some(c)
 | 
					        Some(c)
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pub fn next_if(&mut self, expected: char) -> Option<char> {
 | 
					    fn next_if(&mut self, expected: char) -> Option<char> {
 | 
				
			||||||
        let (_, c) = self.iter.next_if(|&(_, c)| c == expected)?;
 | 
					        let (_, c) = self.iter.next_if(|&(_, c)| c == expected)?;
 | 
				
			||||||
        self.advance_tail();
 | 
					        self.advance_tail();
 | 
				
			||||||
        Some(c)
 | 
					        Some(c)
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Consumes the last-peeked character, advancing the tail
 | 
					    /// Consumes the last-peeked character, advancing the tail
 | 
				
			||||||
    pub fn consume(&mut self) -> &mut Self {
 | 
					    fn consume(&mut self) -> &mut Self {
 | 
				
			||||||
        self.iter.next();
 | 
					        self.iter.next();
 | 
				
			||||||
        self.advance_tail();
 | 
					        self.advance_tail();
 | 
				
			||||||
        self
 | 
					        self
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Produces a LexError at the start of the current token
 | 
					    /// Produces a LexError at the start of the current token
 | 
				
			||||||
    pub fn error(&self, res: &'static str) -> LexError {
 | 
					    fn error(&self, res: &'static str) -> LexError {
 | 
				
			||||||
        LexError { pos: self.head, res }
 | 
					        LexError { pos: self.head, res }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pub fn as_str(&self) -> (&'t str, Span) {
 | 
					    /// Gets the Lexer's current &[str] lexeme and [Span]
 | 
				
			||||||
 | 
					    fn as_str(&self) -> (&'t str, Span) {
 | 
				
			||||||
        let span = Span(self.head, self.tail);
 | 
					        let span = Span(self.head, self.tail);
 | 
				
			||||||
        (&self.text[Range::from(span)], span)
 | 
					        (&self.text[Range::from(span)], span)
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Produces a Token
 | 
					    /// Produces a Token
 | 
				
			||||||
    pub fn produce(&mut self, kind: TKind) -> Token {
 | 
					    fn produce(&mut self, kind: TKind) -> Token {
 | 
				
			||||||
        self.advance_tail();
 | 
					        self.advance_tail();
 | 
				
			||||||
        let (lexeme, span) = self.as_str();
 | 
					        let (lexeme, span) = self.as_str();
 | 
				
			||||||
        self.head = self.tail;
 | 
					        self.head = self.tail;
 | 
				
			||||||
        Token { lexeme: Lexeme::String(lexeme.to_owned()), kind, span }
 | 
					        Token { lexeme: Lexeme::String(lexeme.to_owned()), kind, span }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pub fn produce_with_lexeme(&mut self, kind: TKind, lexeme: Lexeme) -> Token {
 | 
					    fn produce_with_lexeme(&mut self, kind: TKind, lexeme: Lexeme) -> Token {
 | 
				
			||||||
        self.advance_tail();
 | 
					        self.advance_tail();
 | 
				
			||||||
        let span = Span(self.head, self.tail);
 | 
					        let span = Span(self.head, self.tail);
 | 
				
			||||||
        self.head = self.tail;
 | 
					        self.head = self.tail;
 | 
				
			||||||
@@ -96,14 +98,14 @@ impl<'t> Lexer<'t> {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Consumes 0 or more whitespace
 | 
					    /// Consumes 0 or more whitespace
 | 
				
			||||||
    pub fn skip_whitespace(&mut self) -> &mut Self {
 | 
					    fn skip_whitespace(&mut self) -> &mut Self {
 | 
				
			||||||
        while self.peek().is_some_and(char::is_whitespace) {
 | 
					        while self.peek().is_some_and(char::is_whitespace) {
 | 
				
			||||||
            let _ = self.consume();
 | 
					            let _ = self.consume();
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        self
 | 
					        self
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pub fn start_token(&mut self) -> &mut Self {
 | 
					    fn start_token(&mut self) -> &mut Self {
 | 
				
			||||||
        self.head = self.tail;
 | 
					        self.head = self.tail;
 | 
				
			||||||
        self
 | 
					        self
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -203,6 +205,7 @@ impl<'t> Lexer<'t> {
 | 
				
			|||||||
        Ok(self.consume().produce(tok))
 | 
					        Ok(self.consume().produce(tok))
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Elides the trailing [Token] `kind` when it comes before a list terminator.
 | 
				
			||||||
    pub fn trailing(&mut self, kind: TKind) -> Result<Token, LexError> {
 | 
					    pub fn trailing(&mut self, kind: TKind) -> Result<Token, LexError> {
 | 
				
			||||||
        Ok(match self.skip_whitespace().peek() {
 | 
					        Ok(match self.skip_whitespace().peek() {
 | 
				
			||||||
            // Some(')') => self.consume().produce(TKind::RParen), // maybe.
 | 
					            // Some(')') => self.consume().produce(TKind::RParen), // maybe.
 | 
				
			||||||
@@ -212,11 +215,14 @@ impl<'t> Lexer<'t> {
 | 
				
			|||||||
        })
 | 
					        })
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Consumes characters until the lexer reaches a newline `'\n'`
 | 
				
			||||||
    pub fn line_comment(&mut self) -> Result<Token, LexError> {
 | 
					    pub fn line_comment(&mut self) -> Result<Token, LexError> {
 | 
				
			||||||
        while self.consume().peek().is_some_and(|c| c != '\n') {}
 | 
					        while self.consume().peek().is_some_and(|c| c != '\n') {}
 | 
				
			||||||
        Ok(self.produce(TKind::Comment))
 | 
					        Ok(self.produce(TKind::Comment))
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Consumes characters until the lexer reaches the end of a *nested* block comment.
 | 
				
			||||||
 | 
					    /// This allows you to arbitrarily comment out code, even if that code has a block comment.
 | 
				
			||||||
    pub fn block_comment(&mut self) -> Result<&mut Self, LexError> {
 | 
					    pub fn block_comment(&mut self) -> Result<&mut Self, LexError> {
 | 
				
			||||||
        self.consume();
 | 
					        self.consume();
 | 
				
			||||||
        while let Some(c) = self.take() {
 | 
					        while let Some(c) = self.take() {
 | 
				
			||||||
@@ -229,6 +235,11 @@ impl<'t> Lexer<'t> {
 | 
				
			|||||||
        Err(self.error("Unterminated block comment"))
 | 
					        Err(self.error("Unterminated block comment"))
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Consumes characters until it reaches a character not in [is_xid_continue].
 | 
				
			||||||
 | 
					    ///
 | 
				
			||||||
 | 
					    /// Always consumes the first character.
 | 
				
			||||||
 | 
					    ///
 | 
				
			||||||
 | 
					    /// Maps the result to either a [TKind::Identifier] or a [TKind] keyword.
 | 
				
			||||||
    pub fn identifier(&mut self) -> Result<Token, LexError> {
 | 
					    pub fn identifier(&mut self) -> Result<Token, LexError> {
 | 
				
			||||||
        while self.consume().peek().is_some_and(is_xid_continue) {}
 | 
					        while self.consume().peek().is_some_and(is_xid_continue) {}
 | 
				
			||||||
        let (lexeme, _span) = self.as_str();
 | 
					        let (lexeme, _span) = self.as_str();
 | 
				
			||||||
@@ -265,6 +276,7 @@ impl<'t> Lexer<'t> {
 | 
				
			|||||||
        })
 | 
					        })
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Eagerly parses a character literal starting at the current lexer position.
 | 
				
			||||||
    pub fn character(&mut self) -> Result<Token, LexError> {
 | 
					    pub fn character(&mut self) -> Result<Token, LexError> {
 | 
				
			||||||
        let c = match self.consume().take() {
 | 
					        let c = match self.consume().take() {
 | 
				
			||||||
            Some('\\') => self.escape()?,
 | 
					            Some('\\') => self.escape()?,
 | 
				
			||||||
@@ -278,6 +290,7 @@ impl<'t> Lexer<'t> {
 | 
				
			|||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Eagerly parses a string literal starting at the current lexer position.
 | 
				
			||||||
    pub fn string(&mut self) -> Result<Token, LexError> {
 | 
					    pub fn string(&mut self) -> Result<Token, LexError> {
 | 
				
			||||||
        let mut lexeme = String::new();
 | 
					        let mut lexeme = String::new();
 | 
				
			||||||
        self.consume();
 | 
					        self.consume();
 | 
				
			||||||
@@ -293,28 +306,31 @@ impl<'t> Lexer<'t> {
 | 
				
			|||||||
        Ok(self.produce_with_lexeme(TKind::String, Lexeme::String(lexeme)))
 | 
					        Ok(self.produce_with_lexeme(TKind::String, Lexeme::String(lexeme)))
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Parses a single escape sequence into its resulting char value.
 | 
				
			||||||
    pub fn escape(&mut self) -> Result<char, LexError> {
 | 
					    pub fn escape(&mut self) -> Result<char, LexError> {
 | 
				
			||||||
        Ok(match self.take().ok_or_else(|| self.error("EOF"))? {
 | 
					        Ok(match self.take().ok_or_else(|| self.error("EOF"))? {
 | 
				
			||||||
            ' ' => '\u{a0}',
 | 
					            ' ' => '\u{a0}', // Non-breaking space
 | 
				
			||||||
            '0' => '\0',
 | 
					            '0' => '\0',     // C0 Null Character
 | 
				
			||||||
            'a' => '\x07',
 | 
					            'a' => '\x07',   // C0 Bell
 | 
				
			||||||
            'b' => '\x08',
 | 
					            'b' => '\x08',   // C0 Backspace
 | 
				
			||||||
            'e' => '\x1b',
 | 
					            'e' => '\x1b',   // C0 Escape
 | 
				
			||||||
            'f' => '\x0c',
 | 
					            'f' => '\x0c',   // Form Feed
 | 
				
			||||||
            'n' => '\n',
 | 
					            'n' => '\n',     // New Line
 | 
				
			||||||
            'r' => '\r',
 | 
					            'r' => '\r',     // Carriage Return
 | 
				
			||||||
            't' => '\t',
 | 
					            't' => '\t',     // Tab
 | 
				
			||||||
            'u' => self.unicode_escape()?,
 | 
					            'u' => self.unicode_escape()?,
 | 
				
			||||||
            'x' => self.hex_escape()?,
 | 
					            'x' => self.hex_escape()?,
 | 
				
			||||||
            c => c,
 | 
					            c => c,
 | 
				
			||||||
        })
 | 
					        })
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Parses two hex-digits and constructs a [char] out of them.
 | 
				
			||||||
    pub fn hex_escape(&mut self) -> Result<char, LexError> {
 | 
					    pub fn hex_escape(&mut self) -> Result<char, LexError> {
 | 
				
			||||||
        let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
 | 
					        let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
 | 
				
			||||||
        char::from_u32(out).ok_or(self.error("Invalid digit"))
 | 
					        char::from_u32(out).ok_or(self.error("Invalid digit"))
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Parses a sequence of `{}`-bracketed hex-digits and constructs a [char] out of them.
 | 
				
			||||||
    pub fn unicode_escape(&mut self) -> Result<char, LexError> {
 | 
					    pub fn unicode_escape(&mut self) -> Result<char, LexError> {
 | 
				
			||||||
        self.next_if('{')
 | 
					        self.next_if('{')
 | 
				
			||||||
            .ok_or_else(|| self.error("No unicode escape opener"))?;
 | 
					            .ok_or_else(|| self.error("No unicode escape opener"))?;
 | 
				
			||||||
@@ -328,6 +344,10 @@ impl<'t> Lexer<'t> {
 | 
				
			|||||||
        Err(self.error("Unterminated unicode escape"))
 | 
					        Err(self.error("Unterminated unicode escape"))
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Parses a sequence of digits (and underscores) in base `BASE`, where 2 <= `BASE` <= 36.
 | 
				
			||||||
 | 
					    ///
 | 
				
			||||||
 | 
					    /// If the sequence of digits exceeds the bounds of a [u128], the resulting number will wrap
 | 
				
			||||||
 | 
					    /// around 2^128.
 | 
				
			||||||
    pub fn digits<const BASE: u32>(&mut self) -> Result<Token, LexError> {
 | 
					    pub fn digits<const BASE: u32>(&mut self) -> Result<Token, LexError> {
 | 
				
			||||||
        let mut int: u128 = 0;
 | 
					        let mut int: u128 = 0;
 | 
				
			||||||
        while let Some(c) = self.peek() {
 | 
					        while let Some(c) = self.peek() {
 | 
				
			||||||
@@ -342,6 +362,7 @@ impl<'t> Lexer<'t> {
 | 
				
			|||||||
        Ok(self.produce_with_lexeme(TKind::Integer, Lexeme::Integer(int, BASE)))
 | 
					        Ok(self.produce_with_lexeme(TKind::Integer, Lexeme::Integer(int, BASE)))
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Parses a single digit in base `BASE` as a u32, where 2 <= `BASE` <= 36
 | 
				
			||||||
    pub fn digit<const BASE: u32>(&mut self) -> Result<u32, LexError> {
 | 
					    pub fn digit<const BASE: u32>(&mut self) -> Result<u32, LexError> {
 | 
				
			||||||
        if let Some(digit) = self.take().and_then(|c| c.to_digit(BASE)) {
 | 
					        if let Some(digit) = self.take().and_then(|c| c.to_digit(BASE)) {
 | 
				
			||||||
            Ok(digit)
 | 
					            Ok(digit)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user