lexer: Document stuff
src/lexer.rs
@@ -43,6 +43,7 @@ impl<'t> Lexer<'t> {
         self.iter.peek().map(|&(_, c)| c)
     }
 
+    /// Advances the tail to the current character index
     fn advance_tail(&mut self) {
         match self.iter.peek() {
             Some(&(idx, _)) => self.tail = idx as u32,
@@ -51,44 +52,45 @@ impl<'t> Lexer<'t> {
     }
 
     /// Takes the last character
-    pub fn take(&mut self) -> Option<char> {
+    fn take(&mut self) -> Option<char> {
         let (_, c) = self.iter.next()?;
         self.advance_tail();
         Some(c)
     }
 
-    pub fn next_if(&mut self, expected: char) -> Option<char> {
+    fn next_if(&mut self, expected: char) -> Option<char> {
         let (_, c) = self.iter.next_if(|&(_, c)| c == expected)?;
         self.advance_tail();
         Some(c)
     }
 
     /// Consumes the last-peeked character, advancing the tail
-    pub fn consume(&mut self) -> &mut Self {
+    fn consume(&mut self) -> &mut Self {
         self.iter.next();
         self.advance_tail();
         self
     }
 
     /// Produces a LexError at the start of the current token
-    pub fn error(&self, res: &'static str) -> LexError {
+    fn error(&self, res: &'static str) -> LexError {
         LexError { pos: self.head, res }
     }
 
-    pub fn as_str(&self) -> (&'t str, Span) {
+    /// Gets the Lexer's current &[str] lexeme and [Span]
+    fn as_str(&self) -> (&'t str, Span) {
         let span = Span(self.head, self.tail);
         (&self.text[Range::from(span)], span)
     }
 
     /// Produces a Token
-    pub fn produce(&mut self, kind: TKind) -> Token {
+    fn produce(&mut self, kind: TKind) -> Token {
         self.advance_tail();
         let (lexeme, span) = self.as_str();
         self.head = self.tail;
         Token { lexeme: Lexeme::String(lexeme.to_owned()), kind, span }
     }
 
-    pub fn produce_with_lexeme(&mut self, kind: TKind, lexeme: Lexeme) -> Token {
+    fn produce_with_lexeme(&mut self, kind: TKind, lexeme: Lexeme) -> Token {
         self.advance_tail();
         let span = Span(self.head, self.tail);
         self.head = self.tail;
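Aside (not part of the diff): the now-private helpers above all revolve around the same head/tail bookkeeping — `tail` tracks the byte index of the next unconsumed character, and a token's lexeme is the slice between `head` and `tail`. The snippet below is a standalone Rust sketch of that pattern only; the variable names and the end-of-input handling are assumptions, not this crate's code.

// Standalone sketch of the head/tail pattern: `tail` chases the byte index of
// the next unconsumed char; a lexeme is the &str slice text[head..tail].
fn main() {
    let text = "let x";
    let mut iter = text.char_indices().peekable();
    let (head, mut tail) = (0usize, 0usize);

    // Consume one run of alphabetic characters, the way `take`/`consume` would.
    while iter.peek().is_some_and(|&(_, c)| c.is_alphabetic()) {
        iter.next();
        // advance_tail: the new tail is the index of the next unconsumed char,
        // or the end of input once the iterator is exhausted.
        tail = iter.peek().map_or(text.len(), |&(i, _)| i);
    }

    // produce: slice out the lexeme; the next token would start at `tail`.
    assert_eq!(&text[head..tail], "let");
}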
@@ -96,14 +98,14 @@ impl<'t> Lexer<'t> {
     }
 
     /// Consumes 0 or more whitespace
-    pub fn skip_whitespace(&mut self) -> &mut Self {
+    fn skip_whitespace(&mut self) -> &mut Self {
         while self.peek().is_some_and(char::is_whitespace) {
            let _ = self.consume();
         }
         self
     }
 
-    pub fn start_token(&mut self) -> &mut Self {
+    fn start_token(&mut self) -> &mut Self {
         self.head = self.tail;
         self
     }
@@ -203,6 +205,7 @@ impl<'t> Lexer<'t> {
         Ok(self.consume().produce(tok))
     }
 
+    /// Elides the trailing [Token] `kind` when it comes before a list terminator.
     pub fn trailing(&mut self, kind: TKind) -> Result<Token, LexError> {
         Ok(match self.skip_whitespace().peek() {
             // Some(')') => self.consume().produce(TKind::RParen), // maybe.
@@ -212,11 +215,14 @@ impl<'t> Lexer<'t> {
         })
     }
 
+    /// Consumes characters until the lexer reaches a newline `'\n'`
     pub fn line_comment(&mut self) -> Result<Token, LexError> {
         while self.consume().peek().is_some_and(|c| c != '\n') {}
         Ok(self.produce(TKind::Comment))
     }
 
+    /// Consumes characters until the lexer reaches the end of a *nested* block comment.
+    /// This allows you to arbitrarily comment out code, even if that code has a block comment.
    pub fn block_comment(&mut self) -> Result<&mut Self, LexError> {
         self.consume();
         while let Some(c) = self.take() {
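Aside (not part of the diff): the loop body of `block_comment` is outside this hunk, so the depth-counting shown below is only one plausible way to honour the "nested" guarantee in the new doc comment — the `/*` … `*/` delimiters and the counter are assumptions, not this crate's code.

// Sketch: skip a nested block comment by counting openers and closers.
// Assumes the caller already consumed the initial opening `/*`.
fn skip_nested_block_comment(src: &str) -> Option<usize> {
    let mut chars = src.char_indices().peekable();
    let mut depth = 1usize;
    while let Some((idx, c)) = chars.next() {
        match (c, chars.peek().map(|&(_, c)| c)) {
            ('/', Some('*')) => {
                chars.next();
                depth += 1; // nested opener
            }
            ('*', Some('/')) => {
                chars.next();
                depth -= 1;
                if depth == 0 {
                    return Some(idx + 2); // byte offset just past the matching `*/`
                }
            }
            _ => {}
        }
    }
    None // unterminated
}

fn main() {
    let s = " outer /* inner */ still outer */ rest";
    let end = skip_nested_block_comment(s).unwrap();
    assert_eq!(&s[end..], " rest");
}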
@@ -229,6 +235,11 @@ impl<'t> Lexer<'t> {
         Err(self.error("Unterminated block comment"))
     }
 
+    /// Consumes characters until it reaches a character not in [is_xid_continue].
+    ///
+    /// Always consumes the first character.
+    ///
+    /// Maps the result to either a [TKind::Identifier] or a [TKind] keyword.
     pub fn identifier(&mut self) -> Result<Token, LexError> {
         while self.consume().peek().is_some_and(is_xid_continue) {}
         let (lexeme, _span) = self.as_str();
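Aside (not part of the diff): the new doc comment says the scanned lexeme is mapped to either `TKind::Identifier` or a keyword `TKind`; the real mapping is not shown in this hunk. A toy version of that split, with a placeholder keyword set rather than this language's real one:

// Placeholder keyword table, for illustration only.
fn classify(lexeme: &str) -> &'static str {
    match lexeme {
        "let" | "fn" | "if" | "else" => "keyword",
        _ => "identifier",
    }
}

fn main() {
    assert_eq!(classify("let"), "keyword");
    assert_eq!(classify("letter"), "identifier"); // longest XID run first, then classify
}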
@@ -265,6 +276,7 @@ impl<'t> Lexer<'t> {
         })
     }
 
+    /// Eagerly parses a character literal starting at the current lexer position.
     pub fn character(&mut self) -> Result<Token, LexError> {
         let c = match self.consume().take() {
             Some('\\') => self.escape()?,
@@ -278,6 +290,7 @@ impl<'t> Lexer<'t> {
         }
     }
 
+    // Eagerly parses a string literal starting at the current lexer position.
     pub fn string(&mut self) -> Result<Token, LexError> {
         let mut lexeme = String::new();
         self.consume();
@@ -293,28 +306,31 @@ impl<'t> Lexer<'t> {
         Ok(self.produce_with_lexeme(TKind::String, Lexeme::String(lexeme)))
     }
 
+    /// Parses a single escape sequence into its resulting char value.
     pub fn escape(&mut self) -> Result<char, LexError> {
         Ok(match self.take().ok_or_else(|| self.error("EOF"))? {
-            ' ' => '\u{a0}',
-            '0' => '\0',
-            'a' => '\x07',
-            'b' => '\x08',
-            'e' => '\x1b',
-            'f' => '\x0c',
-            'n' => '\n',
-            'r' => '\r',
-            't' => '\t',
+            ' ' => '\u{a0}', // Non-breaking space
+            '0' => '\0', // C0 Null Character
+            'a' => '\x07', // C0 Acknowledge
+            'b' => '\x08', // C0 Bell
+            'e' => '\x1b', // C0 Escape
+            'f' => '\x0c', // Form Feed
+            'n' => '\n', // New Line
+            'r' => '\r', // Carriage Return
+            't' => '\t', // Tab
             'u' => self.unicode_escape()?,
             'x' => self.hex_escape()?,
             c => c,
         })
     }
 
+    /// Parses two hex-digits and constructs a [char] out of them.
     pub fn hex_escape(&mut self) -> Result<char, LexError> {
         let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
         char::from_u32(out).ok_or(self.error("Invalid digit"))
     }
 
+    /// Parses a sequence of `{}`-bracketed hex-digits and constructs a [char] out of them.
     pub fn unicode_escape(&mut self) -> Result<char, LexError> {
         self.next_if('{')
             .ok_or_else(|| self.error("No unicode escape opener"))?;
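Aside (not part of the diff): `hex_escape` builds a code point from exactly two base-16 digits, high nibble first, which is what the `(d1 << 4) + d2` expression above computes; `unicode_escape` accepts a `{}`-bracketed run instead. A worked example of the arithmetic (standalone, not this crate's API):

fn main() {
    // Two hex digits, as in the escape "\x41": 4 * 16 + 1 = 65.
    let (hi, lo) = ('4'.to_digit(16).unwrap(), '1'.to_digit(16).unwrap());
    assert_eq!(char::from_u32((hi << 4) + lo), Some('A'));

    // The braced form can carry more digits, up to any valid scalar value.
    assert_eq!(char::from_u32(0x1F980), Some('🦀')); // "\u{1F980}"
}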
@@ -328,6 +344,10 @@ impl<'t> Lexer<'t> {
         Err(self.error("Unterminated unicode escape"))
     }
 
+    /// Parses a sequence of digits (and underscores) in base `BASE`, where 2 <= `BASE` <= 36.
+    ///
+    /// If the sequence of digits exceeds the bounds of a [u128], the resulting number will wrap
+    /// around 2^128.
     pub fn digits<const BASE: u32>(&mut self) -> Result<Token, LexError> {
         let mut int: u128 = 0;
         while let Some(c) = self.peek() {
@@ -342,6 +362,7 @@ impl<'t> Lexer<'t> {
         Ok(self.produce_with_lexeme(TKind::Integer, Lexeme::Integer(int, BASE)))
     }
 
+    /// Parses a single digit in base `BASE` as a u32, where 2 <= `BASE` <= 36
     pub fn digit<const BASE: u32>(&mut self) -> Result<u32, LexError> {
         if let Some(digit) = self.take().and_then(|c| c.to_digit(BASE)) {
             Ok(digit)
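Aside (not part of the diff): the new doc comment on `digits` promises wrapping behaviour past 2^128. Its loop body is outside this hunk, so the sketch below only illustrates what such a wrapping base-`BASE` accumulator can look like; the underscore handling and the `wrapping_*` calls are assumptions, not this crate's code.

// Wrapping base-BASE accumulation over a digit string with `_` separators.
fn accumulate<const BASE: u32>(digits: &str) -> u128 {
    let mut int: u128 = 0;
    for c in digits.chars() {
        if c == '_' {
            continue; // separators are skipped, not parsed
        }
        let Some(d) = c.to_digit(BASE) else { break };
        int = int.wrapping_mul(BASE as u128).wrapping_add(d as u128);
    }
    int
}

fn main() {
    assert_eq!(accumulate::<16>("ff_ff"), 0xFFFF);
    assert_eq!(accumulate::<2>("1010"), 10);
    // One bit past the top of u128 wraps back to zero.
    let two_pow_128 = format!("1{}", "0".repeat(128));
    assert_eq!(accumulate::<2>(&two_pow_128), 0);
}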