lexer: Document stuff
This commit is contained in:
57
src/lexer.rs
57
src/lexer.rs
@@ -43,6 +43,7 @@ impl<'t> Lexer<'t> {
|
|||||||
self.iter.peek().map(|&(_, c)| c)
|
self.iter.peek().map(|&(_, c)| c)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Advances the tail to the current character index
|
||||||
fn advance_tail(&mut self) {
|
fn advance_tail(&mut self) {
|
||||||
match self.iter.peek() {
|
match self.iter.peek() {
|
||||||
Some(&(idx, _)) => self.tail = idx as u32,
|
Some(&(idx, _)) => self.tail = idx as u32,
|
||||||
@@ -51,44 +52,45 @@ impl<'t> Lexer<'t> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Takes the next character, advancing the tail
|
/// Takes the next character, advancing the tail
|
||||||
pub fn take(&mut self) -> Option<char> {
|
fn take(&mut self) -> Option<char> {
|
||||||
let (_, c) = self.iter.next()?;
|
let (_, c) = self.iter.next()?;
|
||||||
self.advance_tail();
|
self.advance_tail();
|
||||||
Some(c)
|
Some(c)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn next_if(&mut self, expected: char) -> Option<char> {
|
fn next_if(&mut self, expected: char) -> Option<char> {
|
||||||
let (_, c) = self.iter.next_if(|&(_, c)| c == expected)?;
|
let (_, c) = self.iter.next_if(|&(_, c)| c == expected)?;
|
||||||
self.advance_tail();
|
self.advance_tail();
|
||||||
Some(c)
|
Some(c)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Consumes the last-peeked character, advancing the tail
|
/// Consumes the last-peeked character, advancing the tail
|
||||||
pub fn consume(&mut self) -> &mut Self {
|
fn consume(&mut self) -> &mut Self {
|
||||||
self.iter.next();
|
self.iter.next();
|
||||||
self.advance_tail();
|
self.advance_tail();
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Produces a LexError at the start of the current token
|
/// Produces a LexError at the start of the current token
|
||||||
pub fn error(&self, res: &'static str) -> LexError {
|
fn error(&self, res: &'static str) -> LexError {
|
||||||
LexError { pos: self.head, res }
|
LexError { pos: self.head, res }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn as_str(&self) -> (&'t str, Span) {
|
/// Gets the Lexer's current &[str] lexeme and [Span]
|
||||||
|
fn as_str(&self) -> (&'t str, Span) {
|
||||||
let span = Span(self.head, self.tail);
|
let span = Span(self.head, self.tail);
|
||||||
(&self.text[Range::from(span)], span)
|
(&self.text[Range::from(span)], span)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Produces a Token
|
/// Produces a Token
|
||||||
pub fn produce(&mut self, kind: TKind) -> Token {
|
fn produce(&mut self, kind: TKind) -> Token {
|
||||||
self.advance_tail();
|
self.advance_tail();
|
||||||
let (lexeme, span) = self.as_str();
|
let (lexeme, span) = self.as_str();
|
||||||
self.head = self.tail;
|
self.head = self.tail;
|
||||||
Token { lexeme: Lexeme::String(lexeme.to_owned()), kind, span }
|
Token { lexeme: Lexeme::String(lexeme.to_owned()), kind, span }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn produce_with_lexeme(&mut self, kind: TKind, lexeme: Lexeme) -> Token {
|
fn produce_with_lexeme(&mut self, kind: TKind, lexeme: Lexeme) -> Token {
|
||||||
self.advance_tail();
|
self.advance_tail();
|
||||||
let span = Span(self.head, self.tail);
|
let span = Span(self.head, self.tail);
|
||||||
self.head = self.tail;
|
self.head = self.tail;
|
||||||
@@ -96,14 +98,14 @@ impl<'t> Lexer<'t> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Consumes 0 or more whitespace
|
/// Consumes 0 or more whitespace
|
||||||
pub fn skip_whitespace(&mut self) -> &mut Self {
|
fn skip_whitespace(&mut self) -> &mut Self {
|
||||||
while self.peek().is_some_and(char::is_whitespace) {
|
while self.peek().is_some_and(char::is_whitespace) {
|
||||||
let _ = self.consume();
|
let _ = self.consume();
|
||||||
}
|
}
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn start_token(&mut self) -> &mut Self {
|
fn start_token(&mut self) -> &mut Self {
|
||||||
self.head = self.tail;
|
self.head = self.tail;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
@@ -203,6 +205,7 @@ impl<'t> Lexer<'t> {
|
|||||||
Ok(self.consume().produce(tok))
|
Ok(self.consume().produce(tok))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Elides the trailing [Token] `kind` when it comes before a list terminator.
|
||||||
pub fn trailing(&mut self, kind: TKind) -> Result<Token, LexError> {
|
pub fn trailing(&mut self, kind: TKind) -> Result<Token, LexError> {
|
||||||
Ok(match self.skip_whitespace().peek() {
|
Ok(match self.skip_whitespace().peek() {
|
||||||
// Some(')') => self.consume().produce(TKind::RParen), // maybe.
|
// Some(')') => self.consume().produce(TKind::RParen), // maybe.
|
||||||
@@ -212,11 +215,14 @@ impl<'t> Lexer<'t> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Consumes characters until the lexer reaches a newline `'\n'`
|
||||||
pub fn line_comment(&mut self) -> Result<Token, LexError> {
|
pub fn line_comment(&mut self) -> Result<Token, LexError> {
|
||||||
while self.consume().peek().is_some_and(|c| c != '\n') {}
|
while self.consume().peek().is_some_and(|c| c != '\n') {}
|
||||||
Ok(self.produce(TKind::Comment))
|
Ok(self.produce(TKind::Comment))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Consumes characters until the lexer reaches the end of a *nested* block comment.
|
||||||
|
/// This allows you to arbitrarily comment out code, even if that code has a block comment.
|
||||||
pub fn block_comment(&mut self) -> Result<&mut Self, LexError> {
|
pub fn block_comment(&mut self) -> Result<&mut Self, LexError> {
|
||||||
self.consume();
|
self.consume();
|
||||||
while let Some(c) = self.take() {
|
while let Some(c) = self.take() {
|
||||||
@@ -229,6 +235,11 @@ impl<'t> Lexer<'t> {
|
|||||||
Err(self.error("Unterminated block comment"))
|
Err(self.error("Unterminated block comment"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Consumes characters until it reaches a character not in [is_xid_continue].
|
||||||
|
///
|
||||||
|
/// Always consumes the first character.
|
||||||
|
///
|
||||||
|
/// Maps the result to either a [TKind::Identifier] or a [TKind] keyword.
|
||||||
pub fn identifier(&mut self) -> Result<Token, LexError> {
|
pub fn identifier(&mut self) -> Result<Token, LexError> {
|
||||||
while self.consume().peek().is_some_and(is_xid_continue) {}
|
while self.consume().peek().is_some_and(is_xid_continue) {}
|
||||||
let (lexeme, _span) = self.as_str();
|
let (lexeme, _span) = self.as_str();
|
||||||
@@ -265,6 +276,7 @@ impl<'t> Lexer<'t> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Eagerly parses a character literal starting at the current lexer position.
|
||||||
pub fn character(&mut self) -> Result<Token, LexError> {
|
pub fn character(&mut self) -> Result<Token, LexError> {
|
||||||
let c = match self.consume().take() {
|
let c = match self.consume().take() {
|
||||||
Some('\\') => self.escape()?,
|
Some('\\') => self.escape()?,
|
||||||
@@ -278,6 +290,7 @@ impl<'t> Lexer<'t> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Eagerly parses a string literal starting at the current lexer position.
|
||||||
pub fn string(&mut self) -> Result<Token, LexError> {
|
pub fn string(&mut self) -> Result<Token, LexError> {
|
||||||
let mut lexeme = String::new();
|
let mut lexeme = String::new();
|
||||||
self.consume();
|
self.consume();
|
||||||
@@ -293,28 +306,31 @@ impl<'t> Lexer<'t> {
|
|||||||
Ok(self.produce_with_lexeme(TKind::String, Lexeme::String(lexeme)))
|
Ok(self.produce_with_lexeme(TKind::String, Lexeme::String(lexeme)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parses a single escape sequence into its resulting char value.
|
||||||
pub fn escape(&mut self) -> Result<char, LexError> {
|
pub fn escape(&mut self) -> Result<char, LexError> {
|
||||||
Ok(match self.take().ok_or_else(|| self.error("EOF"))? {
|
Ok(match self.take().ok_or_else(|| self.error("EOF"))? {
|
||||||
' ' => '\u{a0}',
|
' ' => '\u{a0}', // Non-breaking space
|
||||||
'0' => '\0',
|
'0' => '\0', // C0 Null Character
|
||||||
'a' => '\x07',
|
'a' => '\x07', // C0 Bell (alert)
|
||||||
'b' => '\x08',
|
'b' => '\x08', // C0 Backspace
|
||||||
'e' => '\x1b',
|
'e' => '\x1b', // C0 Escape
|
||||||
'f' => '\x0c',
|
'f' => '\x0c', // Form Feed
|
||||||
'n' => '\n',
|
'n' => '\n', // New Line
|
||||||
'r' => '\r',
|
'r' => '\r', // Carriage Return
|
||||||
't' => '\t',
|
't' => '\t', // Tab
|
||||||
'u' => self.unicode_escape()?,
|
'u' => self.unicode_escape()?,
|
||||||
'x' => self.hex_escape()?,
|
'x' => self.hex_escape()?,
|
||||||
c => c,
|
c => c,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parses two hex-digits and constructs a [char] out of them.
|
||||||
pub fn hex_escape(&mut self) -> Result<char, LexError> {
|
pub fn hex_escape(&mut self) -> Result<char, LexError> {
|
||||||
let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
|
let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
|
||||||
char::from_u32(out).ok_or(self.error("Invalid digit"))
|
char::from_u32(out).ok_or(self.error("Invalid digit"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parses a sequence of `{}`-bracketed hex-digits and constructs a [char] out of them.
|
||||||
pub fn unicode_escape(&mut self) -> Result<char, LexError> {
|
pub fn unicode_escape(&mut self) -> Result<char, LexError> {
|
||||||
self.next_if('{')
|
self.next_if('{')
|
||||||
.ok_or_else(|| self.error("No unicode escape opener"))?;
|
.ok_or_else(|| self.error("No unicode escape opener"))?;
|
||||||
@@ -328,6 +344,10 @@ impl<'t> Lexer<'t> {
|
|||||||
Err(self.error("Unterminated unicode escape"))
|
Err(self.error("Unterminated unicode escape"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parses a sequence of digits (and underscores) in base `BASE`, where 2 <= `BASE` <= 36.
|
||||||
|
///
|
||||||
|
/// If the sequence of digits exceeds the bounds of a [u128], the resulting number will wrap
|
||||||
|
/// around 2^128.
|
||||||
pub fn digits<const BASE: u32>(&mut self) -> Result<Token, LexError> {
|
pub fn digits<const BASE: u32>(&mut self) -> Result<Token, LexError> {
|
||||||
let mut int: u128 = 0;
|
let mut int: u128 = 0;
|
||||||
while let Some(c) = self.peek() {
|
while let Some(c) = self.peek() {
|
||||||
@@ -342,6 +362,7 @@ impl<'t> Lexer<'t> {
|
|||||||
Ok(self.produce_with_lexeme(TKind::Integer, Lexeme::Integer(int, BASE)))
|
Ok(self.produce_with_lexeme(TKind::Integer, Lexeme::Integer(int, BASE)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parses a single digit in base `BASE` as a u32, where 2 <= `BASE` <= 36
|
||||||
pub fn digit<const BASE: u32>(&mut self) -> Result<u32, LexError> {
|
pub fn digit<const BASE: u32>(&mut self) -> Result<u32, LexError> {
|
||||||
if let Some(digit) = self.take().and_then(|c| c.to_digit(BASE)) {
|
if let Some(digit) = self.take().and_then(|c| c.to_digit(BASE)) {
|
||||||
Ok(digit)
|
Ok(digit)
|
||||||
|
|||||||
Reference in New Issue
Block a user