Lexer rewrite:

- Scan the input string *linearly*, without backtracking
  - Peek at most one character (unicode code-point) ahead
- Store data (unescaped string literals and chars, identifiers, integers, floats) inside Token
  - This unfortunately makes tokens non-Copy
- Refactor Parser to accommodate these changes
  - On the bright side, Parser no longer needs a reference to the text!
- Write a new set of lexer tests
  - TODO: write a new set of token tests using TokenData

Every day, we get closer to parsing `dummy.cl`!
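
A minimal sketch of the new surface (names as they appear in the diff below; the input string is made up):

    use conlang::lexer::Lexer;

    fn main() {
        // The lexer is now a fallible iterator: each item is an LResult<Token>,
        // and iteration ends cleanly once the EndOfFile "error" is reached.
        for token in Lexer::new("let x = 10;") {
            match token {
                Ok(t) => println!("{}:{}: {} │{:?}│", t.line(), t.col(), t.ty(), t.data()),
                Err(e) => eprintln!("{e}"),
            }
        }
    }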
John 2023-10-22 18:28:20 -05:00
parent feb5cc5dd0
commit b5abd2bff1
5 changed files with 716 additions and 1063 deletions

View File

@@ -13,7 +13,7 @@ fn main() -> Result<(), Box<dyn Error>> {
take_stdin()?;
} else {
for path in conf.paths.iter().map(PathBuf::as_path) {
lex_tokens(&std::fs::read_to_string(path)?, Some(path));
lex_tokens(&std::fs::read_to_string(path)?, Some(path))?;
}
}
Ok(())
@@ -32,29 +32,37 @@ impl Config {
fn take_stdin() -> Result<(), Box<dyn Error>> {
if stdin().is_terminal() {
for line in stdin().lines() {
lex_tokens(&line?, None)
lex_tokens(&line?, None)?
}
} else {
lex_tokens(&std::io::read_to_string(stdin())?, None)
lex_tokens(&std::io::read_to_string(stdin())?, None)?
}
Ok(())
}
fn lex_tokens(file: &str, path: Option<&Path>) {
fn lex_tokens(file: &str, path: Option<&Path>) -> Result<(), Box<dyn Error>> {
for token in Lexer::new(file) {
let token = match token {
Ok(t) => t,
Err(e) => {
println!("{e:?}");
break;
},
};
if let Some(path) = path {
print!("{path:?}:")
}
print_token(file, token);
print_token(token);
}
Ok(())
}
fn print_token(line: &str, t: conlang::token::Token) {
fn print_token(t: conlang::token::Token) {
println!(
"{:02}:{:02}: {:#19} │{}│",
"{:02}:{:02}: {:#19} │{:?}│",
t.line(),
t.col(),
t.ty(),
&line[t.range()]
t.data(),
)
}

View File

@@ -1,548 +1,477 @@
//! Converts a text file into tokens
use crate::token::{Token, Type};
use lerox::Combinator;
use crate::token::{Keyword, Token, TokenData, Type};
use std::{
iter::Peekable,
str::{Chars, FromStr},
};
use unicode_xid::UnicodeXID;
pub struct IntoIter<'t> {
lexer: Lexer<'t>,
}
impl<'t> Iterator for IntoIter<'t> {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
self.lexer.any()
pub mod lexer_iter {
use super::{
error::{LResult, Reason},
Lexer, Token,
};
/// Fallible iterator over a [Lexer], returning optional [LResult<Token>]s
pub struct LexerIter<'t> {
lexer: Lexer<'t>,
}
}
impl<'t> IntoIterator for Lexer<'t> {
type Item = Token;
type IntoIter = IntoIter<'t>;
fn into_iter(self) -> Self::IntoIter {
IntoIter { lexer: self }
impl<'t> Iterator for LexerIter<'t> {
type Item = LResult<Token>;
fn next(&mut self) -> Option<Self::Item> {
match self.lexer.scan() {
Ok(v) => Some(Ok(v)),
Err(e) => {
if e.reason == Reason::EndOfFile {
None
} else {
Some(Err(e))
}
}
}
}
}
impl<'t> IntoIterator for Lexer<'t> {
type Item = LResult<Token>;
type IntoIter = LexerIter<'t>;
fn into_iter(self) -> Self::IntoIter {
LexerIter { lexer: self }
}
}
}
#[derive(Clone, Debug)]
pub struct Lexer<'t> {
text: &'t str,
cursor: usize,
line: u32,
col: u32,
iter: Peekable<Chars<'t>>,
start: usize,
start_loc: (u32, u32),
current: usize,
current_loc: (u32, u32),
}
/// Implements the non-terminals of a language
impl<'t> Lexer<'t> {
pub fn new(text: &'t str) -> Self {
Self { text, cursor: 0, line: 1, col: 1 }
}
/// Consumes the entire [`Lexer`], producing a [`Vec<Token>`]
/// and returning the original string
pub fn consume(self) -> (Vec<Token>, &'t str) {
let text = self.text;
(self.into_iter().collect(), text)
}
/// Counts some length
#[inline]
fn count_len(&mut self, len: usize) -> &mut Self {
self.cursor += len;
self.col += len as u32;
self
}
/// Counts a line
#[inline]
fn count_line(&mut self, lines: u32) -> &mut Self {
self.line += lines;
self.col = 1;
self
}
/// Skips whitespace in the text
fn skip_whitespace(&mut self) {
self.count_len(
Rule::new(self.text())
.and_any(Rule::whitespace_not_newline)
.end()
.unwrap_or_default(),
);
if Rule::new(self.text()).char('\n').end().is_some() {
// recurse until all newlines are skipped
self.count_len(1).count_line(1).skip_whitespace();
Self {
iter: text.chars().peekable(),
start: 0,
start_loc: (1, 1),
current: 0,
current_loc: (1, 1),
}
}
/// Advances the cursor and produces a token from a provided [Rule] function
fn map_rule<F>(&mut self, rule: F, ty: Type) -> Option<Token>
where F: Fn(Rule) -> Rule {
self.skip_whitespace();
let (line, col, start) = (self.line, self.col, self.cursor);
self.count_len(Rule::new(self.text()).and(rule).end()?);
Some(Token::new(ty, start, self.cursor, line, col))
}
/// Gets a slice of text beginning at the cursor
fn text(&self) -> &str {
&self.text[self.cursor..]
}
// classifies a single arbitrary token
/// Returns the result of the rule with the highest precedence, if any matches
pub fn any(&mut self) -> Option<Token> {
None.or_else(|| self.comment())
.or_else(|| self.identifier())
.or_else(|| self.literal())
.or_else(|| self.delimiter())
.or_else(|| self.punctuation())
.or_else(|| self.invalid())
}
/// Attempts to produce a [Type::String], [Type::Float], or [Type::Integer]
pub fn literal(&mut self) -> Option<Token> {
None.or_else(|| self.string())
.or_else(|| self.character())
.or_else(|| self.float())
.or_else(|| self.integer())
}
/// Evaluates delimiter rules
pub fn delimiter(&mut self) -> Option<Token> {
None.or_else(|| self.l_brack())
.or_else(|| self.r_brack())
.or_else(|| self.l_curly())
.or_else(|| self.r_curly())
.or_else(|| self.l_paren())
.or_else(|| self.r_paren())
}
/// Evaluates punctuation rules
pub fn punctuation(&mut self) -> Option<Token> {
None.or_else(|| self.amp_amp()) // &&
.or_else(|| self.amp_eq()) // &=
.or_else(|| self.amp()) // &
.or_else(|| self.at()) // @
.or_else(|| self.backslash()) // \
.or_else(|| self.bang_bang()) // !!
.or_else(|| self.bang_eq()) // !=
.or_else(|| self.bang()) // !
.or_else(|| self.bar_bar()) // ||
.or_else(|| self.bar_eq()) // |=
.or_else(|| self.bar()) // |
.or_else(|| self.colon()) // :
.or_else(|| self.comma()) // ,
.or_else(|| self.dot_dot_eq()) // ..=
.or_else(|| self.dot_dot()) // ..
.or_else(|| self.dot()) // .
.or_else(|| self.eq_eq()) // ==
.or_else(|| self.fatarrow()) // =>
.or_else(|| self.eq()) // =
.or_else(|| self.grave()) // `
.or_else(|| self.gt_eq()) // >=
.or_else(|| self.gt_gt_eq()) // >>=
.or_else(|| self.gt_gt()) // >>
.or_else(|| self.gt()) // >
.or_else(|| self.hash()) // #
.or_else(|| self.lt_eq()) // <=
.or_else(|| self.lt_lt_eq()) // <<=
.or_else(|| self.lt_lt()) // <<
.or_else(|| self.lt()) // <
.or_else(|| self.minus_eq()) // -=
.or_else(|| self.arrow()) // ->
.or_else(|| self.minus()) // -
.or_else(|| self.plus_eq()) // +=
.or_else(|| self.plus()) // +
.or_else(|| self.question()) // ?
.or_else(|| self.rem_eq()) // %=
.or_else(|| self.rem()) // %
.or_else(|| self.semi()) // ;
.or_else(|| self.slash_eq()) // /=
.or_else(|| self.slash()) // /
.or_else(|| self.star_eq()) // *=
.or_else(|| self.star()) // *
.or_else(|| self.tilde()) // ~
.or_else(|| self.xor_eq()) // ^=
.or_else(|| self.xor_xor()) // ^^
.or_else(|| self.xor()) // ^
}
pub fn unary_op(&mut self) -> Option<Token> {
self.bang().or_else(|| self.minus())
}
// functions for lexing individual tokens
pub fn invalid(&mut self) -> Option<Token> {
self.map_rule(|r| r.invalid(), Type::Invalid)
}
// comments
pub fn comment(&mut self) -> Option<Token> {
self.map_rule(|r| r.comment(), Type::Comment)
}
// identifiers
pub fn identifier(&mut self) -> Option<Token> {
self.map_rule(|r| r.identifier(), Type::Identifier)
.map(|token| match self.text[token.range()].parse() {
Ok(kw) => token.cast(Type::Keyword(kw)),
Err(_) => token,
})
}
// literals
pub fn integer(&mut self) -> Option<Token> {
self.map_rule(|r| r.integer(), Type::Integer)
}
pub fn float(&mut self) -> Option<Token> {
self.map_rule(|r| r.float(), Type::Float)
}
pub fn string(&mut self) -> Option<Token> {
// TODO: count lines and columns properly within string
self.map_rule(|r| r.string(), Type::String)
.map(|t| t.rebound(t.head + 1, t.tail - 1))
}
pub fn character(&mut self) -> Option<Token> {
self.map_rule(|r| r.character(), Type::Character)
.map(|t| t.rebound(t.head + 1, t.tail - 1))
}
// delimiters
pub fn l_brack(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('['), Type::LBrack)
}
pub fn r_brack(&mut self) -> Option<Token> {
self.map_rule(|r| r.char(']'), Type::RBrack)
}
pub fn l_curly(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('{'), Type::LCurly)
}
pub fn r_curly(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('}'), Type::RCurly)
}
pub fn l_paren(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('('), Type::LParen)
}
pub fn r_paren(&mut self) -> Option<Token> {
self.map_rule(|r| r.char(')'), Type::RParen)
}
// compound punctuation
pub fn lt_lt(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("<<"), Type::LtLt)
}
pub fn gt_gt(&mut self) -> Option<Token> {
self.map_rule(|r| r.str(">>"), Type::GtGt)
}
pub fn amp_amp(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("&&"), Type::AmpAmp)
}
pub fn bar_bar(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("||"), Type::BarBar)
}
pub fn bang_bang(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("!!"), Type::BangBang)
}
pub fn xor_xor(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("^^"), Type::XorXor)
}
pub fn eq_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("=="), Type::EqEq)
}
pub fn gt_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str(">="), Type::GtEq)
}
pub fn lt_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("<="), Type::LtEq)
}
pub fn bang_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("!="), Type::BangEq)
}
pub fn star_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("*="), Type::StarEq)
}
pub fn slash_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("/="), Type::SlashEq)
}
pub fn rem_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("%="), Type::RemEq)
}
pub fn plus_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("+="), Type::PlusEq)
}
pub fn minus_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("-="), Type::MinusEq)
}
pub fn amp_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("&="), Type::AmpEq)
}
pub fn bar_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("|="), Type::BarEq)
}
pub fn xor_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("^="), Type::XorEq)
}
pub fn lt_lt_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("<<="), Type::LtLtEq)
}
pub fn gt_gt_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str(">>="), Type::GtGtEq)
}
pub fn dot_dot_eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("..="), Type::DotDotEq)
}
pub fn dot_dot(&mut self) -> Option<Token> {
self.map_rule(|r| r.str(".."), Type::DotDot)
}
pub fn arrow(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("->"), Type::Arrow)
}
pub fn fatarrow(&mut self) -> Option<Token> {
self.map_rule(|r| r.str("=>"), Type::FatArrow)
}
// simple punctuation
pub fn semi(&mut self) -> Option<Token> {
self.map_rule(|r| r.char(';'), Type::Semi)
}
pub fn dot(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('.'), Type::Dot)
}
pub fn star(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('*'), Type::Star)
}
pub fn slash(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('/'), Type::Slash)
}
pub fn plus(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('+'), Type::Plus)
}
pub fn minus(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('-'), Type::Minus)
}
pub fn rem(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('%'), Type::Rem)
}
pub fn bang(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('!'), Type::Bang)
}
pub fn eq(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('='), Type::Eq)
}
pub fn lt(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('<'), Type::Lt)
}
pub fn gt(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('>'), Type::Gt)
}
pub fn amp(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('&'), Type::Amp)
}
pub fn bar(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('|'), Type::Bar)
}
pub fn xor(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('^'), Type::Xor)
}
pub fn hash(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('#'), Type::Hash)
}
pub fn at(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('@'), Type::At)
}
pub fn colon(&mut self) -> Option<Token> {
self.map_rule(|r| r.char(':'), Type::Colon)
}
pub fn question(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('?'), Type::Question)
}
pub fn comma(&mut self) -> Option<Token> {
self.map_rule(|r| r.char(','), Type::Comma)
}
pub fn tilde(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('~'), Type::Tilde)
}
pub fn grave(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('`'), Type::Grave)
}
pub fn backslash(&mut self) -> Option<Token> {
self.map_rule(|r| r.char('\\'), Type::Backslash)
pub fn scan(&mut self) -> LResult<Token> {
match self.skip_whitespace().peek()? {
'{' => self.consume()?.produce(Type::LCurly, ()),
'}' => self.consume()?.produce(Type::RCurly, ()),
'[' => self.consume()?.produce(Type::LBrack, ()),
']' => self.consume()?.produce(Type::RBrack, ()),
'(' => self.consume()?.produce(Type::LParen, ()),
')' => self.consume()?.produce(Type::RParen, ()),
'&' => self.consume()?.amp(),
'@' => self.consume()?.produce(Type::At, ()),
'\\' => self.consume()?.produce(Type::Backslash, ()),
'!' => self.consume()?.bang(),
'|' => self.consume()?.bar(),
':' => self.consume()?.produce(Type::Colon, ()),
',' => self.consume()?.produce(Type::Comma, ()),
'.' => self.consume()?.dot(),
'=' => self.consume()?.equal(),
'`' => self.consume()?.produce(Type::Grave, ()),
'>' => self.consume()?.greater(),
'#' => self.consume()?.produce(Type::Hash, ()),
'<' => self.consume()?.less(),
'-' => self.consume()?.minus(),
'+' => self.consume()?.plus(),
'?' => self.consume()?.produce(Type::Question, ()),
'%' => self.consume()?.rem(),
';' => self.consume()?.produce(Type::Semi, ()),
'/' => self.consume()?.slash(),
'*' => self.consume()?.star(),
'~' => self.consume()?.produce(Type::Tilde, ()),
'^' => self.consume()?.xor(),
'0' => self.consume()?.int_with_base(),
'1'..='9' => self.digits::<10>(),
'"' => self.consume()?.string(),
'\'' => self.consume()?.character(),
'_' => self.identifier(),
i if i.is_xid_start() => self.identifier(),
e => Err(Error::unexpected_char(e, self.line(), self.col())),
}
}
/// Gets the line of the next token
pub fn line(&self) -> u32 {
self.start_loc.0
}
/// Gets the column of the next token
pub fn col(&self) -> u32 {
self.start_loc.1
}
fn next(&mut self) -> LResult<char> {
let out = self.peek();
self.consume()?;
out
}
fn peek(&mut self) -> LResult<char> {
self.iter
.peek()
.copied()
.ok_or(Error::end_of_file(self.line(), self.col()))
}
fn produce(&mut self, ty: Type, data: impl Into<TokenData>) -> LResult<Token> {
let loc = self.start_loc;
self.start_loc = self.current_loc;
self.start = self.current;
Ok(Token::new(ty, data, loc.0, loc.1))
}
fn skip_whitespace(&mut self) -> &mut Self {
while let Ok(c) = self.peek() {
if !c.is_whitespace() {
break;
}
let _ = self.consume();
}
self.start = self.current;
self.start_loc = self.current_loc;
self
}
fn consume(&mut self) -> LResult<&mut Self> {
self.current += 1;
match self.iter.next() {
Some('\n') => {
let (line, col) = &mut self.current_loc;
*line += 1;
*col = 1;
}
Some(_) => self.current_loc.1 += 1,
None => Err(Error::end_of_file(self.line(), self.col()))?,
}
Ok(self)
}
}
// TODO: use real, functional parser-combinators here to produce tokens
/// A lexer [Rule] matches patterns in text in a declarative manner
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Rule<'t> {
text: &'t str,
taken: usize,
is_alright: bool,
}
impl<'t> Rule<'t> {
pub fn new(text: &'t str) -> Self {
Self { text, taken: 0, is_alright: true }
/// Digraphs and trigraphs
impl<'t> Lexer<'t> {
fn amp(&mut self) -> LResult<Token> {
match self.peek() {
Ok('&') => self.consume()?.produce(Type::AmpAmp, ()),
Ok('=') => self.consume()?.produce(Type::AmpEq, ()),
_ => self.produce(Type::Amp, ()),
}
}
pub fn end(self) -> Option<usize> {
self.is_alright.then_some(self.taken)
fn bang(&mut self) -> LResult<Token> {
match self.peek() {
Ok('!') => self.consume()?.produce(Type::BangBang, ()),
Ok('=') => self.consume()?.produce(Type::BangEq, ()),
_ => self.produce(Type::Bang, ()),
}
}
pub fn remaining(&self) -> &str {
self.text
fn bar(&mut self) -> LResult<Token> {
match self.peek() {
Ok('|') => self.consume()?.produce(Type::BarBar, ()),
Ok('=') => self.consume()?.produce(Type::BarEq, ()),
_ => self.produce(Type::Bar, ()),
}
}
fn dot(&mut self) -> LResult<Token> {
match self.peek() {
Ok('.') => {
if let Ok('=') = self.consume()?.peek() {
self.consume()?.produce(Type::DotDotEq, ())
} else {
self.produce(Type::DotDot, ())
}
}
_ => self.produce(Type::Dot, ()),
}
}
fn equal(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::EqEq, ()),
Ok('>') => self.consume()?.produce(Type::FatArrow, ()),
_ => self.produce(Type::Eq, ()),
}
}
fn greater(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::GtEq, ()),
Ok('>') => {
if let Ok('=') = self.consume()?.peek() {
self.consume()?.produce(Type::GtGtEq, ())
} else {
self.produce(Type::GtGt, ())
}
}
_ => self.produce(Type::Gt, ()),
}
}
fn less(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::LtEq, ()),
Ok('<') => {
if let Ok('=') = self.consume()?.peek() {
self.consume()?.produce(Type::LtLtEq, ())
} else {
self.produce(Type::LtLt, ())
}
}
_ => self.produce(Type::Lt, ()),
}
}
fn minus(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::MinusEq, ()),
Ok('>') => self.consume()?.produce(Type::Arrow, ()),
_ => self.produce(Type::Minus, ()),
}
}
fn plus(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::PlusEq, ()),
_ => self.produce(Type::Plus, ()),
}
}
fn rem(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::RemEq, ()),
_ => self.produce(Type::Rem, ()),
}
}
fn slash(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::SlashEq, ()),
Ok('/') => self.consume()?.line_comment(),
Ok('*') => self.consume()?.block_comment(),
_ => self.produce(Type::Slash, ()),
}
}
fn star(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::StarEq, ()),
_ => self.produce(Type::Star, ()),
}
}
fn xor(&mut self) -> LResult<Token> {
match self.peek() {
Ok('=') => self.consume()?.produce(Type::XorEq, ()),
Ok('^') => self.consume()?.produce(Type::XorXor, ()),
_ => self.produce(Type::Xor, ()),
}
}
}
impl<'t> Rule<'t> {
/// Matches any sequence of non-whitespace characters
pub fn invalid(self) -> Self {
self.and_many(Self::not_whitespace)
/// Comments
impl<'t> Lexer<'t> {
fn line_comment(&mut self) -> LResult<Token> {
while Ok('\n') != self.peek() {
self.consume()?;
}
self.produce(Type::Comment, ())
}
/// Matches a block, line, or shebang comment
pub fn comment(self) -> Self {
self.and_either(Self::line_comment, Self::block_comment)
}
/// Matches a line or shebang comment
fn line_comment(self) -> Self {
// line_comment := ("//" | "#!/") (!newline)*
self.str("//")
.or(|r| r.str("#!/"))
.and_any(|r| r.not_char('\n'))
}
/// Matches a block comment
fn block_comment(self) -> Self {
// block_comment := "/*" (block_comment | all_but("*/"))* "*/"
self.str("/*")
.and_any(|r| r.and_either(|f| f.block_comment(), |g| g.not_str("*/")))
.str("*/")
}
/// Matches a Rust-style identifier
pub fn identifier(self) -> Self {
// identifier := ('_' | XID_START) ~ XID_CONTINUE*
self.char('_')
.or(Rule::xid_start)
.and_any(Rule::xid_continue)
}
/// Matches a Rust-style base-prefixed int literal
fn integer_kind(self, prefix: &str, digit: impl Fn(Self) -> Self) -> Self {
// int_kind<Prefix, Digit> := Prefix '_'* Digit (Digit | '_')*
self.str(prefix)
.and_any(|r| r.char('_'))
.and(&digit)
.and_any(|r| r.and(&digit).or(|r| r.char('_')))
}
/// Matches a Rust-style integer literal
pub fn integer(self) -> Self {
// integer = (int_kind<0d, dec_digit> | int_kind<0x, hex_digit>
// | int_kind<0o, oct_digit> | int_kind<0b, bin_digit> | dec_digit (dec_digit | '_')*)
self.and_one_of(&[
&|rule| rule.integer_kind("0d", Rule::dec_digit),
&|rule| rule.integer_kind("0x", Rule::hex_digit),
&|rule| rule.integer_kind("0o", Rule::oct_digit),
&|rule| rule.integer_kind("0b", Rule::bin_digit),
&|rule| {
rule.dec_digit()
.and_any(|r| r.dec_digit().or(|r| r.char('_')))
},
])
}
/// Matches a float literal
// TODO: exponent form
pub fn float(self) -> Self {
self.and_any(Rule::dec_digit)
.char('.')
.and_many(Rule::dec_digit)
}
/// Matches one apostrophe-delimited char literal
pub fn character(self) -> Self {
self.char('\'').character_continue().char('\'')
}
pub fn character_continue(self) -> Self {
self.and(|rule| rule.string_escape().or(|rule| rule.not_char('\'')))
}
/// Matches one quote-delimited string literal
pub fn string(self) -> Self {
self.char('"').and_any(Rule::string_continue).char('"')
}
/// Matches one string escape sequence or non-`"` character
pub fn string_continue(self) -> Self {
self.and(Rule::string_escape).or(|rule| rule.not_char('"'))
fn block_comment(&mut self) -> LResult<Token> {
while let Ok(c) = self.next() {
if '*' == c && Ok('/') == self.next() {
break;
}
}
self.produce(Type::Comment, ())
}
}
impl<'t> Rule<'t> {
/// Matches a char lexicographically between start and end
pub fn char_between(self, start: char, end: char) -> Self {
self.char_fn(|c| start <= c && c <= end)
/// Identifiers
impl<'t> Lexer<'t> {
fn identifier(&mut self) -> LResult<Token> {
let mut out = String::from(self.xid_start()?);
while let Ok(c) = self.xid_continue() {
out.push(c)
}
if let Ok(keyword) = Keyword::from_str(&out) {
self.produce(Type::Keyword(keyword), ())
} else {
self.produce(Type::Identifier, TokenData::Identifier(out.into()))
}
}
/// Matches a single char
pub fn char(self, c: char) -> Self {
self.has(|rule| rule.text.starts_with(c), 1)
fn xid_start(&mut self) -> LResult<char> {
match self.peek()? {
xid if xid == '_' || xid.is_xid_start() => {
self.consume()?;
Ok(xid)
}
bad => Err(Error::not_identifier(bad, self.line(), self.col())),
}
}
/// Matches the entirety of a string slice
pub fn str(self, s: &str) -> Self {
self.has(|rule| rule.text.starts_with(s), s.len())
fn xid_continue(&mut self) -> LResult<char> {
match self.peek()? {
xid if xid.is_xid_continue() => {
self.consume()?;
Ok(xid)
}
bad => Err(Error::not_identifier(bad, self.line(), self.col())),
}
}
/// Matches a char based on the output of a function
pub fn char_fn(self, f: impl Fn(char) -> bool) -> Self {
self.and(|rule| match rule.text.strip_prefix(&f) {
Some(text) => Self { text, taken: rule.taken + next_utf8(rule.text, 1), ..rule },
None => Self { is_alright: false, ..rule },
}
/// Integers
impl<'t> Lexer<'t> {
fn int_with_base(&mut self) -> LResult<Token> {
match self.peek() {
Ok('x') => self.consume()?.digits::<16>(),
Ok('d') => self.consume()?.digits::<10>(),
Ok('o') => self.consume()?.digits::<8>(),
Ok('b') => self.consume()?.digits::<2>(),
Ok('0'..='9') => self.digits::<10>(),
_ => self.produce(Type::Integer, 0),
}
}
fn digits<const B: u32>(&mut self) -> LResult<Token> {
let mut value = self.digit::<B>()? as u128;
while let Ok(true) = self.peek().as_ref().map(char::is_ascii_alphanumeric) {
value = value * B as u128 + self.digit::<B>()? as u128;
}
self.produce(Type::Integer, value)
}
fn digit<const B: u32>(&mut self) -> LResult<u32> {
let digit = self.peek()?;
self.consume()?;
digit
.to_digit(B)
.ok_or(Error::invalid_digit(digit, self.line(), self.col()))
}
}
/// Strings and characters
impl<'t> Lexer<'t> {
fn string(&mut self) -> LResult<Token> {
let mut value = String::new();
while '"'
!= self
.peek()
.map_err(|e| e.mask_reason(Reason::UnmatchedDelimiters('"')))?
{
value.push(self.unescape()?)
}
self.consume()?.produce(Type::String, value)
}
fn character(&mut self) -> LResult<Token> {
let out = self.unescape()?;
match self.peek()? {
'\'' => self.consume()?.produce(Type::Character, out),
_ => Err(Error::unmatched_delimiters('\'', self.line(), self.col())),
}
}
/// Unescape a single character
fn unescape(&mut self) -> LResult<char> {
match self.next() {
Ok('\\') => (),
other => return other,
}
Ok(match self.next()? {
'a' => '\x07',
'b' => '\x08',
'f' => '\x0c',
'n' => '\n',
'r' => '\r',
't' => '\t',
'x' => self.hex_escape()?,
'u' => self.unicode_escape()?,
'0' => '\0',
chr => chr,
})
}
/// Matches a single char except c
pub fn not_char(self, c: char) -> Self {
self.has(|rule| !rule.text.starts_with(c), 1)
/// unescape a single 2-digit hex escape
fn hex_escape(&mut self) -> LResult<char> {
let out = (self.digit::<16>()? << 4) + self.digit::<16>()?;
char::from_u32(out).ok_or(Error::bad_unicode(out, self.line(), self.col()))
}
/// Matches a single char unless the text starts with s
pub fn not_str(self, s: &str) -> Self {
self.has(|rule| !rule.text.starts_with(s), 1)
}
// commonly used character classes
/// Matches one of any character
pub fn any(self) -> Self {
self.has(|_| true, 1)
}
/// Matches one whitespace
pub fn whitespace(self) -> Self {
self.char_fn(|c| c.is_whitespace())
}
/// Matches one whitespace, except `'\n'`
pub fn whitespace_not_newline(self) -> Self {
self.char_fn(|c| '\n' != c && c.is_whitespace())
}
/// Matches anything but whitespace
pub fn not_whitespace(self) -> Self {
self.char_fn(|c| !c.is_whitespace())
}
/// Matches one XID_START
pub fn xid_start(self) -> Self {
use unicode_xid::UnicodeXID;
self.char_fn(UnicodeXID::is_xid_start)
}
/// Matches one XID_CONTINUE
pub fn xid_continue(self) -> Self {
use unicode_xid::UnicodeXID;
self.char_fn(UnicodeXID::is_xid_continue)
}
/// Matches one hexadecimal digit
pub fn hex_digit(self) -> Self {
self.char_fn(|c| c.is_ascii_hexdigit())
}
/// Matches one decimal digit
pub fn dec_digit(self) -> Self {
self.char_fn(|c| c.is_ascii_digit())
}
/// Matches one octal digit
pub fn oct_digit(self) -> Self {
self.char_between('0', '7')
}
/// Matches one binary digit
pub fn bin_digit(self) -> Self {
self.char_between('0', '1')
}
/// Matches any string escape "\."
pub fn string_escape(self) -> Self {
self.char('\\').and(Rule::any)
}
/// Performs a consuming condition assertion on the input
fn has(self, condition: impl Fn(&Self) -> bool, len: usize) -> Self {
let len = next_utf8(self.text, len);
self.and(|rule| match condition(&rule) && !rule.text.is_empty() {
true => Self { text: &rule.text[len..], taken: rule.taken + len, ..rule },
false => Self { is_alright: false, ..rule },
})
/// unescape a single \u{} unicode escape
fn unicode_escape(&mut self) -> LResult<char> {
let mut out = 0;
let Ok('{') = self.peek() else {
return Err(Error::invalid_escape('u', self.line(), self.col()));
};
self.consume()?;
while let Ok(c) = self.peek() {
match c {
'}' => {
self.consume()?;
return char::from_u32(out).ok_or(Error::bad_unicode(
out,
self.line(),
self.col(),
));
}
_ => out = (out << 4) + self.digit::<16>()?,
}
}
Err(Error::invalid_escape('u', self.line(), self.col()))
}
}
impl<'t> lerox::Combinator for Rule<'t> {
fn is_alright(&self) -> bool {
self.is_alright
}
fn into_alright(self) -> Self {
Self { is_alright: true, ..self }
}
}
use error::{Error, LResult, Reason};
pub mod error {
use std::fmt::Display;
/// Returns the index of the next unicode character, rounded up
fn next_utf8(text: &str, mut index: usize) -> usize {
index = index.min(text.len());
while !text.is_char_boundary(index) {
index += 1
pub type LResult<T> = Result<T, Error>;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Error {
pub reason: Reason,
pub line: u32,
pub col: u32,
}
/// The reason for the [Error]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Reason {
UnmatchedDelimiters(char),
UnexpectedChar(char),
NotIdentifier(char),
UnknownEscape(char),
InvalidEscape(char),
InvalidDigit(char),
UnknownBase(char),
BadUnicode(u32),
EndOfFile,
}
error_impl! {
unmatched_delimiters(c: char) => Reason::UnmatchedDelimiters(c),
unexpected_char(c: char) => Reason::UnexpectedChar(c),
not_identifier(c: char) => Reason::NotIdentifier(c),
unknown_escape(e: char) => Reason::UnknownEscape(e),
invalid_escape(e: char) => Reason::InvalidEscape(e),
invalid_digit(digit: char) => Reason::InvalidDigit(digit),
unknown_base(base: char) => Reason::UnknownBase(base),
bad_unicode(value: u32) => Reason::BadUnicode(value),
end_of_file => Reason::EndOfFile,
}
impl Error {
/// Changes the [Reason] of this error
pub(super) fn mask_reason(self, reason: Reason) -> Self {
Self { reason, ..self }
}
/// Gets the (line, col) where the error happened
pub fn location(&self) -> (u32, u32) {
(self.line, self.col)
}
}
macro error_impl ($($fn:ident$(( $($p:ident: $t:ty),* ))? => $reason:expr),*$(,)?) {
#[allow(dead_code)]
impl Error {
$(pub(super) fn $fn ($($($p: $t),*,)? line: u32, col: u32) -> Self {
Self { reason: $reason, line, col }
})*
}
}
impl Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}:{}: {}", self.line, self.col, self.reason)
}
}
impl Display for Reason {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Reason::UnmatchedDelimiters(c) => write! {f, "Unmatched `{c}` in input"},
Reason::UnexpectedChar(c) => write!(f, "Character `{c}` not expected"),
Reason::NotIdentifier(c) => write!(f, "Character `{c}` not valid in identifiers"),
Reason::UnknownEscape(c) => write!(f, "`\\{c}` is not a known escape sequence"),
Reason::InvalidEscape(c) => write!(f, "Escape sequence `\\{c}`... is malformed"),
Reason::InvalidDigit(c) => write!(f, "`{c}` is not a valid digit"),
Reason::UnknownBase(c) => write!(f, "`0{c}`... is not a valid base"),
Reason::BadUnicode(c) => write!(f, "`{c}` is not a valid unicode code-point"),
Reason::EndOfFile => write!(f, "Reached end of input"),
}
}
}
index
}
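
An aside on the integer scanner above: digits::<B> folds each digit into a running value instead of slicing and re-parsing text. The same idea as a standalone sketch (hypothetical helper, not part of this diff):

    /// Positional accumulation: one multiply and one add per digit in base B.
    fn accumulate<const B: u32>(s: &str) -> Option<u128> {
        let mut value: u128 = 0;
        for c in s.chars() {
            value = value * B as u128 + c.to_digit(B)? as u128;
        }
        Some(value)
    }

    fn main() {
        assert_eq!(accumulate::<16>("2100"), Some(0x2100));
        assert_eq!(accumulate::<2>("1010"), Some(10));
    }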

View File

@@ -1,12 +1,10 @@
//! Parses [tokens](super::token) into an [AST](super::ast)
use std::vec;
use super::{
ast::preamble::*,
lexer::Lexer,
token::{Keyword, Token, Type},
token::{Keyword, Token, TokenData, Type},
};
use constr::ConstrTools;
use error::{Error, Reason::*, *};
pub mod error {
@@ -16,6 +14,7 @@ pub mod error {
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum Reason {
Expected(Type),
Unexpected(Type),
NotIdentifier,
NotOperator,
NotLiteral,
@@ -29,7 +28,6 @@ pub mod error {
IntOverflow,
NotBranch,
IncompleteBranch,
AllElseFailed,
EndOfFile,
PanicStackUnderflow,
#[default]
@@ -41,6 +39,7 @@ pub mod error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Expected(t) => write!(f, "Expected {t}"),
Self::Unexpected(t) => write!(f, "Unexpected {t} in bagging area"),
Self::NotIdentifier => "Not an identifier".fmt(f),
Self::NotOperator => "Not an operator".fmt(f),
Self::NotLiteral => "Not a literal".fmt(f),
@@ -54,7 +53,6 @@ pub mod error {
Self::IntOverflow => "Integer too large".fmt(f),
Self::IncompleteBranch => "Branch expression was incomplete".fmt(f),
Self::NotBranch => "Expected branch expression".fmt(f),
Self::AllElseFailed => "Did not match any rule".fmt(f),
Self::EndOfFile => "Got end of file".fmt(f),
Self::PanicStackUnderflow => "Could not recover from panic".fmt(f),
Self::Unspecified => {
@@ -66,7 +64,7 @@ pub mod error {
/// [Parser](super::Parser) [Result]
pub type PResult<T> = Result<T, Error>;
#[derive(Clone, Debug, Default, PartialEq, Eq)]
#[derive(Clone, Debug, Default, PartialEq)]
pub struct Error {
reason: Reason,
start: Option<Token>,
@@ -74,7 +72,7 @@ pub mod error {
impl Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if let Some(token) = self.start {
if let Some(token) = &self.start {
write!(f, "{}:{}: ", token.line(), token.col())?;
}
write!(f, "{}", self.reason)
@@ -95,14 +93,15 @@ pub mod error {
pub fn maybe_token(self, start: Option<Token>) -> Self {
Self { start, ..self }
}
pub fn start(&self) -> Option<Token> {
self.start
pub fn start(&self) -> Option<&Token> {
self.start.as_ref()
}
pub fn reason(self, reason: Reason) -> Self {
Self { reason, ..self }
}
error_impl! {
expected(e: Type): Expected,
unexpected(e: Type): Unexpected,
not_identifier: NotIdentifier,
not_operator: NotOperator,
not_literal: NotLiteral,
@@ -115,7 +114,6 @@ pub mod error {
not_int: NotInt,
int_overflow: IntOverflow,
not_branch: NotBranch,
all_else_failed: AllElseFailed,
end_of_file: EndOfFile,
panic_underflow: PanicStackUnderflow,
unspecified: Unspecified,
@@ -125,27 +123,32 @@ pub mod error {
/// The Parser performs recursive descent on the AST's grammar
/// using a provided [Lexer].
pub struct Parser<'t> {
pub struct Parser {
tokens: Vec<Token>,
panic_stack: Vec<usize>,
text: &'t str,
curr: usize,
}
impl<'t> From<Lexer<'t>> for Parser<'t> {
impl<'t> From<Lexer<'t>> for Parser {
fn from(value: Lexer<'t>) -> Self {
let (tokens, text) = value.consume();
Self::new(tokens, text)
let mut tokens = vec![];
for result in value {
match result {
Ok(t) => tokens.push(t),
Err(e) => println!("{e}"),
}
}
Self::new(tokens)
}
}
impl<'t> Parser<'t> {
impl Parser {
/// Create a new [Parser] from a list of [Tokens][1]
/// and the [text](str) used to generate that list
/// (as [Tokens][1] do not store their strings)
///
/// [1]: Token
pub fn new(tokens: Vec<Token>, text: &'t str) -> Self {
Self { tokens, text, panic_stack: vec![], curr: 0 }
pub fn new(tokens: Vec<Token>) -> Self {
Self { tokens, panic_stack: vec![], curr: 0 }
}
/// Parse the [start of an AST](Start)
pub fn parse(&mut self) -> PResult<Start> {
@@ -170,7 +173,7 @@ impl<'t> Parser<'t> {
pub fn peek(&self) -> PResult<&Token> {
self.tokens
.get(self.curr)
.ok_or(Error::end_of_file().maybe_token(self.tokens.last().copied()))
.ok_or(Error::end_of_file().maybe_token(self.tokens.last().cloned()))
}
/// Records the current position on the panic stack
pub fn mark(&mut self) -> &mut Self {
@@ -198,7 +201,7 @@ impl<'t> Parser<'t> {
}
}
/// Helpers
impl<'t> Parser<'t> {
impl Parser {
fn consume_type(&mut self, t: Type) -> PResult<&mut Self> {
self.matches(t)?;
Ok(self.consume())
@@ -207,17 +210,17 @@ impl<'t> Parser<'t> {
if self.curr < self.tokens.len() {
Ok(self)
} else {
Err(Error::end_of_file().maybe_token(self.tokens.last().copied()))
Err(Error::end_of_file().maybe_token(self.tokens.last().cloned()))
}
}
fn todo_error(&mut self, l: u32, c: u32, s: &str) -> Error {
eprintln!("TODO: {s}:{l}:{c}");
Error::unspecified().token(*self.peek().unwrap())
Error::unspecified().token(self.peek().unwrap().clone())
}
fn matches(&mut self, e: Type) -> PResult<&Token> {
let t = self.check_eof()?.peek().expect("self should not be eof");
if t.ty() != e {
Err(Error::expected(e).token(*t))?
Err(Error::expected(e).token(t.clone()))?
}
Ok(t)
}
@@ -250,51 +253,54 @@ macro ptodo($self:expr $(, $t:expr)*) {
}
/// # Terminals and Pseudo-Terminals
impl<'t> Parser<'t> {
impl Parser {
fn identifier(&mut self) -> PResult<Identifier> {
let token = *self
.matches(Type::Identifier)
.map_err(|e| Error::not_identifier().maybe_token(e.start()))?;
Ok(Identifier(self.consume().text[&token].into()))
let out = match self.matches(Type::Identifier)?.data() {
TokenData::Identifier(id) => Identifier(id.to_string()),
_ => Err(Error::not_identifier())?,
};
self.consume();
Ok(out)
}
fn literal(&mut self) -> PResult<literal::Literal> {
use literal::Literal::*;
use Keyword::{False, True};
let tok = self.peek()?;
match tok.ty() {
let token = self.peek()?;
match token.ty() {
Type::Float => self.float().map(Float),
Type::Integer => self.int().map(Int),
Type::String => self.string().map(String),
Type::Character => self.char().map(Char),
Type::Keyword(True | False) => self.bool().map(Bool),
_ => Err(Error::not_literal().token(*tok)),
_ => Err(Error::not_literal().token(token.clone())),
}
}
fn float(&mut self) -> PResult<literal::Float> {
ptodo!(self)
}
fn int(&mut self) -> PResult<u128> {
let token = *self.matches(Type::Integer)?;
self.consume().text[&token]
.chars()
.parse_int::<u128>()
.next()
.ok_or(Error::not_int().token(token))
let out = match self.matches(Type::Integer)?.data() {
TokenData::Integer(i) => *i,
_ => Err(Error::not_int())?,
};
self.consume();
Ok(out)
}
fn string(&mut self) -> PResult<String> {
let range = self
.matches(Type::String)
.map_err(|e| e.reason(NotString))?
.range();
Ok(self.consume().text[range].chars().unescape().collect())
let out = match self.matches(Type::String)?.data() {
TokenData::String(s) => s.clone(),
_ => Err(Error::not_string())?,
};
self.consume();
Ok(out)
}
fn char(&mut self) -> PResult<char> {
let token = *self.matches(Type::Character)?;
self.consume().text[&token]
.chars()
.unescape()
.next()
.ok_or(Error::not_char().token(token))
let out = match self.matches(Type::Character)?.data() {
TokenData::Character(c) => *c,
_ => Err(Error::not_char())?,
};
self.consume();
Ok(out)
}
fn bool(&mut self) -> PResult<bool> {
use Keyword::{False, True};
@@ -302,14 +308,14 @@ impl<'t> Parser<'t> {
let out = match token.ty() {
Type::Keyword(False) => false,
Type::Keyword(True) => true,
_ => Err(Error::not_bool().token(*token))?,
_ => Err(Error::not_bool().token(token.clone()))?,
};
self.consume();
Ok(out)
}
}
/// Expressions
impl<'t> Parser<'t> {
impl Parser {
fn expr(&mut self) -> PResult<expression::Expr> {
use expression::Expr;
Ok(Expr { ignore: self.ignore()? })
@@ -335,7 +341,7 @@ impl<'t> Parser<'t> {
}
fn primary(&mut self) -> PResult<expression::Primary> {
use expression::Primary;
let token = *self.peek()?;
let token = self.peek()?;
match token.ty() {
Type::Identifier => self.identifier().map(Primary::Identifier),
Type::String
@@ -346,7 +352,7 @@ impl<'t> Parser<'t> {
Type::LCurly => self.block().map(Primary::Block),
Type::LParen => self.group().map(Primary::Group),
Type::Keyword(_) => self.flow().map(Primary::Branch),
_ => Err(Error::all_else_failed().token(token))?,
e => Err(Error::unexpected(e).token(token.clone()))?,
}
}
}
@@ -377,7 +383,7 @@ macro binary ($($f:ident = $a:ident, $b:ident);*$(;)?) {$(
}
)*}
/// # [Arithmetic and Logical Subexpressions](math)
impl<'t> Parser<'t> {
impl Parser {
binary! {
//name operands operators
ignore = assign, ignore_op;
@@ -400,18 +406,19 @@ impl<'t> Parser<'t> {
}
macro operator_impl ($($(#[$m:meta])* $f:ident : {$($type:pat => $op:ident),*$(,)?})*) {
$($(#[$m])* fn $f(&mut self) -> PResult<operator::Binary> {
use operator::Binary;
let token = *self.peek()?;
let token = self.peek()?;
let out = Ok(match token.ty() {
$($type => Binary::$op,)*
_ => Err(Error::not_operator().token(token))?,
_ => Err(Error::not_operator().token(token.clone()))?,
});
self.consume();
out
})*
}
/// # [Operators](operator)
impl<'t> Parser<'t> {
impl Parser {
operator_impl! {
factor_op: {
Type::Star => Mul,
@@ -465,7 +472,7 @@ impl<'t> Parser<'t> {
/// Parse a [unary operator](operator::Unary)
fn unary_op(&mut self) -> PResult<operator::Unary> {
use operator::Unary;
let token = *self.peek()?;
let token = self.peek()?;
let out = Ok(match token.ty() {
Type::AmpAmp => Unary::RefRef,
Type::Amp => Unary::Ref,
@@ -475,18 +482,18 @@ impl<'t> Parser<'t> {
Type::At => Unary::At,
Type::Hash => Unary::Hash,
Type::Tilde => Unary::Tilde,
_ => Err(Error::not_operator().token(token))?,
_ => Err(Error::not_operator().token(token.clone()))?,
});
self.consume();
out
}
}
/// # [Control Flow](control)
impl<'t> Parser<'t> {
impl Parser {
fn flow(&mut self) -> PResult<control::Flow> {
use control::Flow;
use Keyword::{Break, Continue, For, If, Return, While};
let token = *self.peek()?;
let token = self.peek()?;
match token.ty() {
Type::Keyword(While) => self.parse_while().map(Flow::While),
Type::Keyword(For) => self.parse_for().map(Flow::For),
@@ -494,9 +501,9 @@ impl<'t> Parser<'t> {
Type::Keyword(Break) => self.parse_break().map(Flow::Break),
Type::Keyword(Return) => self.parse_return().map(Flow::Return),
Type::Keyword(Continue) => self.parse_continue().map(Flow::Continue),
_ => Err(Error::all_else_failed().token(token)),
e => Err(Error::unexpected(e).token(token.clone()))?,
}
.map_err(|e| e.reason(IncompleteBranch).token(token))
.map_err(|e| e.reason(IncompleteBranch))
}
fn parse_if(&mut self) -> PResult<control::If> {
self.keyword(Keyword::If)?;
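
The terminal parsers above all share one shape: match on the token's TokenData payload, then consume. Isolated as a sketch (hypothetical free function; the TokenData variants are the real ones):

    use conlang::token::{Token, TokenData};

    /// Read an integer's payload straight off the token,
    /// with no source text or re-parsing involved.
    fn integer_value(token: &Token) -> Option<u128> {
        match token.data() {
            TokenData::Integer(i) => Some(*i),
            _ => None, // the parser maps this case to Error::not_int()
        }
    }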

View File

@@ -1,476 +1,180 @@
mod token {
use crate::token::*;
#[test]
fn token_has_type() {
assert_eq!(Token::new(Type::Comment, 0, 10, 1, 1).ty(), Type::Comment);
assert_eq!(
Token::new(Type::Identifier, 0, 10, 1, 1).ty(),
Type::Identifier
);
}
#[test]
fn token_has_range() {
let t = Token::new(Type::Comment, 0, 10, 1, 1);
assert_eq!(t.range(), 0..10);
}
// TODO
}
mod ast {
// TODO
}
mod lexer {
use std::ops::Range;
#[allow(unused_imports)]
use crate::{
lexer::*,
token::{Token, Type},
lexer::Lexer,
token::{Token, TokenData, Keyword, Type},
};
fn assert_whole_input_is_token<'t, F>(input: &'t str, f: F, ty: Type)
where F: FnOnce(&mut Lexer<'t>) -> Option<Token> {
assert_has_type_and_range(input, f, ty, 0..input.len())
}
fn assert_has_type_and_range<'t, F>(input: &'t str, f: F, ty: Type, range: Range<usize>)
where F: FnOnce(&mut Lexer<'t>) -> Option<Token> {
let tok =
f(&mut Lexer::new(input)).unwrap_or_else(|| panic!("Should be {ty:?}, {range:?}"));
assert_eq!(ty, tok.ty());
assert_eq!(range, tok.range());
}
mod comment {
use super::*;
macro test_lexer_output_type ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$(
#[test]
fn line_comment() {
assert_whole_input_is_token("// comment!", Lexer::comment, Type::Comment);
}
#[test]
#[should_panic]
fn not_line_comment() {
assert_whole_input_is_token("fn main() {}", Lexer::comment, Type::Comment);
}
#[test]
fn block_comment() {
assert_whole_input_is_token("/* comment! */", Lexer::comment, Type::Comment);
}
#[test]
fn nested_block_comment() {
assert_whole_input_is_token(
"/* a /* nested */ comment */",
Lexer::comment,
Type::Comment,
fn $f() {$(
assert_eq!(
Lexer::new($test)
.into_iter()
.map(|t| t.unwrap().ty())
.collect::<Vec<_>>(),
dbg!($expect)
);
}
)*}
)*}
macro test_lexer_data_type ($($f:ident {$($test:expr => $expect:expr),*$(,)?})*) {$(
#[test]
#[should_panic]
fn unclosed_nested_comment() {
assert_whole_input_is_token(
"/* improperly /* nested */ comment",
Lexer::comment,
Type::Comment,
fn $f() {$(
assert_eq!(
Lexer::new($test)
.into_iter()
.map(|t| t.unwrap().into_data())
.collect::<Vec<_>>(),
dbg!($expect)
);
}
#[test]
#[should_panic]
fn not_block_comment() {
assert_whole_input_is_token("fn main() {}", Lexer::comment, Type::Comment);
}
#[test]
fn shebang_comment() {
assert_whole_input_is_token("#!/ comment!", Lexer::comment, Type::Comment);
}
#[test]
#[should_panic]
fn not_shebang_comment() {
assert_whole_input_is_token("fn main() {}", Lexer::comment, Type::Comment);
}
}
mod identifier {
use super::*;
)*}
)*}
#[test]
fn identifier() {
assert_whole_input_is_token("valid_identifier", Lexer::identifier, Type::Identifier);
assert_whole_input_is_token("_0", Lexer::identifier, Type::Identifier);
assert_whole_input_is_token("_", Lexer::identifier, Type::Identifier);
}
#[test]
fn unicode_identifier() {
assert_whole_input_is_token("ζ_ζζζ_ζζζ_ζζζ", Lexer::identifier, Type::Identifier);
assert_whole_input_is_token("_ζζζ_ζζζ_ζζζ_", Lexer::identifier, Type::Identifier);
}
#[test]
#[should_panic]
fn not_identifier() {
assert_whole_input_is_token("123456789", Lexer::identifier, Type::Identifier);
}
/// Convert an `[ expr, ... ]` into a `[ TokenData, ... ]`
macro td ($($id:expr),*) {
[$($id.into()),*]
}
mod literal {
use super::*;
#[test]
fn literal_class() {
assert_whole_input_is_token("1_00000", Lexer::literal, Type::Integer);
assert_whole_input_is_token("1.00000", Lexer::literal, Type::Float);
assert_has_type_and_range("\"1.0\"", Lexer::literal, Type::String, 1..4);
assert_has_type_and_range("'\"'", Lexer::literal, Type::Character, 1..2);
}
mod integer {
use super::*;
#[test]
fn bare() {
assert_whole_input_is_token("10010110", Lexer::integer, Type::Integer);
assert_whole_input_is_token("12345670", Lexer::integer, Type::Integer);
assert_whole_input_is_token("1234567890", Lexer::integer, Type::Integer);
}
#[test]
fn base16() {
assert_has_type_and_range("0x1234", Lexer::integer, Type::Integer, 0..6);
assert_has_type_and_range("0x1234 \"hello\"", Lexer::integer, Type::Integer, 0..6);
}
#[test]
fn base10() {
assert_whole_input_is_token("0d1234", Lexer::integer, Type::Integer);
}
#[test]
fn base8() {
assert_whole_input_is_token("0o1234", Lexer::integer, Type::Integer);
}
#[test]
fn base2() {
assert_whole_input_is_token("0b1010", Lexer::integer, Type::Integer);
}
}
mod float {
use super::*;
#[test]
fn number_dot_number_is_float() {
assert_whole_input_is_token("1.0", Lexer::float, Type::Float);
}
#[test]
fn nothing_dot_number_is_float() {
assert_whole_input_is_token(".0", Lexer::float, Type::Float);
}
#[test]
#[should_panic]
fn number_dot_nothing_is_not_float() {
assert_whole_input_is_token("1.", Lexer::float, Type::Float);
}
#[test]
#[should_panic]
fn nothing_dot_nothing_is_not_float() {
assert_whole_input_is_token(".", Lexer::float, Type::Float);
}
}
mod string {
use super::*;
#[test]
fn empty_string() {
assert_has_type_and_range("\"\"", Lexer::string, Type::String, 1..1);
}
#[test]
fn unicode_string() {
assert_has_type_and_range("\"I 💙 🦈!\"", Lexer::string, Type::String, 1..13);
}
#[test]
fn escape_string() {
assert_has_type_and_range(
"\" \\\"This is a quote\\\" \"",
Lexer::string,
Type::String,
1..22,
);
}
}
mod char {
use super::*;
#[test]
fn plain_char() {
assert_has_type_and_range("'A'", Lexer::character, Type::Character, 1..2);
assert_has_type_and_range("'a'", Lexer::character, Type::Character, 1..2);
assert_has_type_and_range("'#'", Lexer::character, Type::Character, 1..2);
}
#[test]
fn unicode_char() {
assert_has_type_and_range("'ε'", Lexer::character, Type::Character, 1..3);
}
#[test]
fn escaped_char() {
assert_has_type_and_range("'\\n'", Lexer::character, Type::Character, 1..3);
}
#[test]
#[should_panic]
fn no_char() {
assert_has_type_and_range("''", Lexer::character, Type::Character, 1..1);
}
}
}
mod delimiter {
use super::*;
#[test]
fn delimiter_class() {
assert_whole_input_is_token("[", Lexer::delimiter, Type::LBrack);
assert_whole_input_is_token("]", Lexer::delimiter, Type::RBrack);
assert_whole_input_is_token("{", Lexer::delimiter, Type::LCurly);
assert_whole_input_is_token("}", Lexer::delimiter, Type::RCurly);
assert_whole_input_is_token("(", Lexer::delimiter, Type::LParen);
assert_whole_input_is_token(")", Lexer::delimiter, Type::RParen);
}
#[test]
fn l_brack() {
assert_whole_input_is_token("[", Lexer::l_brack, Type::LBrack);
}
#[test]
fn r_brack() {
assert_whole_input_is_token("]", Lexer::r_brack, Type::RBrack);
}
#[test]
fn l_curly() {
assert_whole_input_is_token("{", Lexer::l_curly, Type::LCurly);
}
#[test]
fn r_curly() {
assert_whole_input_is_token("}", Lexer::r_curly, Type::RCurly);
}
#[test]
fn l_paren() {
assert_whole_input_is_token("(", Lexer::l_paren, Type::LParen);
mod ident {
use super::*;
macro ident ($($id:literal),*) {
[$(TokenData::Identifier($id.into())),*]
}
#[test]
fn r_paren() {
assert_whole_input_is_token(")", Lexer::r_paren, Type::RParen);
test_lexer_data_type! {
underscore { "_ _" => ident!["_", "_"] }
unicode { "_ε ε_" => ident!["_ε", "ε_"] }
many_underscore { "____________________________________" =>
ident!["____________________________________"] }
}
}
mod punctuation {
mod keyword {
use super::*;
mod compound {
use super::*;
#[test]
fn dot_dot() {
assert_whole_input_is_token("..", Lexer::dot_dot, Type::DotDot)
macro kw($($k:ident),*) {
[ $(Type::Keyword(Keyword::$k),)* ]
}
test_lexer_output_type! {
kw_break { "break break" => kw![Break, Break] }
kw_continue { "continue continue" => kw![Continue, Continue] }
kw_else { "else else" => kw![Else, Else] }
kw_false { "false false" => kw![False, False] }
kw_for { "for for" => kw![For, For] }
kw_fn { "fn fn" => kw![Fn, Fn] }
kw_if { "if if" => kw![If, If] }
kw_in { "in in" => kw![In, In] }
kw_let { "let let" => kw![Let, Let] }
kw_return { "return return" => kw![Return, Return] }
kw_true { "true true" => kw![True, True] }
kw_while { "while while" => kw![While, While] }
keywords { "break continue else false for fn if in let return true while" =>
kw![Break, Continue, Else, False, For, Fn, If, In, Let, Return, True, While] }
}
}
mod integer {
use super::*;
test_lexer_data_type! {
hex {
"0x0 0x1 0x15 0x2100 0x8000" =>
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
}
#[test]
fn dot_dot_eq() {
assert_whole_input_is_token("..=", Lexer::dot_dot_eq, Type::DotDotEq)
dec {
"0d0 0d1 0d21 0d8448 0d32768" =>
td![0, 0x1, 0x15, 0x2100, 0x8000]
}
#[test]
fn lt_lt() {
assert_whole_input_is_token("<<", Lexer::lt_lt, Type::LtLt)
oct {
"0o0 0o1 0o25 0o20400 0o100000" =>
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
}
#[test]
fn gt_gt() {
assert_whole_input_is_token(">>", Lexer::gt_gt, Type::GtGt)
bin {
"0b0 0b1 0b10101 0b10000100000000 0b1000000000000000" =>
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
}
#[test]
fn amp_amp() {
assert_whole_input_is_token("&&", Lexer::amp_amp, Type::AmpAmp)
}
#[test]
fn bar_bar() {
assert_whole_input_is_token("||", Lexer::bar_bar, Type::BarBar)
}
#[test]
fn bang_bang() {
assert_whole_input_is_token("!!", Lexer::bang_bang, Type::BangBang)
}
#[test]
fn xor_xor() {
assert_whole_input_is_token("^^", Lexer::xor_xor, Type::XorXor)
}
#[test]
fn eq_eq() {
assert_whole_input_is_token("==", Lexer::eq_eq, Type::EqEq)
}
#[test]
fn gt_eq() {
assert_whole_input_is_token(">=", Lexer::gt_eq, Type::GtEq)
}
#[test]
fn lt_eq() {
assert_whole_input_is_token("<=", Lexer::lt_eq, Type::LtEq)
}
#[test]
fn bang_eq() {
assert_whole_input_is_token("!=", Lexer::bang_eq, Type::BangEq)
}
#[test]
fn star_eq() {
assert_whole_input_is_token("*=", Lexer::star_eq, Type::StarEq)
}
#[test]
fn slash_eq() {
assert_whole_input_is_token("/=", Lexer::slash_eq, Type::SlashEq)
}
#[test]
fn plus_eq() {
assert_whole_input_is_token("+=", Lexer::plus_eq, Type::PlusEq)
}
#[test]
fn minus_eq() {
assert_whole_input_is_token("-=", Lexer::minus_eq, Type::MinusEq)
}
#[test]
fn amp_eq() {
assert_whole_input_is_token("&=", Lexer::amp_eq, Type::AmpEq)
}
#[test]
fn bar_eq() {
assert_whole_input_is_token("|=", Lexer::bar_eq, Type::BarEq)
}
#[test]
fn xor_eq() {
assert_whole_input_is_token("^=", Lexer::xor_eq, Type::XorEq)
}
#[test]
fn lt_lt_eq() {
assert_whole_input_is_token("<<=", Lexer::lt_lt_eq, Type::LtLtEq)
}
#[test]
fn gt_gt_eq() {
assert_whole_input_is_token(">>=", Lexer::gt_gt_eq, Type::GtGtEq)
baseless {
"0 1 21 8448 32768" =>
td![0x0, 0x1, 0x15, 0x2100, 0x8000]
}
}
mod simple {
use super::*;
#[test]
fn punctuation_class() {
// go from least to most specific
assert_whole_input_is_token(";", Lexer::punctuation, Type::Semi);
assert_whole_input_is_token(".", Lexer::punctuation, Type::Dot);
assert_whole_input_is_token("*", Lexer::punctuation, Type::Star);
assert_whole_input_is_token("/", Lexer::punctuation, Type::Slash);
assert_whole_input_is_token("+", Lexer::punctuation, Type::Plus);
assert_whole_input_is_token("-", Lexer::punctuation, Type::Minus);
assert_whole_input_is_token("%", Lexer::punctuation, Type::Rem);
assert_whole_input_is_token("!", Lexer::punctuation, Type::Bang);
assert_whole_input_is_token("=", Lexer::punctuation, Type::Eq);
assert_whole_input_is_token("<", Lexer::punctuation, Type::Lt);
assert_whole_input_is_token(">", Lexer::punctuation, Type::Gt);
assert_whole_input_is_token("&", Lexer::punctuation, Type::Amp);
assert_whole_input_is_token("|", Lexer::punctuation, Type::Bar);
assert_whole_input_is_token("^", Lexer::punctuation, Type::Xor);
assert_whole_input_is_token("#", Lexer::punctuation, Type::Hash);
assert_whole_input_is_token("@", Lexer::punctuation, Type::At);
assert_whole_input_is_token(":", Lexer::punctuation, Type::Colon);
assert_whole_input_is_token("?", Lexer::punctuation, Type::Question);
assert_whole_input_is_token(",", Lexer::punctuation, Type::Comma);
assert_whole_input_is_token("~", Lexer::punctuation, Type::Tilde);
assert_whole_input_is_token("`", Lexer::punctuation, Type::Grave);
assert_whole_input_is_token("\\", Lexer::punctuation, Type::Backslash);
assert_whole_input_is_token("<<", Lexer::punctuation, Type::LtLt);
assert_whole_input_is_token(">>", Lexer::punctuation, Type::GtGt);
assert_whole_input_is_token("&&", Lexer::punctuation, Type::AmpAmp);
assert_whole_input_is_token("||", Lexer::punctuation, Type::BarBar);
assert_whole_input_is_token("!!", Lexer::punctuation, Type::BangBang);
assert_whole_input_is_token("^^", Lexer::punctuation, Type::XorXor);
assert_whole_input_is_token("==", Lexer::punctuation, Type::EqEq);
assert_whole_input_is_token(">=", Lexer::punctuation, Type::GtEq);
assert_whole_input_is_token("<=", Lexer::punctuation, Type::LtEq);
assert_whole_input_is_token("!=", Lexer::punctuation, Type::BangEq);
assert_whole_input_is_token("*=", Lexer::punctuation, Type::StarEq);
assert_whole_input_is_token("/=", Lexer::punctuation, Type::SlashEq);
assert_whole_input_is_token("+=", Lexer::punctuation, Type::PlusEq);
assert_whole_input_is_token("-=", Lexer::punctuation, Type::MinusEq);
assert_whole_input_is_token("&=", Lexer::punctuation, Type::AmpEq);
assert_whole_input_is_token("|=", Lexer::punctuation, Type::BarEq);
assert_whole_input_is_token("^=", Lexer::punctuation, Type::XorEq);
assert_whole_input_is_token("..", Lexer::punctuation, Type::DotDot);
assert_whole_input_is_token("..=", Lexer::punctuation, Type::DotDotEq);
assert_whole_input_is_token("<<=", Lexer::punctuation, Type::LtLtEq);
assert_whole_input_is_token(">>=", Lexer::punctuation, Type::GtGtEq);
}
mod string {
use super::*;
test_lexer_data_type! {
empty_string {
"\"\"" =>
td![String::from("")]
}
// individual functions below
#[test]
fn semi() {
assert_whole_input_is_token(";", Lexer::semi, Type::Semi)
unicode_string {
"\"I 💙 🦈!\"" =>
td![String::from("I 💙 🦈!")]
}
#[test]
fn dot() {
assert_whole_input_is_token(".", Lexer::dot, Type::Dot)
}
#[test]
fn star() {
assert_whole_input_is_token("*", Lexer::star, Type::Star)
}
#[test]
fn slash() {
assert_whole_input_is_token("/", Lexer::slash, Type::Slash)
}
#[test]
fn plus() {
assert_whole_input_is_token("+", Lexer::plus, Type::Plus)
}
#[test]
fn minus() {
assert_whole_input_is_token("-", Lexer::minus, Type::Minus)
}
#[test]
fn rem() {
assert_whole_input_is_token("%", Lexer::rem, Type::Rem)
}
#[test]
fn bang() {
assert_whole_input_is_token("!", Lexer::bang, Type::Bang)
}
#[test]
fn eq() {
assert_whole_input_is_token("=", Lexer::eq, Type::Eq)
}
#[test]
fn lt() {
assert_whole_input_is_token("<", Lexer::lt, Type::Lt)
}
#[test]
fn gt() {
assert_whole_input_is_token(">", Lexer::gt, Type::Gt)
}
#[test]
fn amp() {
assert_whole_input_is_token("&", Lexer::amp, Type::Amp)
}
#[test]
fn bar() {
assert_whole_input_is_token("|", Lexer::bar, Type::Bar)
}
#[test]
fn xor() {
assert_whole_input_is_token("^", Lexer::xor, Type::Xor)
}
#[test]
fn hash() {
assert_whole_input_is_token("#", Lexer::hash, Type::Hash)
}
#[test]
fn at() {
assert_whole_input_is_token("@", Lexer::at, Type::At)
}
#[test]
fn colon() {
assert_whole_input_is_token(":", Lexer::colon, Type::Colon)
}
#[test]
fn backslash() {
assert_whole_input_is_token("\\", Lexer::backslash, Type::Backslash)
}
#[test]
fn question() {
assert_whole_input_is_token("?", Lexer::question, Type::Question)
}
#[test]
fn comma() {
assert_whole_input_is_token(",", Lexer::comma, Type::Comma)
}
#[test]
fn tilde() {
assert_whole_input_is_token("~", Lexer::tilde, Type::Tilde)
}
#[test]
fn grave() {
assert_whole_input_is_token("`", Lexer::grave, Type::Grave)
escape_string {
" \"This is a shark: \\u{1f988}\" " =>
td![String::from("This is a shark: 🦈")]
}
}
}
mod punct {
use super::*;
test_lexer_output_type! {
l_curly { "{ {" => [ Type::LCurly, Type::LCurly ] }
r_curly { "} }" => [ Type::RCurly, Type::RCurly ] }
l_brack { "[ [" => [ Type::LBrack, Type::LBrack ] }
r_brack { "] ]" => [ Type::RBrack, Type::RBrack ] }
l_paren { "( (" => [ Type::LParen, Type::LParen ] }
r_paren { ") )" => [ Type::RParen, Type::RParen ] }
amp { "& &" => [ Type::Amp, Type::Amp ] }
amp_amp { "&& &&" => [ Type::AmpAmp, Type::AmpAmp ] }
amp_eq { "&= &=" => [ Type::AmpEq, Type::AmpEq ] }
arrow { "-> ->" => [ Type::Arrow, Type::Arrow] }
at { "@ @" => [ Type::At, Type::At] }
backslash { "\\ \\" => [ Type::Backslash, Type::Backslash] }
bang { "! !" => [ Type::Bang, Type::Bang] }
bangbang { "!! !!" => [ Type::BangBang, Type::BangBang] }
bangeq { "!= !=" => [ Type::BangEq, Type::BangEq] }
bar { "| |" => [ Type::Bar, Type::Bar] }
barbar { "|| ||" => [ Type::BarBar, Type::BarBar] }
bareq { "|= |=" => [ Type::BarEq, Type::BarEq] }
colon { ": :" => [ Type::Colon, Type::Colon] }
comma { ", ," => [ Type::Comma, Type::Comma] }
dot { ". ." => [ Type::Dot, Type::Dot] }
dotdot { ".. .." => [ Type::DotDot, Type::DotDot] }
dotdoteq { "..= ..=" => [ Type::DotDotEq, Type::DotDotEq] }
eq { "= =" => [ Type::Eq, Type::Eq] }
eqeq { "== ==" => [ Type::EqEq, Type::EqEq] }
fatarrow { "=> =>" => [ Type::FatArrow, Type::FatArrow] }
grave { "` `" => [ Type::Grave, Type::Grave] }
gt { "> >" => [ Type::Gt, Type::Gt] }
gteq { ">= >=" => [ Type::GtEq, Type::GtEq] }
gtgt { ">> >>" => [ Type::GtGt, Type::GtGt] }
gtgteq { ">>= >>=" => [ Type::GtGtEq, Type::GtGtEq] }
hash { "# #" => [ Type::Hash, Type::Hash] }
lt { "< <" => [ Type::Lt, Type::Lt] }
lteq { "<= <=" => [ Type::LtEq, Type::LtEq] }
ltlt { "<< <<" => [ Type::LtLt, Type::LtLt] }
ltlteq { "<<= <<=" => [ Type::LtLtEq, Type::LtLtEq] }
minus { "- -" => [ Type::Minus, Type::Minus] }
minuseq { "-= -=" => [ Type::MinusEq, Type::MinusEq] }
plus { "+ +" => [ Type::Plus, Type::Plus] }
pluseq { "+= +=" => [ Type::PlusEq, Type::PlusEq] }
question { "? ?" => [ Type::Question, Type::Question] }
rem { "% %" => [ Type::Rem, Type::Rem] }
remeq { "%= %=" => [ Type::RemEq, Type::RemEq] }
semi { "; ;" => [ Type::Semi, Type::Semi] }
slash { "/ /" => [ Type::Slash, Type::Slash] }
slasheq { "/= /=" => [ Type::SlashEq, Type::SlashEq] }
star { "* *" => [ Type::Star, Type::Star] }
stareq { "*= *=" => [ Type::StarEq, Type::StarEq] }
tilde { "~ ~" => [ Type::Tilde, Type::Tilde] }
xor { "^ ^" => [ Type::Xor, Type::Xor] }
xoreq { "^= ^=" => [ Type::XorEq, Type::XorEq] }
xorxor { "^^ ^^" => [ Type::XorXor, Type::XorXor] }
}
}
}
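
The token-data tests the commit message marks TODO could reuse the same macros; one possible shape (hypothetical test, relying on the char → TokenData conversion defined in the token module):

    mod char_data {
        use super::*;
        test_lexer_data_type! {
            escaped_char { "'\\n' '\\u{1f988}'" => td!['\n', '🦈'] }
        }
    }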
mod parser {
// TODO

View File

@@ -1,5 +1,4 @@
//! Stores a component of a file as a type and span
use std::ops::Range;
mod token_type;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
@@ -88,54 +87,60 @@ pub enum Keyword {
While,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[derive(Clone, Debug, PartialEq)]
pub enum TokenData {
Identifier(Box<str>),
String(String),
Character(char),
Integer(u128),
Float(f64),
None,
}
from! {
value: &str => Self::Identifier(value.into()),
value: String => Self::String(value),
value: u128 => Self::Integer(value),
value: f64 => Self::Float(value),
value: char => Self::Character(value),
_v: () => Self::None,
}
macro from($($value:ident: $src:ty => $dst:expr),*$(,)?) {
$(impl From<$src> for TokenData {
fn from($value: $src) -> Self { $dst }
})*
}
#[derive(Clone, Debug, PartialEq)]
pub struct Token {
ty: Type,
pub head: usize,
pub tail: usize,
data: TokenData,
line: u32,
col: u32,
}
impl Token {
pub fn new(ty: Type, head: usize, tail: usize, line: u32, col: u32) -> Self {
Self { ty, head, tail, line, col }
/// Creates a new [Token] out of a [Type], [TokenData], line, and column.
pub fn new(ty: Type, data: impl Into<TokenData>, line: u32, col: u32) -> Self {
Self { ty, data: data.into(), line, col }
}
/// Cast this [Token] to a new [Type]
/// Casts this token to a new [Type]
pub fn cast(self, ty: Type) -> Self {
Self { ty, ..self }
}
/// Hack to work around the current [lexer's design limitations](crate::lexer)
pub fn rebound(self, head: usize, tail: usize) -> Self {
Self { head, tail, ..self }
}
/// Gets the line from this token
pub fn line(&self) -> u32 {
self.line
}
/// Gets the column from this token
pub fn col(&self) -> u32 {
self.col
}
pub fn is_empty(&self) -> bool {
self.tail == self.head
}
/// Gets the length of the token, in bytes
pub fn len(&self) -> usize {
self.tail - self.head
}
/// Gets the [Type] of the token
/// Gets the [Type] of this token
pub fn ty(&self) -> Type {
self.ty
}
/// Gets the exclusive range of the token
pub fn range(&self) -> Range<usize> {
self.head..self.tail
}
}
impl std::ops::Index<&Token> for str {
type Output = str;
fn index(&self, index: &Token) -> &Self::Output {
&self[index.range()]
/// Gets the [TokenData] of this token
pub fn data(&self) -> &TokenData {
&self.data
}
pub fn into_data(self) -> TokenData {
self.data
}
pub fn line(&self) -> u32 {
self.line
}
pub fn col(&self) -> u32 {
self.col
}
}
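
Since Token::new now takes `impl Into<TokenData>`, the from! conversions above keep construction terse; a minimal sketch:

    use conlang::token::{Token, TokenData, Type};

    fn main() {
        // Payloads convert via from!; () stands in for "no data".
        let int = Token::new(Type::Integer, 42u128, 1, 1);
        let semi = Token::new(Type::Semi, (), 1, 3);
        assert_eq!(int.data(), &TokenData::Integer(42));
        assert_eq!(semi.into_data(), TokenData::None);
    }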